diff options
Diffstat (limited to 'arm_compute/core')
517 files changed, 4903 insertions, 51791 deletions
diff --git a/arm_compute/core/AccessWindowAutoPadding.h b/arm_compute/core/AccessWindowAutoPadding.h deleted file mode 100644 index 8a182c6eb4..0000000000 --- a/arm_compute/core/AccessWindowAutoPadding.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H -#define ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class Window; -class ITensorInfo; - -/** Dummy access window. - * - * This implementation always uses the auto padding of the tensor info and - * never updates the window. The valid region is always set to cover the entire - * tensor. 
- * - * @note This access window is only used during the migration to the new - * padding system. It will be removed once all kernels have been ported. - * - * */ -class AccessWindowAutoPadding : public IAccessWindow -{ -public: - /** Default constructor. - * - * @param[in,out] info Tensor info of the accessed kernel. - */ - AccessWindowAutoPadding(ITensorInfo *info); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowAutoPadding(const AccessWindowAutoPadding &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowAutoPadding &operator=(const AccessWindowAutoPadding &) = delete; - /** Allow instances of this class to be move constructed */ - AccessWindowAutoPadding(AccessWindowAutoPadding &&) = default; - /** Allow instances of this class to be moved */ - AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default; - /** Default destructor */ - ~AccessWindowAutoPadding() = default; - - /** Set the valid region to match the entire tensor. */ - void set_valid_region(); - - /** Return a valid region that spans across the entire tensor. - * - * @return a valid region. - * - */ - ValidRegion compute_valid_region() const; - - // Inherited methods overridden: - bool update_window_if_needed(Window &window) const override; - bool update_padding_if_needed(const Window &window) override; - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; - -private: - ITensorInfo *_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ACCESS_WINDOW_AUTO_PADDING_H*/ diff --git a/arm_compute/core/AccessWindowStatic.h b/arm_compute/core/AccessWindowStatic.h deleted file mode 100644 index e40c188fcd..0000000000 --- a/arm_compute/core/AccessWindowStatic.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IACCESS_WINDOW_STATIC_H -#define ARM_COMPUTE_IACCESS_WINDOW_STATIC_H - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -#include <array> - -namespace arm_compute -{ -class Window; -class ITensorInfo; - -/** Implementation of a static rectangular access pattern. - * - * In this implementation the access offsets and sizes are not relative to the - * current element. Instead they are considered to be absolute coordinates - * within the accessed tensor's shape. - * - * */ -class AccessWindowStatic : public IAccessWindow -{ -public: - /** Constructor for a static access pattern. - * - * @param[in,out] info Tensor info of the accessed kernel. - * @param[in] start_x Start of the access in X direction. 
- * @param[in] start_y Start of the access in Y direction. - * @param[in] end_x End of the access in X direction. - * @param[in] end_y End of the access in Y direction. - */ - AccessWindowStatic(ITensorInfo *info, int start_x, int start_y, int end_x, int end_y); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowStatic(const AccessWindowStatic &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - AccessWindowStatic &operator=(const AccessWindowStatic &) = delete; - /** Allow instances of this class to be move constructed */ - AccessWindowStatic(AccessWindowStatic &&) = default; - /** Allow instances of this class to be moved */ - AccessWindowStatic &operator=(AccessWindowStatic &&) = default; - /** Default destructor */ - ~AccessWindowStatic() = default; - - /** Set the valid region based on the static access pattern and valid - * region of the inputs. - * - * @param[in] window Execution window of the kernel. - * @param[in] input_valid_region Combined valid region of all inputs. - */ - void set_valid_region(const Window &window, const ValidRegion &input_valid_region); - - /** Compute the valid region based on the static access pattern and valid region of the inputs. - * - * @param[in] window Execution window of the kernel. - * @param[in] input_valid_region Combined valid region of all inputs. - * - * @return a valid region. 
- * - */ - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const; - - // Inherited methods overriden: - bool update_window_if_needed(Window &window) const override; - bool update_padding_if_needed(const Window &window) override; - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; - -private: - ITensorInfo *_info; - int _start_x; - int _start_y; - int _end_x; - int _end_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_IACCESS_WINDOW_STATIC_H*/ diff --git a/arm_compute/core/AccessWindowTranspose.h b/arm_compute/core/AccessWindowTranspose.h deleted file mode 100644 index 16105bce7c..0000000000 --- a/arm_compute/core/AccessWindowTranspose.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H -#define ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class Window; -class ITensorInfo; - -/** Implementation of a XY-transpose access pattern. */ -class AccessWindowTranspose : public AccessWindowRectangle -{ -public: - using AccessWindowRectangle::AccessWindowRectangle; - bool update_window_if_needed(Window &window) const override; - bool update_padding_if_needed(const Window &window) override; - using AccessWindowRectangle::compute_valid_region; - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H*/ diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h index 2b6d8cd2cb..dcd3b45670 100644 --- a/arm_compute/core/CL/CLCompileContext.h +++ b/arm_compute/core/CL/CLCompileContext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -77,6 +77,8 @@ public: */ const StringSet &options() const; + bool operator==(const CLBuildOptions &other) const; + private: StringSet _build_opts; /**< Build options set */ }; @@ -118,6 +120,14 @@ public: { return _name; } + /** Returns program binary data. + * + * @return Program's binary data. + */ + const std::vector<unsigned char> &binary() const + { + return _binary; + } /** User-defined conversion to the underlying CL program. * * @return The CL program object. @@ -240,8 +250,12 @@ public: * * @return The created kernel. 
*/ - Kernel create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source, - const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const; + Kernel create_kernel(const std::string &kernel_name, + const std::string &program_name, + const std::string &program_source, + const std::string &kernel_path, + const StringSet &build_options_set, + bool is_binary) const; /** Clear the library's cache of binary programs */ @@ -288,6 +302,24 @@ public: */ bool int64_base_atomics_supported() const; + /* Returns true if the workgroup batch size modifier parameter is supported on the cl device + * + * @return true if the workgroup batch size modifier parameter is supported, false otherwise + */ + bool is_wbsm_supported() const; + + /** Return the DDK version. If the DDK version cannot be detected, return -1. + * + * @return The DDK version. + */ + int32_t get_ddk_version() const; + + /** Return the Gpu target of the associated device + * + * @return GPUTarget + */ + GPUTarget get_gpu_target() const; + private: /** Load program and its dependencies. * @@ -295,7 +327,8 @@ private: * @param[in] program_source Source of the program. * @param[in] is_binary Flag to indicate if the program source is binary. */ - const Program &load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const; + const Program & + load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const; /** Generates the build options given a string of user defined ones * @@ -315,10 +348,11 @@ private: */ std::string stringify_set(const StringSet &s, const std::string &kernel_path) const; - cl::Context _context; /**< Underlying CL context. */ - CLDevice _device; /**< Underlying CL device. */ + cl::Context _context; /**< Underlying CL context. */ + CLDevice _device; /**< Underlying CL device. 
*/ mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */ mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */ + bool _is_wbsm_supported; /**< Support of worksize batch size modifier support boolean*/ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */ diff --git a/arm_compute/core/CL/CLCoreRuntimeContext.h b/arm_compute/core/CL/CLCoreRuntimeContext.h deleted file mode 100644 index 2b2269dece..0000000000 --- a/arm_compute/core/CL/CLCoreRuntimeContext.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLCORERUNTIME_CONTEXT_H -#define ARM_COMPUTE_CLCORERUNTIME_CONTEXT_H - -#include "arm_compute/core/CL/OpenCL.h" - -namespace arm_compute -{ -// Forward declarations -class CLKernelLibrary; - -/** Core runtime context for OpenCL */ -class CLCoreRuntimeContext final -{ -public: - /** Legacy constructor */ - CLCoreRuntimeContext(); - - /** Constructor */ - CLCoreRuntimeContext(CLKernelLibrary *kernel_lib, cl::Context ctx, cl::CommandQueue queue); - /** Destructor */ - ~CLCoreRuntimeContext() = default; - /** Default copy constructor */ - CLCoreRuntimeContext(const CLCoreRuntimeContext &) = default; - /** Default move constructor */ - CLCoreRuntimeContext(CLCoreRuntimeContext &&) = default; - /** Default copy assignment */ - CLCoreRuntimeContext &operator=(const CLCoreRuntimeContext &) = default; - /** Default move assignment operator */ - CLCoreRuntimeContext &operator=(CLCoreRuntimeContext &&) = default; - /** Kernel Library accessor - * - * @return The kernel library instance used by the core context - */ - CLKernelLibrary *kernel_library() const; - /** OpenCL context accessor - * - * @return The OpenCL context used by the core context - */ - cl::Context context(); - /** OpenCL command queue accessor - * - * @return The OpenCL queue used by the core context - */ - cl::CommandQueue queue(); - -private: - CLKernelLibrary *_kernel_lib{ nullptr }; - cl::Context _ctx{}; - cl::CommandQueue _queue{}; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCORERUNTIME_CONTEXT_H */ diff --git a/arm_compute/core/CL/CLDevice.h b/arm_compute/core/CL/CLDevice.h index 812834743d..ded6bb8493 100644 --- a/arm_compute/core/CL/CLDevice.h +++ b/arm_compute/core/CL/CLDevice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/core/IDevice.h" #include <set> +#include <sstream> #include <string> namespace arm_compute @@ -43,8 +44,7 @@ class CLDevice : public IDevice { public: /** Default Constructor */ - CLDevice() - : _device(cl::Device()), _options() + CLDevice() : _device(cl::Device()), _options() { } @@ -52,8 +52,7 @@ public: * * @param[in] cl_device OpenCL device */ - CLDevice(const cl::Device &cl_device) - : _device(), _options() + CLDevice(const cl::Device &cl_device) : _device(), _options() { _device = cl_device; @@ -65,13 +64,13 @@ public: std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>(); std::istringstream iss(extensions); - for(std::string s; iss >> s;) + for (std::string s; iss >> s;) { _options.extensions.insert(s); } // SW workaround for G76 - if(_options.gpu_target == GPUTarget::G76) + if (_options.gpu_target == GPUTarget::G76) { _options.extensions.insert("cl_arm_integer_dot_product_int8"); } @@ -142,6 +141,32 @@ public: return _options.extensions.count(extension) != 0; } + /** Returns whether non-uniform workgroup is supported and the build options. + * + * If the feature is supported, the appropriate build options will be + * appended to the specified string. + * + * @return A tuple (supported, build_options) indicating whether the feature + * is supported and the corresponding build options to enable it. + */ + std::tuple<bool, std::string> is_non_uniform_workgroup_supported() const + { + if (version() == CLVersion::CL30 && get_cl_non_uniform_work_group_supported(_device)) + { + return {true, " -cl-std=CL3.0 "}; + } + else if (version() == CLVersion::CL20) + { + return {true, " -cl-std=CL2.0 "}; + } + else if (supported("cl_arm_non_uniform_work_group_size")) + { + return {true, " -cl-arm-non-uniform-work-group-size "}; + } + + return {false, ""}; + } + private: cl::Device _device; /**< OpenCL device. 
*/ struct CLDeviceOptions _options; /**< OpenCL device options */ diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index fc3f4d5db0..1a639e47f9 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,13 +26,13 @@ #include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Types.h" #include <set> #include <string> namespace arm_compute { -class CLCoreRuntimeContext; class CLCompileContext; class CLBuildOptions; @@ -41,6 +41,9 @@ enum class DataType; /** Max vector width of an OpenCL vector */ static constexpr unsigned int max_cl_vector_width = 16; +/** Max number of manual loop unrolling */ +static constexpr int max_manual_loop_unrolling = 128; + /** Translates a tensor data type to the appropriate OpenCL type. * * @param[in] dt @ref DataType to be translated to OpenCL type. @@ -97,14 +100,6 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt); */ std::string get_data_size_from_data_type(const DataType &dt); -/** Translates fixed point tensor data type to the underlying OpenCL type. - * - * @param[in] dt @ref DataType to be translated to OpenCL type. - * - * @return The string specifying the underlying OpenCL type to be used. 
- */ -std::string get_underlying_cl_type_from_data_type(const DataType &dt); - /** Helper function to get the GPU target from CL device * * @param[in] device A CL device @@ -129,6 +124,14 @@ CLVersion get_cl_version(const cl::Device &device); */ size_t get_cl_image_pitch_alignment(const cl::Device &device); +/** Helper function to check whether non-uniform work group is supported + * + * @param[in] device A CL device + * + * @return True if the feature is supported + */ +bool get_cl_non_uniform_work_group_supported(const cl::Device &device); + /** Helper function to check whether a given extension is supported * * @param[in] device A CL device @@ -176,7 +179,9 @@ bool dot8_acc_supported(const cl::Device &device); * * @return True if the configuration is supported */ -bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout); +bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, + const Size2D &kernel_size, + DataLayout data_layout); /** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors * @@ -204,16 +209,6 @@ bool preferred_dummy_work_items_support(const cl::Device &device); */ bool image2d_from_buffer_supported(const cl::Device &device); -/** Creates an opencl kernel - * - * @param[in] ctx A context to be used to create the opencl kernel. - * @param[in] kernel_name The kernel name. - * @param[in] build_opts The build options to be used for the opencl kernel compilation. - * - * @return An opencl kernel - */ -cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts); - /** Creates an opencl kernel using a compile context * * @param[in] ctx A compile context to be used to create the opencl kernel. 
@@ -222,7 +217,9 @@ cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &ke * * @return An opencl kernel */ -cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>()); +cl::Kernel create_kernel(const CLCompileContext &ctx, + const std::string &kernel_name, + const std::set<std::string> &build_opts = std::set<std::string>()); /** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size. * If input width is smaller than 128 we can use fewer threads than 8. @@ -234,5 +231,62 @@ cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_ */ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size); +/* Helper function to check if the workgroup batch size modifier parameter is supported on the cl device + * + * @param[in] device cl device to check for support + * + * @return true if the workgroup batch size modifier parameter is supported, false otherwise + */ +bool get_wbsm_support_info(const cl::Device &device); + +/* Helper function to set the workgroup batch size modifier parameter in the kernel + * + * @param[in] kernel cl kernel to set the workgroup batch size modifier parameter + * @param[in] wbsm_hint workgroup batch size modifier to use + */ +void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint); + +/* Helper function to check if we can export the tensor to cl_image + * + * @param[in] input tensor + * + * @return true if we can export the tensor to cl_image + */ +bool export_to_cl_image(const ITensorInfo *tensor); + +/* Helper function to force unroll with pragma when any of the input values (iterations) are greater than @ref max_manual_loop_unrolling + * + * This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values are greater than @ref max_manual_loop_unrolling + * + * @param[in] built_opts 
OpenCL kernel build options + * @param[in] values Input values (iterations) + * + */ +void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values); + +/** Helper function to check whether the cl_arm_matrix_multiply extension is supported + * + * @param[in] device A CL device + * + * @return True if the extension is supported + */ +bool arm_matrix_multiply_supported(const cl::Device &device); + +/** Check whether cl_khr_command_buffer extension is supported by the specified CL device. + * + * @param[in] device The CL device + * + * @return True if the extension is supported by the CL device. + */ +bool command_buffer_supported(const cl::Device &device); + +/** Check whether cl_khr_command_buffer_mutable_dispatch extension is supported by the specified CL device. + * + * @param[in] device The CL device + * + * @return True if the extension is supported by the CL device. + */ +bool command_buffer_mutable_dispatch_supported(const cl::Device &device); + } // namespace arm_compute #endif /* ARM_COMPUTE_CLHELPERS_H */ diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 6c5df6cb08..527733ccf1 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,7 @@ private: public: /** Access the KernelLibrary singleton. - * This method has been deprecated and will be removed in the next release. + * This method has been deprecated and will be removed in future releases * @return The KernelLibrary instance. 
*/ static CLKernelLibrary &get(); @@ -148,6 +148,12 @@ public: */ std::string get_program_name(const std::string &kernel_name) const; + /* Returns true if the workgroup batch size modifier parameter is supported on the cl device + * + * @return true if the workgroup batch size modifier parameter is supported, false otherwise + */ + bool is_wbsm_supported(); + /** Sets the CL context used to create programs. * * @note Setting the context also resets the device to the @@ -164,11 +170,7 @@ public: CLCompileContext &get_compile_context(); private: - CLCompileContext _compile_context; /**< Compile Context. */ - std::string _kernel_path; /**< Path to the kernels folder. */ - static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */ - static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs. - Used for compile-time kernel inclusion. >*/ + CLCompileContext _compile_context; /**< Compile Context. */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLKERNELLIBRARY_H */ diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h deleted file mode 100644 index cd26399390..0000000000 --- a/arm_compute/core/CL/CLKernels.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLKERNELS_H -#define ARM_COMPUTE_CLKERNELS_H - -/* Header regrouping all the CL kernels */ -#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" -#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" -#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h" -#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h" -#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h" -#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h" -#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h" -#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" -#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h" -#include "arm_compute/core/CL/kernels/CLComparisonKernel.h" -#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h" -#include 
"arm_compute/core/CL/kernels/CLConvolutionKernel.h" -#include "arm_compute/core/CL/kernels/CLCopyKernel.h" -#include "arm_compute/core/CL/kernels/CLCropKernel.h" -#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" -#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" -#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" -#include "arm_compute/core/CL/kernels/CLDilateKernel.h" -#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" -#include "arm_compute/core/CL/kernels/CLErodeKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h" -#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h" -#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" -#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLFloorKernel.h" -#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" -#include 
"arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" -#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" -#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h" -#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h" -#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h" -#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include 
"arm_compute/core/CL/kernels/CLHistogramKernel.h" -#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h" -#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h" -#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h" -#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" -#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLMemsetKernel.h" -#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h" -#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h" -#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPermuteKernel.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLRangeKernel.h" -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" -#include "arm_compute/core/CL/kernels/CLRemapKernel.h" -#include 
"arm_compute/core/CL/kernels/CLReorgLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLReverseKernel.h" -#include "arm_compute/core/CL/kernels/CLScaleKernel.h" -#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLSelectKernel.h" -#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h" -#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h" -#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h" -#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" -#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h" -#include "arm_compute/core/CL/kernels/CLThresholdKernel.h" -#include "arm_compute/core/CL/kernels/CLTileKernel.h" -#include "arm_compute/core/CL/kernels/CLTransposeKernel.h" -#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h" -#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h" -#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" -#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h" -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" - -#endif /* ARM_COMPUTE_CLKERNELS_H */ diff --git 
a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h index 3643b178d3..0f088e2b10 100644 --- a/arm_compute/core/CL/CLTypes.h +++ b/arm_compute/core/CL/CLTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,8 @@ enum class CLVersion CL10, /* the OpenCL 1.0 */ CL11, /* the OpenCL 1.1 */ CL12, /* the OpenCL 1.2 */ - CL20, /* the OpenCL 2.0 and above */ + CL20, /* the OpenCL 2.x */ + CL30, /* the OpenCL 3.x */ UNKNOWN /* unkown version */ }; @@ -62,18 +63,27 @@ struct CLDeviceOptions struct CLQuantization { /** Default Constructor */ - CLQuantization() - : scale(nullptr), offset(nullptr) {}; + CLQuantization() : scale(nullptr), offset(nullptr){}; /** Constructor * * @param[in] scale OpenCL scale array * @param[in] offset OpenCL offset array */ - CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) - : scale(scale), offset(offset) {}; + CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset) : scale(scale), offset(offset){}; const ICLFloatArray *scale; /**< Quantization scale array */ const ICLInt32Array *offset; /**< Quantization offset array */ }; + +enum CLKernelType +{ + UNKNOWN, /**< Unknown CL kernel type */ + DEPTHWISE, /**< Depthwise CL kernel type */ + DIRECT, /**< Direct Convolution CL kernel type */ + ELEMENTWISE, /**< Elementwise CL kernel type */ + GEMM, /**< GEMM CL kernel type */ + POOL, /**< Pool CL kernel type */ + WINOGRAD /**< Winograd CL kernel type */ +}; } // namespace arm_compute #endif /* ARM_COMPUTE_CL_TYPES_H */ diff --git a/arm_compute/core/CL/CLValidate.h b/arm_compute/core/CL/CLValidate.h deleted file mode 100644 index 8f1733dcfe..0000000000 --- a/arm_compute/core/CL/CLValidate.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_VALIDATE_H -#define ARM_COMPUTE_CL_VALIDATE_H - -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported())) - -#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported())) - -/** Return an error if int64_base_atomics extension is not supported by the device. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. 
- * - * @return Status - */ -inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line) -{ - if(!CLKernelLibrary::get().int64_base_atomics_supported()) - { - return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported"); - } - return arm_compute::Status{}; -} - -#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__)); - -#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__)); - -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_VALIDATE_H */ diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h index e11fb95bf8..a2b2baa5b3 100644 --- a/arm_compute/core/CL/ICLArray.h +++ b/arm_compute/core/CL/ICLArray.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,8 +40,7 @@ public: * @param[in] max_num_values Maximum size of the array. * */ - explicit ICLArray(size_t max_num_values) - : IArray<T>(max_num_values), _mapping(nullptr) + explicit ICLArray(size_t max_num_values) : IArray<T>(max_num_values), _mapping(nullptr) { } @@ -66,8 +65,6 @@ public: * @param[in] blocking If true, then the mapping will be ready to use by the time * this method returns, else it is the caller's responsibility * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - * - * @return The mapping address. */ void map(cl::CommandQueue &q, bool blocking = true) { @@ -115,14 +112,6 @@ private: uint8_t *_mapping; }; -/** Interface for OpenCL Array of Key Points. 
*/ -using ICLKeyPointArray = ICLArray<KeyPoint>; -/** Interface for OpenCL Array of 2D Coordinates. */ -using ICLCoordinates2DArray = ICLArray<Coordinates2D>; -/** Interface for OpenCL Array of Detection Windows. */ -using ICLDetectionWindowArray = ICLArray<DetectionWindow>; -/** Interface for OpenCL Array of 2D Sizes. */ -using ICLSize2DArray = ICLArray<Size2D>; /** Interface for OpenCL Array of uint8s. */ using ICLUInt8Array = ICLArray<cl_uchar>; /** Interface for OpenCL Array of uint16s. */ @@ -135,5 +124,5 @@ using ICLInt16Array = ICLArray<cl_short>; using ICLInt32Array = ICLArray<cl_int>; /** Interface for OpenCL Array of floats. */ using ICLFloatArray = ICLArray<cl_float>; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ICLARRAY_H*/ diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h deleted file mode 100644 index a9bafe3d5a..0000000000 --- a/arm_compute/core/CL/ICLDistribution1D.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLDISTRIBUTION1D_H -#define ARM_COMPUTE_ICLDISTRIBUTION1D_H - -#include "arm_compute/core/IDistribution1D.h" - -#include <cstddef> -#include <cstdint> - -namespace cl -{ -class Buffer; -class CommandQueue; -} - -namespace arm_compute -{ -/** ICLDistribution1D interface class */ -class ICLDistribution1D : public IDistribution1D -{ -public: - /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1] - * defined by a start offset and valid range, divided equally into num_bins parts. - * - * @param[in] num_bins The number of bins the distribution is divided in. - * @param[in] offset The start of the values to use. - * @param[in] range The total number of the consecutive values of the distribution interval. - */ - ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLDistribution1D(const ICLDistribution1D &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - const ICLDistribution1D &operator=(const ICLDistribution1D &) = delete; - /** Enqueue a map operation of the allocated buffer on the given queue. - * - * @param[in,out] q The CL command queue to use for the mapping operation. - * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. 
- */ - void map(cl::CommandQueue &q, bool blocking = true); - /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - * - * @param[in,out] q The CL command queue to use for the mapping operation. - */ - void unmap(cl::CommandQueue &q); - /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the distribution's data. - * - * @return A reference to an OpenCL buffer containing the distribution's data. - */ - virtual cl::Buffer &cl_buffer() = 0; - // Inherited methods overridden: - uint32_t *buffer() const override; - -protected: - /** Method to be implemented by the child class to map the OpenCL buffer - * - * @param[in,out] q The CL command queue to use for the mapping operation. - * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - */ - virtual uint32_t *do_map(cl::CommandQueue &q, bool blocking) = 0; - /** Method to be implemented by the child class to unmap the OpenCL buffer - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - * - * @param[in,out] q The CL command queue to use for the mapping operation. - */ - virtual void do_unmap(cl::CommandQueue &q) = 0; - -protected: - uint32_t *_mapping; /**< The distribution data. 
*/ -}; -} -#endif /* ARM_COMPUTE_ICLDISTRIBUTION1D_H */ diff --git a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h b/arm_compute/core/CL/ICLGEMMKernelConfiguration.h deleted file mode 100644 index e5f4a78297..0000000000 --- a/arm_compute/core/CL/ICLGEMMKernelConfiguration.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H -#define ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H - -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Basic interface for the GEMM kernel configuration */ -class ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] arch GPU target - */ - ICLGEMMKernelConfiguration(GPUTarget arch) - : _target(arch) - { - } - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete; - /** Default Move Constructor. */ - ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default; - /** Default move assignment operator */ - ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default; - /** Virtual destructor */ - virtual ~ICLGEMMKernelConfiguration() = default; - /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used - * - * @param[in] m Number of rows LHS matrix - * @param[in] n Number of columns RHS matrix - * @param[in] k Number of columns LHS matrix or number of rows RHS matrix - * @param[in] b Batch size - * @param[in] data_type Data type - */ - virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0; - -protected: - GPUTarget _target; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h deleted file mode 100644 index b42566ef11..0000000000 --- a/arm_compute/core/CL/ICLHOG.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_ICLHOG_H -#define ARM_COMPUTE_ICLHOG_H - -#include "arm_compute/core/IHOG.h" - -#include <cstdint> - -namespace cl -{ -class Buffer; -class CommandQueue; -} - -namespace arm_compute -{ -/** Interface for OpenCL HOG data-object */ -class ICLHOG : public IHOG -{ -public: - /** Default constructor */ - ICLHOG(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLHOG(const ICLHOG &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLHOG &operator=(const ICLHOG &) = delete; - /** Allow instances of this class to be moved */ - ICLHOG(ICLHOG &&) = default; - /** Allow instances of this class to be moved */ - ICLHOG &operator=(ICLHOG &&) = default; - /** Default destructor */ - virtual ~ICLHOG() = default; - - /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the hog's descriptor - * - * @return A reference to an OpenCL buffer containing the hog's descriptor - */ - virtual const cl::Buffer &cl_buffer() const = 0; - - /** Enqueue a map operation of the allocated buffer on the given queue. - * - * @param[in,out] q The CL command queue to use for the mapping operation. - * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - * - * @return The mapping address. - */ - void map(cl::CommandQueue &q, bool blocking = true); - - /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - * - * @param[in,out] q The CL command queue to use for the mapping operation. 
- */ - void unmap(cl::CommandQueue &q); - - /** Interface to be implemented by the child class to free the allocated cl buffer. - * - * @warning The buffer must have been allocated previously. Otherwise calling the function will fail. - */ - virtual void free() = 0; - - // Inherited methods overridden: - float *descriptor() const override; - -protected: - /** Method to be implemented by the child class to map the OpenCL buffer - * - * @param[in,out] q The CL command queue to use for the mapping operation. - * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - */ - virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; - /** Method to be implemented by the child class to unmap the OpenCL buffer - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - * - * @param[in,out] q The CL command queue to use for the mapping operation. - */ - virtual void do_unmap(cl::CommandQueue &q) = 0; - -private: - uint8_t *_mapping; -}; -} -#endif /*ARM_COMPUTE_ICLHOG_H */ diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h deleted file mode 100644 index 3e545c61aa..0000000000 --- a/arm_compute/core/CL/ICLKernel.h +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLKERNEL_H -#define ARM_COMPUTE_ICLKERNEL_H - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLTypes.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/IKernel.h" - -#include <string> - -namespace arm_compute -{ -template <typename T> -class ICLArray; -class ICLTensor; -class Window; - -/** Common interface for all the OpenCL kernels */ -class ICLKernel : public IKernel -{ -private: - /** Returns the number of arguments enqueued per array object. - * - * @return The number of arguments enqueued per array object. 
- */ - template <unsigned int dimension_size> - constexpr static unsigned int num_arguments_per_array() - { - return num_arguments_per_tensor<dimension_size>(); - } - /** Returns the number of arguments enqueued per tensor object. - * - * @return The number of arguments enqueued per tensor object. - */ - template <unsigned int dimension_size> - constexpr static unsigned int num_arguments_per_tensor() - { - return 2 + 2 * dimension_size; - } - using IKernel::configure; //Prevent children from calling IKernel::configure() directly -protected: - /** Configure the kernel's window and local workgroup size hint. - * - * @param[in] window The maximum window which will be returned by window() - * @param[in] lws_hint (Optional) Local-Workgroup-Size to use. - */ - void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange()) - { - _lws_hint = lws_hint; - IKernel::configure(window); - } - -public: - /** Constructor */ - ICLKernel() - : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint() - { - } - /** Returns a reference to the OpenCL kernel of this object. - * - * @return A reference to the OpenCL kernel of this object. - */ - cl::Kernel &kernel() - { - return _kernel; - } - /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] array Array to set as an argument of the object's kernel. - * @param[in] strides @ref Strides object containing stride of each dimension in bytes. - * @param[in] num_dimensions Number of dimensions of the @p array. - * @param[in] window Window the kernel will be executed on. 
- */ - template <typename T> - void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window) - { - add_array_argument<T, 1>(idx, array, strides, num_dimensions, window); - } - /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<1>(idx, tensor, window); - } - /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true. - * - * @param[in] cond Condition to check - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - if(cond) - { - add_1D_tensor_argument(idx, tensor, window); - } - } - /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. 
- */ - void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<2>(idx, tensor, window); - } - /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true. - * - * @param[in] cond Condition to check - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - if(cond) - { - add_2D_tensor_argument(idx, tensor, window); - } - } - /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<3>(idx, tensor, window); - } - /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window) - { - add_tensor_argument<4>(idx, tensor, window); - } - /** Returns the number of arguments enqueued per 1D array object. 
- * - * @return The number of arguments enqueues per 1D array object. - */ - constexpr static unsigned int num_arguments_per_1D_array() - { - return num_arguments_per_array<1>(); - } - /** Returns the number of arguments enqueued per 1D tensor object. - * - * @return The number of arguments enqueues per 1D tensor object. - */ - constexpr static unsigned int num_arguments_per_1D_tensor() - { - return num_arguments_per_tensor<1>(); - } - /** Returns the number of arguments enqueued per 2D tensor object. - * - * @return The number of arguments enqueues per 2D tensor object. - */ - constexpr static unsigned int num_arguments_per_2D_tensor() - { - return num_arguments_per_tensor<2>(); - } - /** Returns the number of arguments enqueued per 3D tensor object. - * - * @return The number of arguments enqueues per 3D tensor object. - */ - constexpr static unsigned int num_arguments_per_3D_tensor() - { - return num_arguments_per_tensor<3>(); - } - /** Returns the number of arguments enqueued per 4D tensor object. - * - * @return The number of arguments enqueues per 4D tensor object. - */ - constexpr static unsigned int num_arguments_per_4D_tensor() - { - return num_arguments_per_tensor<4>(); - } - /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue. - * - * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns. - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel. - */ - virtual void run(const Window &window, cl::CommandQueue &queue) = 0; - /** Add the passed parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set. 
- * @param[in] value Value to set as an argument of the object's kernel. - */ - template <typename T> - void add_argument(unsigned int &idx, T value) - { - _kernel.setArg(idx++, value); - } - - /** Set the Local-Workgroup-Size hint - * - * @note This method should be called after the configuration of the kernel - * - * @param[in] lws_hint Local-Workgroup-Size to use - */ - void set_lws_hint(const cl::NDRange &lws_hint) - { - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure() - _lws_hint = lws_hint; - } - - /** Return the Local-Workgroup-Size hint - * - * @return Current lws hint - */ - cl::NDRange lws_hint() const - { - return _lws_hint; - } - - /** Get the configuration ID - * - * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel - * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel. - * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry - * - * @note This method should be called after the configuration of the kernel - * - * @return configuration id string - */ - const std::string &config_id() const - { - return _config_id; - } - - /** Set the targeted GPU architecture - * - * @param[in] target The targeted GPU architecture - */ - void set_target(GPUTarget target) - { - _target = target; - } - - /** Set the targeted GPU architecture according to the CL device - * - * @param[in] device A CL device - */ - void set_target(cl::Device &device); - - /** Get the targeted GPU architecture - * - * @return The targeted GPU architecture. - */ - GPUTarget get_target() const - { - return _target; - } - - /** Get the maximum workgroup size for the device the CLKernelLibrary uses. - * - * @return The maximum workgroup size value. 
- */ - size_t get_max_workgroup_size(); - /** Get the global work size given an execution window - * - * @param[in] window Execution window - * - * @return Global work size of the given execution window - */ - static cl::NDRange gws_from_window(const Window &window); - -private: - /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] array Array to set as an argument of the object's kernel. - * @param[in] strides @ref Strides object containing stride of each dimension in bytes. - * @param[in] num_dimensions Number of dimensions of the @p array. - * @param[in] window Window the kernel will be executed on. - */ - template <typename T, unsigned int dimension_size> - void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window); - /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] window Window the kernel will be executed on. - */ - template <unsigned int dimension_size> - void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window); - -protected: - cl::Kernel _kernel; /**< OpenCL kernel to run */ - GPUTarget _target; /**< The targeted GPU */ - std::string _config_id; /**< Configuration ID */ - size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ -private: - cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ -}; - -/** Add the kernel to the command queue with the given window. 
- * - * @note Depending on the size of the window, this might translate into several jobs being enqueued. - * - * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. - * - * @param[in,out] queue OpenCL command queue. - * @param[in] kernel Kernel to enqueue - * @param[in] window Window the kernel has to process. - * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target. - * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. Default is false - * Note: it is kernel responsibility to check if the work-item is out-of-range - * - * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. - */ -void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false); - -/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] array Array to set as an argument of the object's kernel. - * @param[in] strides @ref Strides object containing stride of each dimension in bytes. - * @param[in] num_dimensions Number of dimensions of the @p array. - * @param[in] window Window the kernel will be executed on. 
- */ -template <typename T, unsigned int dimension_size> -void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window) -{ - ARM_COMPUTE_ERROR_ON(array == nullptr); - - // Calculate offset to the start of the window - unsigned int offset_first_element = 0; - - for(unsigned int n = 0; n < num_dimensions; ++n) - { - offset_first_element += window[n].start() * strides[n]; - } - - unsigned int idx_start = idx; - _kernel.setArg(idx++, array->cl_buffer()); - - for(unsigned int dimension = 0; dimension < dimension_size; dimension++) - { - _kernel.setArg<cl_uint>(idx++, strides[dimension]); - _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step()); - } - - _kernel.setArg<cl_uint>(idx++, offset_first_element); - - ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array<dimension_size>() != idx, - "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>()); - ARM_COMPUTE_UNUSED(idx_start); -} -} -#endif /*ARM_COMPUTE_ICLKERNEL_H */ diff --git a/arm_compute/core/CL/ICLLut.h b/arm_compute/core/CL/ICLLut.h deleted file mode 100644 index 430adb8727..0000000000 --- a/arm_compute/core/CL/ICLLut.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLLUT_H -#define ARM_COMPUTE_ICLLUT_H - -#include "arm_compute/core/ILut.h" - -#include <cstdint> - -namespace cl -{ -class Buffer; -class CommandQueue; -} - -namespace arm_compute -{ -/** Interface for OpenCL LUT */ -class ICLLut : public ILut -{ -public: - ICLLut(); - ICLLut(const ICLLut &) = delete; - ICLLut &operator=(const ICLLut &) = delete; - - /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the lut's data. - * - * @return A reference to an OpenCL buffer containing the lut's data. - */ - virtual const cl::Buffer &cl_buffer() const = 0; - /** Enqueue a map operation of the allocated buffer on the given queue. - * - * @param[in,out] q The CL command queue to use for the mapping operation. 
- * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - */ - void map(cl::CommandQueue &q, bool blocking = true); - /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - * - * @param[in,out] q The CL command queue to use for the mapping operation. - */ - void unmap(cl::CommandQueue &q); - - // Inherited methods overridden: - uint8_t *buffer() const override; - -protected: - /** Method to be implemented by the child class to map the OpenCL buffer - * - * @param[in,out] q The CL command queue to use for the mapping operation. - * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - */ - virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0; - /** Method to be implemented by the child class to unmap the OpenCL buffer - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - * - * @param[in,out] q The CL command queue to use for the mapping operation. 
- */ - virtual void do_unmap(cl::CommandQueue &q) = 0; - -private: - uint8_t *_mapping; -}; -} -#endif /*ARM_COMPUTE_ICLLUT_H */ diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h deleted file mode 100644 index f9213018a2..0000000000 --- a/arm_compute/core/CL/ICLMultiHOG.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLMULTIHOG_H -#define ARM_COMPUTE_ICLMULTIHOG_H - -#include "arm_compute/core/CL/ICLHOG.h" -#include "arm_compute/core/IMultiHOG.h" - -namespace arm_compute -{ -/** Interface for storing multiple HOG data-objects */ -class ICLMultiHOG : public IMultiHOG -{ -public: - /** Return a pointer to the requested OpenCL HOG model - * - * @param[in] index The index of the wanted OpenCL HOG model. 
- * - * @return A pointer pointed to the HOG model - */ - virtual ICLHOG *cl_model(size_t index) = 0; - /** Return a constant pointer to the requested OpenCL HOG model - * - * @param[in] index The index of the wanted OpenCL HOG model. - * - * @return A constant pointer pointed to the OpenCL HOG model - */ - virtual const ICLHOG *cl_model(size_t index) const = 0; - - // Inherited methods overridden: - IHOG *model(size_t index) override; - const IHOG *model(size_t index) const override; -}; -} -#endif /*ARM_COMPUTE_ICLMULTIHOG_H */ diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h deleted file mode 100644 index 0233600e73..0000000000 --- a/arm_compute/core/CL/ICLMultiImage.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_ICLMULTIIMAGE_H -#define ARM_COMPUTE_ICLMULTIIMAGE_H - -#include "arm_compute/core/IMultiImage.h" - -namespace arm_compute -{ -class ICLTensor; -/** Interface for OpenCL images */ -using ICLImage = ICLTensor; - -/** Interface for OpenCL multi-planar images */ -class ICLMultiImage : public IMultiImage -{ -public: - /** Return a pointer to the requested OpenCL plane of the image. - * - * @param[in] index The index of the wanted planed. - * - * @return A pointer pointed to the OpenCL plane - */ - virtual ICLImage *cl_plane(unsigned int index) = 0; - /** Return a constant pointer to the requested OpenCL plane of the image. - * - * @param[in] index The index of the wanted planed. - * - * @return A constant pointer pointed to the OpenCL plane - */ - virtual const ICLImage *cl_plane(unsigned int index) const = 0; - - // Inherited methods overridden: - IImage *plane(unsigned int index) override; - const IImage *plane(unsigned int index) const override; -}; -} -#endif /*ARM_COMPUTE_ICLMULTIIMAGE_H */ diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h deleted file mode 100644 index bd423303bb..0000000000 --- a/arm_compute/core/CL/ICLSimple2DKernel.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H -#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H - -#include "arm_compute/core/CL/ICLSimpleKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */ -class ICLSimple2DKernel : public ICLSimpleKernel -{ -public: - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; -} -#endif /*ARM_COMPUTE_ICLSIMPLE2DKERNEL_H */ diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h deleted file mode 100644 index e25051f578..0000000000 --- a/arm_compute/core/CL/ICLSimple3DKernel.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H -#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. - * Both input tensor and output tensor must have at least 3 dimensions. - */ -class ICLSimple3DKernel : public ICLSimple2DKernel -{ -public: - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; -} -#endif /*ARM_COMPUTE_ICLSIMPLE3DKERNEL_H */ diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h deleted file mode 100644 index e8b6f0a81c..0000000000 --- a/arm_compute/core/CL/ICLSimpleKernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H -#define ARM_COMPUTE_ICLSIMPLEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" - -namespace arm_compute -{ -/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */ -class ICLSimpleKernel : public ICLKernel -{ -public: - /** Constructor. */ - ICLSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLSimpleKernel(const ICLSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - ICLSimpleKernel(ICLSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default; - /** Default destructor */ - ~ICLSimpleKernel() = default; - - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const ICLTensor *_input; - ICLTensor *_output; -}; -} - -#endif /*ARM_COMPUTE_ICLSIMPLEKERNEL_H */ diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h index 001f892231..8de5423762 100644 --- a/arm_compute/core/CL/ICLTensor.h +++ b/arm_compute/core/CL/ICLTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_ICLTENSOR_H #define ARM_COMPUTE_ICLTENSOR_H -#include "arm_compute/core/ITensor.h" - #include "arm_compute/core/CL/CLTypes.h" +#include "arm_compute/core/ITensor.h" #include <cstdint> @@ -34,7 +33,7 @@ namespace cl { class Buffer; class CommandQueue; -} +} // namespace cl namespace arm_compute { @@ -71,8 +70,6 @@ public: * @param[in] blocking If true, then the mapping will be ready to use by the time * this method returns, else it is the caller's responsibility * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - * - * @return The mapping address. */ void map(cl::CommandQueue &q, bool blocking = true); /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. @@ -115,5 +112,5 @@ private: }; using ICLImage = ICLTensor; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ICLTENSOR_H */ diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 72cbb3d2b2..8b5bf97099 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_OPENCL_H -#define ARM_COMPUTE_OPENCL_H +#ifndef ACL_ARM_COMPUTE_CORE_CL_OPENCL_H +#define ACL_ARM_COMPUTE_CORE_CL_OPENCL_H #include <string> #include <utility> @@ -31,8 +31,8 @@ #ifndef ARM_COMPUTE_NO_EXCEPTIONS #define CL_HPP_ENABLE_EXCEPTIONS #endif // ARM_COMPUTE_NO_EXCEPTIONS -#define CL_TARGET_OPENCL_VERSION 200 -#define CL_HPP_TARGET_OPENCL_VERSION 110 +#define CL_TARGET_OPENCL_VERSION 300 +#define CL_HPP_TARGET_OPENCL_VERSION 110 #define CL_HPP_MINIMUM_OPENCL_VERSION 110 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Weffc++" @@ -40,8 +40,8 @@ #pragma GCC diagnostic ignored "-Wunused-parameter" #if defined(__GNUG__) && __GNUG__ >= 8 #pragma GCC diagnostic ignored "-Wcatch-value" -#endif // defined(__GNUG__) && __GNUG__ >= 8 -#include <CL/cl2.hpp> +#endif // defined(__GNUG__) && __GNUG__ >= 8 +#include <CL/opencl.hpp> // include new hpp header instead of cl2.hpp #pragma GCC diagnostic pop namespace cl @@ -73,25 +73,27 @@ public: * @return The static instance of CLSymbols. */ static CLSymbols &get(); - /** Load symbols from the given OpenCL library path. + /** This method attempts to load the OpenCL symbols from the first available library from the provided OpenCL libraries. * - * @param[in] library Path to the OpenCL library. + * @param[in] libraries_filenames Vector containing the filenames of the libraries to be loaded. + * @param[in] use_loader Use symbol loader function loadOpenCLPointer. * - * @return True if loading the library is successful. + * @return True if loading the library is successful. False if all the provided libraries could not be loaded. */ - bool load(const std::string &library); + bool load(const std::vector<std::string> &libraries_filenames, bool use_loader = false); /** Load symbols from any of the default OpenCL library names. + * If all the default libraries could not be loaded, this method will print a warning message and return false. * * @return True if loading any library is successful. 
*/ bool load_default(); -#define DECLARE_FUNCTION_PTR(func_name) \ - std::function<decltype(func_name)> func_name##_ptr = nullptr +#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr DECLARE_FUNCTION_PTR(clCreateContext); DECLARE_FUNCTION_PTR(clCreateContextFromType); DECLARE_FUNCTION_PTR(clCreateCommandQueue); + DECLARE_FUNCTION_PTR(clCreateCommandQueueWithProperties); DECLARE_FUNCTION_PTR(clGetContextInfo); DECLARE_FUNCTION_PTR(clBuildProgram); DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel); @@ -123,6 +125,7 @@ public: DECLARE_FUNCTION_PTR(clGetDeviceIDs); DECLARE_FUNCTION_PTR(clGetMemObjectInfo); DECLARE_FUNCTION_PTR(clRetainEvent); + DECLARE_FUNCTION_PTR(clGetPlatformInfo); DECLARE_FUNCTION_PTR(clGetPlatformIDs); DECLARE_FUNCTION_PTR(clGetKernelWorkGroupInfo); DECLARE_FUNCTION_PTR(clGetCommandQueueInfo); @@ -135,6 +138,18 @@ public: DECLARE_FUNCTION_PTR(clEnqueueMarker); DECLARE_FUNCTION_PTR(clWaitForEvents); DECLARE_FUNCTION_PTR(clCreateImage); + DECLARE_FUNCTION_PTR(clSetKernelExecInfo); + DECLARE_FUNCTION_PTR(clGetExtensionFunctionAddressForPlatform); + + // Command buffer and mutable dispatch command buffer extensions + DECLARE_FUNCTION_PTR(clCreateCommandBufferKHR); + DECLARE_FUNCTION_PTR(clRetainCommandBufferKHR); + DECLARE_FUNCTION_PTR(clReleaseCommandBufferKHR); + DECLARE_FUNCTION_PTR(clFinalizeCommandBufferKHR); + DECLARE_FUNCTION_PTR(clEnqueueCommandBufferKHR); + DECLARE_FUNCTION_PTR(clCommandNDRangeKernelKHR); + + DECLARE_FUNCTION_PTR(clUpdateMutableCommandsKHR); // Third-party extensions DECLARE_FUNCTION_PTR(clImportMemoryARM); @@ -145,4 +160,4 @@ private: std::pair<bool, bool> _loaded; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_OPENCL_H */ +#endif // ACL_ARM_COMPUTE_CORE_CL_OPENCL_H diff --git a/arm_compute/core/CL/gemm/CLGEMMHelpers.h b/arm_compute/core/CL/gemm/CLGEMMHelpers.h deleted file mode 100644 index dcda732c2d..0000000000 --- a/arm_compute/core/CL/gemm/CLGEMMHelpers.h +++ /dev/null 
@@ -1,53 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMHELPERS_H -#define ARM_COMPUTE_CLGEMMHELPERS_H - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo - * - * @param[in] m Number of rows (M) in the LHS matrix not reshaped - * @param[in] n Number of columns (N) in the RHS matrix not reshaped - * @param[in] m0 Number of rows processed by each thread/work-item - * @param[in] n0 Number of columns processed by each thread/work-item - * @param[in] k0 Number of inner accumulation performed by each thread/work-item - * @param[in] v0 Number of vertical blocks of size (m0xk0) stored on the same output row - * @param[in] h0 Number of horizontal blocks of size (k0xn0) stored on the same output row - * @param[in] lhs_interleave True if the v0 (m0xk0) blocks have to be interleaved in the output row - * @param[in] rhs_interleave True if the h0 (k0xn0) blocks have to be interleaved in the output row - * @param[in] lhs_transpose True if the (m0xk0) block has to be transposed before been stored - * @param[in] rhs_transpose True if the (k0xn0) block has to be transposed before been stored - * - * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo - */ -std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, - bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose); -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMHELPERS_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h deleted file mode 100644 index a6341e5094..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h" - -#include <memory> - -namespace arm_compute -{ -namespace cl_gemm -{ -/** CLGEMMNative factory class */ -class CLGEMMNativeKernelConfigurationFactory final -{ -public: - /** Static method to construct CLGEMMNative kernel object accordingly with the GPU target - * - * @param[in] gpu GPU target - * - * @return CLGEMMNative kernel configuration class - */ - static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu) - { - switch(get_arch_from_target(gpu)) - { - case GPUTarget::MIDGARD: - return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationMidgard>(gpu); - case GPUTarget::BIFROST: - return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(gpu); - case GPUTarget::VALHALL: - return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationValhall>(gpu); - default: - ARM_COMPUTE_ERROR("Not supported GPU target"); - } - } -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h deleted file mode 100644 index 5b2abe6f0f..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Bifrost based OpenCL GEMMNative configuration */ -class CLGEMMNativeKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMNativeKernelConfigurationBifrost(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONBIFROST_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h deleted file mode 100644 index 0e95a15613..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2020 ARM 
Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Midgard based OpenCL GEMMNative configuration */ -class CLGEMMNativeKernelConfigurationMidgard final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMNativeKernelConfigurationMidgard(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONMIDGARD_H */ diff --git a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h deleted file mode 100644 index e739997b3a..0000000000 --- a/arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H -#define ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Valhall based OpenCL GEMMNative configuration */ -class CLGEMMNativeKernelConfigurationValhall final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMNativeKernelConfigurationValhall(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATIONVALHALL_H */ diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h deleted file mode 100644 index 10dc9aefdb..0000000000 --- 
a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H -#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h" -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h" - -#include <memory> - -namespace arm_compute -{ -namespace cl_gemm -{ -/** CLGEMMReshaped factory class */ -class CLGEMMReshapedKernelConfigurationFactory final -{ -public: - /** Static method to call the CLGEMMReshaped kernel configuration class accordingly with the GPU target - * - * @param[in] gpu GPU target - * - * @return CLGEMMReshaped kernel configuration class - */ - static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu) - { - switch(get_arch_from_target(gpu)) - { - case GPUTarget::MIDGARD: - case GPUTarget::BIFROST: - return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationBifrost>(gpu); - case GPUTarget::VALHALL: - return support::cpp14::make_unique<CLGEMMReshapedKernelConfigurationValhall>(gpu); - default: - ARM_COMPUTE_ERROR("Not supported GPU target"); - } - } -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h deleted file mode 100644 index 55742e3e56..0000000000 --- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H -#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Bifrost based OpenCL GEMMReshaped configuration */ -class CLGEMMReshapedKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedKernelConfigurationBifrost(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONBIFROST_H */ diff --git a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h deleted file mode 100644 index e65974144d..0000000000 --- a/arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 
2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H -#define ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Valhall based OpenCL GEMMReshaped configuration */ -class CLGEMMReshapedKernelConfigurationValhall final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDKERNELCONFIGURATIONVALHALL_H */ diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h deleted file mode 100644 index 7909726164..0000000000 --- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H -#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h" - -#include <memory> - -namespace arm_compute -{ -namespace cl_gemm -{ -/** CLGEMMReshapedOnlyRHS factory class */ -class CLGEMMReshapedOnlyRHSKernelConfigurationFactory final -{ -public: - /** Static method to call the CLGEMMReshapedOnlyRHS kernel configuration class accordingly with the GPU target - * - * @param[in] gpu GPU target - * - * @return CLGEMMReshapedOnlyRHS kernel configuration class - */ - static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget gpu) - { - switch(get_arch_from_target(gpu)) - { - case GPUTarget::MIDGARD: - case GPUTarget::BIFROST: - return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationBifrost>(gpu); - case GPUTarget::VALHALL: - return support::cpp14::make_unique<CLGEMMReshapedOnlyRHSKernelConfigurationValhall>(gpu); - default: - ARM_COMPUTE_ERROR("Not supported GPU target"); - } - } -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATION_H */ diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h deleted file mode 100644 index 044bdc7b18..0000000000 --- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H -#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Bifrost based OpenCL GEMMReshapedOnlyRHS configuration */ -class CLGEMMReshapedOnlyRHSKernelConfigurationBifrost final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace 
arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONBIFROST_H */ diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h deleted file mode 100644 index 6dba6fdb00..0000000000 --- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H -#define ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H - -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" - -namespace arm_compute -{ -namespace cl_gemm -{ -/** Valhall based OpenCL GEMMReshapedOnlyRHS configuration */ -class CLGEMMReshapedOnlyRHSKernelConfigurationValhall final : public ICLGEMMKernelConfiguration -{ -public: - /** Constructor - * - * @param[in] gpu GPU target - */ - CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu); - - // Inherited overridden method - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override; - -private: - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b); - std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b); -}; -} // namespace cl_gemm -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMRESHAPEDONLYRHSKERNELCONFIGURATIONVALHALL_H */ diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h deleted file mode 100644 index 58dea3bdae..0000000000 --- a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the absolute difference kernel. - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class CLAbsoluteDifferenceKernel : public ICLKernel -{ -public: - /** Default constructor. 
*/ - CLAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~CLAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output images. - * - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1. */ - const ICLTensor *_input2; /**< Source tensor 2. */ - ICLTensor *_output; /**< Destination tensor. 
*/ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h deleted file mode 100644 index f639148e25..0000000000 --- a/arm_compute/core/CL/kernels/CLAccumulateKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H -#define ARM_COMPUTE_CLACCUMULATEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the accumulate kernel. - * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class CLAccumulateKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation tensors. 
- * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] accum Destination tensor. Data types supported: S16. - */ - void configure(const ICLTensor *input, ICLTensor *accum); - /** Set the input and accumulation tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] accum Destination tensor. Data types supported: S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); -}; - -/** Interface for the accumulate weighted kernel. - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class CLAccumulateWeightedKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation images, and the scale value. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. - * @param[in,out] accum Accumulated tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input, float alpha, ICLTensor *accum); - /** Set the input and accumulation images, and the scale value. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. - * @param[in,out] accum Accumulated tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); -}; - -/** Interface for the accumulate squared kernel. 
- * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class CLAccumulateSquaredKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. - * @param[in,out] accum Accumulated tensor. Data types supported: S16. - */ - void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. - * @param[in,out] accum Accumulated tensor. Data types supported: S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h deleted file mode 100644 index 1e83a689cd..0000000000 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; -/** Interface for the activation layer kernel. 
*/ -class CLActivationLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLActivationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLActivationLayerKernel(const CLActivationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLActivationLayerKernel(CLActivationLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default; - /** Default destructor */ - ~CLActivationLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - */ - void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h deleted file mode 100644 index 94e8baed13..0000000000 --- a/arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reduction operation kernel - * - * @note The default data type for an uninitialized output tensor is - * signed 32-bit integer (S32). It is the user's responsibility to check - * that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). - */ -class CLArgMinMaxLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLArgMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default; - /** Default destructor */ - ~CLArgMinMaxLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: S32/F16/F32. 
- * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[out] output Destination tensor. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - */ - void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: S32/F16/F32. - * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[out] output Destination tensor. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel. - * - * @param[in] input Source tensor info. Data types supported: S32/F16/F32. - * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32 - * Has to be nullptr for the first iteration - * @param[in] output Destination tensor info. Data types supported: U32/S32 - * Output will have the same number of dimensions as input. 
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_prev_output; - ICLTensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h deleted file mode 100644 index 163666853c..0000000000 --- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLBatchConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
- * - */ - void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h deleted file mode 100644 index 8eaaca845a..0000000000 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the BatchNormalization layer kernel. - */ -class CLBatchNormalizationLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLBatchNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchNormalizationLayerKernel() = default; - - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
- */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, - ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
- */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, - const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_mean; - const ICLTensor *_var; - const ICLTensor *_beta; - const ICLTensor *_gamma; - float _epsilon; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h deleted file mode 100644 index 2b12ad094a..0000000000 --- a/arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch to space kernel */ -class CLBatchToSpaceLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. 
Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. 
Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_block_shape; /**< Block shape tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h deleted file mode 100644 index 8defe32862..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H -#define ARM_COMPUTE_CLBITWISEANDKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise AND operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] - */ -class CLBitwiseAndKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLBitwiseAndKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default; - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h deleted file mode 100644 index b86ce7f173..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H -#define ARM_COMPUTE_CLBITWISENOTKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise NOT operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = \lnot input(x,y) @f] - */ -class CLBitwiseNotKernel : public ICLSimple2DKernel -{ -public: - /** Set the inputs and output images. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the inputs and output images. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h deleted file mode 100644 index 65eb50f0fd..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H -#define ARM_COMPUTE_CLBITWISEORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise OR operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] - */ -class CLBitwiseOrKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLBitwiseOrKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default; - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. 
Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h deleted file mode 100644 index 5c63a7f22c..0000000000 --- a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H -#define ARM_COMPUTE_CLBITWISEXORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bitwise XOR operation kernel. - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] - */ -class CLBitwiseXorKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLBitwiseXorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default; - /** Set the inputs and output images - * - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8. - * @param[in] input2 Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h b/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h deleted file mode 100644 index bbe11562ed..0000000000 --- a/arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the bounding box kernel */ -class CLBoundingBoxTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBoundingBoxTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default; - /** Default destructor */ - ~CLBoundingBoxTransformKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. 
- * - */ - void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform - * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. 
- * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - * @return a Status - */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_boxes; - ICLTensor *_pred_boxes; - const ICLTensor *_deltas; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h deleted file mode 100644 index ea3c1c1f3e..0000000000 --- a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H -#define ARM_COMPUTE_CLBOX3X3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the box 3x3 filter kernel. - * - */ -class CLBox3x3Kernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - //Inherited methods overriden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h deleted file mode 100644 index 40ad4dcd84..0000000000 --- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H -#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform Gradient computation. - */ -class CLGradientKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGradientKernel(const CLGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGradientKernel &operator=(const CLGradientKernel &) = delete; - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and mag must all be the same size (either 16 or 32). - * - * @param[in] gx Source tensor - Gx component. 
Data types supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. - * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. - * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. - * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. - */ - void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and mag must all be the same size (either 16 or 32). - * - * @param[in] compile_context The compile context to be used. - * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. - * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. - * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. - * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_gx; /**< Source tensor - Gx component */ - const ICLTensor *_gy; /**< Source tensor - Gy component */ - ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ - ICLTensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. 
The output buffer needs to be cleared before this kernel is executed. - * - * @note Hysteresis is computed in @ref CLEdgeTraceKernel - */ -class CLEdgeNonMaxSuppressionKernel : public ICLKernel -{ -public: - /** Constructor */ - CLEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U16/U32. - * @param[in] lower_thr Lower threshold. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U16/U32. - * @param[in] lower_thr Lower threshold. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ - const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ - ICLTensor *_output; /**< Destination tensor. */ -}; - -/** OpenCL kernel to perform Edge tracing. - */ -class CLEdgeTraceKernel : public ICLKernel -{ -public: - /** Constructor */ - CLEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. - * Expected to be initialized to 0 before each run. - * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. - * Expected to be initialized to 0 before each run. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. - * Expected to be initialized to 0 before each run. - * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. - * Expected to be initialized to 0 before each run. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor. */ - ICLTensor *_output; /**< Destination tensor. */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis. 
*/ - ICLTensor *_visited; /**< Marks visited elements */ - ICLTensor *_recorded; /**< Marks recorded elements */ - ICLTensor *_l1_stack; /**< L1 hysteris stack */ - ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h deleted file mode 100644 index 32ddf152c3..0000000000 --- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include <array> -#include <cstdint> - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the channel combine kernel */ -class CLChannelCombineKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelCombineKernel(CLChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; - /** Default destructor */ - ~CLChannelCombineKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. - */ - void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. 
Must be of U8 format. - * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. - */ - void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. 
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - std::array<const ICLTensor *, 4> _planes; - ICLTensor *_output; - ICLMultiImage *_output_multi; - std::array<uint32_t, 3> _x_subsampling; - std::array<uint32_t, 3> _y_subsampling; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h deleted file mode 100644 index 6a0c4bb94e..0000000000 --- a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the channel extract kernel */ -class CLChannelExtractKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelExtractKernel(CLChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; - /** Default destructor */ - ~CLChannelExtractKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Must be of U8 format. - */ - void configure(const ICLTensor *input, Channel channel, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Must be of U8 format. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. 
Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar 2D destination image. Must be of U8 format. - */ - void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar 2D destination image. Must be of U8 format. - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _num_elems_processed_per_iteration; - uint32_t _subsampling; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h b/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h deleted file mode 100644 index 14b59d325f..0000000000 --- a/arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H -#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the channel shuffle kernel */ -class CLChannelShuffleLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelShuffleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default; - /** Default destructor */ - ~CLChannelShuffleLayerKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); - /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h deleted file mode 100644 index d0528ed21a..0000000000 --- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H -#define ARM_COMPUTE_CLCOL2IMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the col2im reshaping kernel. - * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel. 
- * - * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: - * - * @f[ - * \left( \begin{array}{ccccccccc} - * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccc} - * a0 & a1 & a2 \\ - * a3 & a4 & a5 \\ - * a6 & a7 & a8 \\ - * \end{array} \right) - * @f] - */ -class CLCol2ImKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCol2ImKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCol2ImKernel(const CLCol2ImKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCol2ImKernel(CLCol2ImKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; - /** Default destructor */ - ~CLCol2ImKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW - * @param[in] convolved_dims Output convolved dimensions. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. 
3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW - * @param[in] convolved_dims Output convolved dimensions. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW - * @param[in] convolved_dims Output convolved dimensions. - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _convolved_dims; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h deleted file mode 100644 index 2bcd141863..0000000000 --- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H -#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the color convert kernel. - * - */ -class CLColorConvertKernel : public ICLKernel -{ -public: - /** Default constructor. 
*/ - CLColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLColorConvertKernel(const CLColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLColorConvertKernel(CLColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; - /** Default destructor. */ - ~CLColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. 
Formats supported: RGB888/RGBA8888 - */ - void configure(const ICLMultiImage *input, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const ICLImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const ICLMultiImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. 
Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /*pointer to single planar tensor input */ - ICLTensor *_output; /*pointer to single planar tensor output */ - const ICLMultiImage *_multi_input; /*pointer to multi-planar input */ - ICLMultiImage *_multi_output; /*pointer to multi-planar output */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLComparisonKernel.h b/arm_compute/core/CL/kernels/CLComparisonKernel.h deleted file mode 100644 index d5c5297c61..0000000000 --- a/arm_compute/core/CL/kernels/CLComparisonKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H -#define ARM_COMPUTE_CLCOMPARISONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the comparison kernel. */ -class CLComparisonKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLComparisonKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComparisonKernel(const CLComparisonKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComparisonKernel &operator=(const CLComparisonKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComparisonKernel(CLComparisonKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComparisonKernel &operator=(CLComparisonKernel &&) = default; - /** Default destructor */ - ~CLComparisonKernel() = default; - /** Set the inputs and output tensors - * - * @param[in] input1 Source tensor. Data types supported: All. - * @param[in] input2 Source tensor. Data types supported: Same as @p input1. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] operation Comparison operation to use. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); - /** Set the inputs and output tensors - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: All. - * @param[in] input2 Source tensor. Data types supported: Same as @p input1. 
- * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] operation Comparison operation to use. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); - /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel - * - * @param[in] input1 Source tensor. Data types supported: All. - * @param[in] input2 Source tensor. Data types supported: Same as @p input1. - * @param[in] output Destination tensor. Data types supported: U8. - * @param[in] operation Comparison operation to use. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h b/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h deleted file mode 100644 index d3e57a6738..0000000000 --- a/arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H -#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. - * - * @note This function can be applied to the 2D weights used by a Fully Connected layer if: - * - It follows a Convolution layer - * - The data layout used by the network does not match the one the model has been trained in. 
- * - * @note This function assumes the weights are already reshaped (transposed) - */ -class CLConvertFullyConnectedWeightsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLConvertFullyConnectedWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default; - /** Default destructor */ - ~CLConvertFullyConnectedWeightsKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Set the input and output tensor. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). 
- * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel - * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. - * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h deleted file mode 100644 index b6fe51dbaa..0000000000 --- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H -#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). - * The client can supply a convolution matrix \f$ C_{m,n} \f$. 
- * @f{eqnarray}{ - * k_0 &=& \frac{m}{2} \\ - * l_0 &=& \frac{n}{2} \\ - * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} - * @f} - * - * @note The above equation for this function is similar to the default OpenCV Filter2D function, - * which actually computes a correlation and not a convolution. - * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. - */ -template <unsigned int matrix_size> -class CLConvolutionKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; - -/** Interface for the kernel which applies a 3x3 convolution to a tensor. */ -using CLConvolution3x3Kernel = CLConvolutionKernel<3>; -/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ -using CLConvolution5x5Kernel = CLConvolutionKernel<5>; -/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ -using CLConvolution7x7Kernel = CLConvolutionKernel<7>; -/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ -using CLConvolution9x9Kernel = CLConvolutionKernel<9>; - -/****************************************************************************************\ - * Separable Square Convolution * -\****************************************************************************************/ - -/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */ -template <unsigned int matrix_size> -class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel -{ -public: - /** Default Constructor */ - CLSeparableConvolutionHorKernel(); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. 
- * @param[out] output Destination tensor, Data types supported: S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; - -private: - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */ -using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; -/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ -using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; -/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ -using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; - -/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ -template <unsigned int matrix_size> -class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: S16. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] data_type Data type to use for intermeidate result. 
@sa data_type_for_convolution - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: S16. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; - -/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ -using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; -/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ -using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; -/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */ -using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -/** Kernel for the running convolution on a rectangle matrix. - * - * @note Supports combinations of 3,5,7 and 9. 
- */ -class CLConvolutionRectangleKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLConvolutionRectangleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; - /** Allow instances of this class to be moved */ - CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; - /** Allow instances of this class to be moved */ - CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. 
- * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h deleted file mode 100644 index 05dff8ed0c..0000000000 --- a/arm_compute/core/CL/kernels/CLCopyKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOPYKERNEL_H -#define ARM_COMPUTE_CLCOPYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a copy between two tensors */ -class CLCopyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCopyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCopyKernel(const CLCopyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCopyKernel &operator=(const CLCopyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCopyKernel(CLCopyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCopyKernel &operator=(CLCopyKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - */ - void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr); - /** Initialize the kernel's input, output. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel - * - * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Window _output_window; - bool _has_output_window; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLCropKernel.h b/arm_compute/core/CL/kernels/CLCropKernel.h deleted file mode 100644 index cbc2338940..0000000000 --- a/arm_compute/core/CL/kernels/CLCropKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCROPKERNEL_H -#define ARM_COMPUTE_CLCROPKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a copy between two tensors */ -class CLCropKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCropKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLCropKernel(const CLCropKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - CLCropKernel &operator=(const CLCropKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCropKernel(CLCropKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCropKernel &operator=(CLCropKernel &&) = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. - */ - void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC. - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *output_window = nullptr); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC. - * @param[in] output Destination tensor info. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *output_window = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Coordinates2D _start; - uint32_t _batch_index; - float _extrapolation_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCROPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h deleted file mode 100644 index 0c65f519cc..0000000000 --- a/arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H -#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Deconvolution layer kernel on OpenCL. - */ -class CLDeconvolutionLayerUpsampleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLDeconvolutionLayerUpsampleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete; - /** Default Move Constructor. 
*/ - CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default; - /** Default move assignment operator */ - CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default; - /** Default destructor */ - ~CLDeconvolutionLayerUpsampleKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample - * - * @param[in] input Source tensor info. Data types supported: All. - * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - PadStrideInfo _info; - DataLayout _data_layout; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h deleted file mode 100644 index 292c561e46..0000000000 --- a/arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H -#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H - -#include "arm_compute/core/CL/ICLSimpleKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution. - * - * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter. - * - * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size] - * - * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] - * - * For example, given a tensor with dimensions [4, 2, 2] this function returns a tensor with dimensions [1, 4, 4]. - * - */ -class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLDeconvolutionReshapeOutputKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default; - /** Default destructor */ - ~CLDeconvolutionReshapeOutputKernel() = default; - - /** Initialise the kernel's source and destination. - * - * @param[in] input Input tensor. 
Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] - * Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); - /** Initialise the kernel's source and destination. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size] - * Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. 
Supported data layouts: same as @p input. - * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, - const PadStrideInfo &deconv_info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel. - * - * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input. - * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. 
- * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - bool _add_bias; - const ICLTensor *_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h deleted file mode 100644 index 5fe826d090..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLDepthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
- * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h deleted file mode 100644 index 66eb6222b2..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H - -#include "arm_compute/core/CL/ICLSimple3DKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the depth conversion kernel. */ -class CLDepthConvertLayerKernel : public ICLSimple3DKernel -{ -public: - /** Set the input and output of the kernel. 
- * - * Valid conversions Input -> Output : - * - * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 - * - * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. - */ - void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); - /** Set the input and output of the kernel. - * - * Valid conversions Input -> Output : - * - * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data) - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel - * - * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. - * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h deleted file mode 100644 index 87ac3c1ec1..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the depth to space kernel */ -class CLDepthToSpaceLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. 
Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h deleted file mode 100644 index 6cf0326467..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H - -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW. - */ -class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionLayer3x3NCHWKernel(); - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. 
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. 
A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel - * - * @param[in] input Source tensor info. 
DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, - const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); - - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - unsigned int _conv_stride_x; - unsigned int _conv_pad_top; - unsigned int _conv_pad_left; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h deleted file mode 100644 index e564cf6fe0..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H - -#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC. - */ -class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionLayer3x3NHWCKernel(); - /** Default move assignment operator. */ - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. 
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. 
- * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - unsigned int _num_rows_processed_per_iteration; - unsigned int _num_planes_processed_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h deleted file mode 100644 index 8847cf9c46..0000000000 --- 
a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a MxN depthwise convolution. 
M and N are respectively the rows and columns of the filter - This kernel assumes that tensor for the weights is NOT reshaped (Native version) */ -class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLDepthwiseConvolutionLayerNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Initialize the function's source, destination and parameters - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread - * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. 
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Initialize the function's source, destination and parameters - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. 
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread - * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC - * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. 
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread - * @param[in] dwc_info Depthwise convolution layer info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U), - const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_weights; - const ICLTensor *_biases; - ICLTensor *_output; - unsigned int _depth_multiplier; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _is_quantized; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h deleted file mode 100644 index 8dc5d32e4f..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H -#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to reshape the weights of depthwise convolution. */ -class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; - /** Default Move Constructor. */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; - /** Default move assignment operator */ - CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; - - /** Initialize the function's source and destination. - * - * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC - * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. 
C0 is the number of channels read by each thread. Data types supported: same as @p weights. - * @param[in] info Depthwise convolution information to reshape the input tensor. - */ - void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); - /** Initialize the function's source and destination. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC - * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. - * @param[in] info Depthwise convolution information to reshape the input tensor. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel - * - * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC - * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights. - * @param[in] info Depthwise convolution information to reshape the input tensor. 
- * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - - void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info); - void configure_generic(const DepthwiseConvolutionReshapeInfo &info); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h deleted file mode 100644 index bb154f1a5b..0000000000 --- a/arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the dequantization layer kernel. */ -class CLDequantizationLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDequantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default; - /** Default destructor */ - ~CLDequantizationLayerKernel() = default; - /** Set the input, output, min and max. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor. Data types supported: F16/F32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input, output, min and max. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor. Data types supported: F16/F32. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[in] output Output tensor info. Data types supported: F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h deleted file mode 100644 index cd8ae90c2d..0000000000 --- a/arm_compute/core/CL/kernels/CLDerivativeKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H -#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the derivative kernel. */ -class CLDerivativeKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDerivativeKernel(const CLDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDerivativeKernel(CLDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; - /** Default destructor */ - ~CLDerivativeKernel() = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */ - ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */ - bool _run_derivative_x; /**< Do we need to run Derivative X ? */ - bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h deleted file mode 100644 index 45f5fe0764..0000000000 --- a/arm_compute/core/CL/kernels/CLDilateKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDILATEKERNEL_H -#define ARM_COMPUTE_CLDILATEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the dilate kernel. - * - */ -class CLDilateKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. 
Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h deleted file mode 100644 index 489d7c27c5..0000000000 --- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the direct convolution kernel. - */ -class CLDirectConvolutionLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDirectConvolutionLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default; - /** Default destructor */ - ~CLDirectConvolutionLayerKernel() = default; - /** Set the input, weights, biases and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 - * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 - * 9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. 
- * Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[out] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); - /** Set the input, weights, biases and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 - * 5x5 convolution with stride_x = 1/2, stride_y = 1/2 - * 9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type - * @param[out] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. 
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] target Target GPU architecture. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -public: - const ICLTensor *_input; - const ICLTensor *_biases; - const ICLTensor *_weights; - ICLTensor *_output; - DataLayout _data_layout; - BorderSize _border_size; - int _conv_stride_x; - int _conv_stride_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h b/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h deleted file mode 100644 index e190bdebbe..0000000000 --- a/arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H -#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLSimpleKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the elementwise unary operator */ -class CLElementWiseUnaryLayerKernel : public ICLSimpleKernel -{ -public: - /** Initialise the kernel's inputs, output. - * - * @param[in] input First tensor input. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] op Element wise unary operation to perform. - */ - void configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op); - /** Initialise the kernel's inputs, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input First tensor input. Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] op Element wise unary operation to perform. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op); - /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] op Element wise unary operation to perform. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h deleted file mode 100644 index 4d3d4bc834..0000000000 --- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H -#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for an element-wise operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] - * - */ -class CLElementwiseOperationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLElementwiseOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default; - /** Default destructor */ - ~CLElementwiseOperationKernel() = default; - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -protected: - /** The name of the operation */ - virtual std::string name() = 0; - - /** Initialise the kernel's output. - * - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. 
- * - * @return a pair of Status and Window - */ - virtual std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0; - - /** Validate the argument passed to the kernel - * - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. - */ - virtual Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0; - - /** Generate the build options for the specific kernel - * - * @reutrn a CLBuildOptions struct - */ - virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0; - - /** Generate the identifier for tuning - * - * @reutrn a string - */ - virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0; - - /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) - * - */ - void configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff) - * - */ - void configure_common(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - ActivationLayerInfo _act_info; - -private: - const ICLTensor *_input1; /**< Source tensor 1 */ - const ICLTensor *_input2; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ -}; - -/** Addition operation */ -class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel -{ -public: - CLSaturatedArithmeticOperationKernel() - : CLElementwiseOperationKernel(), _policy(), 
_op() - { - } - - /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. 
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - -protected: - // Inherited methods overridden: - std::string name() override; - std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; - Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; - CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; - std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; - -private: - ConvertPolicy _policy; - ArithmeticOperation _op; -}; - -class CLArithmeticOperationKernel : public CLElementwiseOperationKernel -{ -public: - CLArithmeticOperationKernel() - : CLElementwiseOperationKernel(), _op() - { - } - - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. 
Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - -protected: - // Inherited methods overridden: - std::string name() override; - std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; - Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; - CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; - std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; - -private: - ArithmeticOperation _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h deleted file mode 100644 index cbc748194c..0000000000 --- a/arm_compute/core/CL/kernels/CLErodeKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLERODEKERNEL_H -#define ARM_COMPUTE_CLERODEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the erode kernel. - * - */ -class CLErodeKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLERODEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h b/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h deleted file mode 100644 index a8da1246bb..0000000000 --- a/arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H -#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the digit reverse operation kernel. */ -class CLFFTDigitReverseKernel : public ICLKernel -{ -public: - /** Constructor */ - CLFFTDigitReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete; - /** Default Move Constructor. */ - CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default; - /** Default move assignment operator */ - CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default; - /** Default destructor */ - ~CLFFTDigitReverseKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] idx Digit reverse index tensor. Data type supported: U32 - * @param[in] config Kernel configuration. - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] idx Digit reverse index tensor. Data type supported: U32 - * @param[in] config Kernel configuration. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel - * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] idx Digit reverse index tensor info. Data type supported: U32 - * @param[in] config Kernel configuration. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_idx; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h b/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h deleted file mode 100644 index e3f53462d9..0000000000 --- a/arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H -#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -#include <set> - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the FFT radix stage kernel. */ -class CLFFTRadixStageKernel : public ICLKernel -{ -public: - /** Constructor */ - CLFFTRadixStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete; - /** Default Move Constructor. */ - CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default; - /** Default move assignment operator */ - CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default; - /** Default destructor */ - ~CLFFTRadixStageKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input - * @param[in] config FFT descriptor metadata. 
- */ - void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input - * @param[in] config FFT descriptor metadata. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel - * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input - * @param[in] config FFT descriptor metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); - /** Returns the radix that are support by the FFT kernel - * - * @return A set of supported radix - */ - static std::set<unsigned int> supported_radix(); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h b/arm_compute/core/CL/kernels/CLFFTScaleKernel.h deleted file mode 100644 index d0d2b7613c..0000000000 --- a/arm_compute/core/CL/kernels/CLFFTScaleKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H -#define ARM_COMPUTE_CLFFTSCALEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the inverse fft scale kernel. */ -class CLFFTScaleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLFFTScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTScaleKernel(const CLFFTScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete; - /** Default Move Constructor. 
*/ - CLFFTScaleKernel(CLFFTScaleKernel &&) = default; - /** Default move assignment operator */ - CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default; - /** Default destructor */ - ~CLFFTScaleKernel() = default; - /** Set the input and output tensors. - * - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] config Kernel configuration - */ - void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] config Kernel configuration - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel - * - * @param[in] input Source tensor info. Data types supported: F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h deleted file mode 100644 index 1a0d4e36a5..0000000000 --- a/arm_compute/core/CL/kernels/CLFastCornersKernel.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H -#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** CL kernel to perform fast corners */ -class CLFastCornersKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFastCornersKernel(const CLFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFastCornersKernel(CLFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; - /** Default destructor */ - ~CLFastCornersKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Output image. Data types supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_mode Strategy to use for borders. - */ - void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); - /** Initialise the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Output image. Data types supported: U8. 
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_mode Strategy to use for borders. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLImage *_input; - ICLImage *_output; -}; - -/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */ -class CLCopyToArrayKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCopyToArrayKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; - /** Default destructor */ - ~CLCopyToArrayKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data types supported: U8. - * @param[in] update_number Flag to indicate whether we need to update the number of corners - * @param[out] corners Array of keypoints to store the results. - * @param[out] num_buffers Number of keypoints to store the results. - */ - void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); - /** Initialise the kernel. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[in] update_number Flag to indicate whether we need to update the number of corners - * @param[out] corners Array of keypoints to store the results. - * @param[out] num_buffers Number of keypoints to store the results. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; /**< source image */ - ICLKeyPointArray *_corners; /**< destination array */ - cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h deleted file mode 100644 index d00ea55a83..0000000000 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H -#define ARM_COMPUTE_CLFILLBORDERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for filling the border of a kernel */ -class CLFillBorderKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFillBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFillBorderKernel(const CLFillBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFillBorderKernel(CLFillBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default; - /** Default destructor */ - ~CLFillBorderKernel() = default; - - /** Initialise the kernel's input, output and border mode. - * - * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - - /** Function to set the constant value on fill border kernel depending on type. - * - * @param[in] idx Index of the kernel argument to set. - * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. - */ - template <class T> - void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - bool is_parallelisable() const override; - -private: - ICLTensor *_tensor; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h b/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h deleted file mode 100644 index ab009e1aa8..0000000000 --- a/arm_compute/core/CL/kernels/CLFlattenLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL interface for the flatten kernel.*/ -class CLFlattenLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. 
Data type supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFloorKernel.h b/arm_compute/core/CL/kernels/CLFloorKernel.h deleted file mode 100644 index 4d1ed789db..0000000000 --- a/arm_compute/core/CL/kernels/CLFloorKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFLOORKERNEL_H -#define ARM_COMPUTE_CLFLOORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a floor operation */ -class CLFloorKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFloorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFloorKernel(const CLFloorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFloorKernel &operator=(const CLFloorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFloorKernel(CLFloorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFloorKernel &operator=(CLFloorKernel &&) = default; - /** Default destructor */ - ~CLFloorKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - - /** Set the source, destination of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. 
Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] output Destination tensor info. Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h deleted file mode 100644 index 2fe6b223ca..0000000000 --- a/arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL kernel to fuse the batch normalization node to a preceding convolution node */ -class CLFuseBatchNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFuseBatchNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default; - /** Default destructor */ - ~CLFuseBatchNormalizationKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. 
Same as @p input_weights - * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - */ - void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Set the source, destination of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[out] fused_bias Output fused bias tensor. 
It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel - * - * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights - * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[in] fused_bias Output fused bias tensor info. 
It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - * - * @return a status - */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input_weights; - const ICLTensor *_input_bias; - const ICLTensor *_bn_mean; - const ICLTensor *_bn_var; - const ICLTensor *_bn_gamma; - const ICLTensor *_bn_beta; - ICLTensor *_fused_weights; - ICLTensor *_fused_bias; - float _epsilon; - bool _run_in_place_weights; - bool _run_in_place_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h 
b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h deleted file mode 100644 index 15fd20842e..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */ -class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: S32 - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. 
Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0 - * @param[in] output Output tensor info. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h deleted file mode 100644 index 43526b7c41..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped - * - * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - */ -class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyReshapedKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. 
Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel - * - * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] output Output tensor info. Data type supported: S32 - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - const GEMMReshapeInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_output_as_3d; - unsigned int _k; - bool _use_dummy_work_items; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h deleted file mode 100644 index 1aba6c0398..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices with QASYMM8 data type when only the input matrix RHS (input1) has been reshaped - * - * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel - * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported - */ -class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel 
&operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. - * Only the following values are supported for LHS info: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * Only the following values are supported for RHS info: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 - * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 - * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. - * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. 
- */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, - const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0 - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. - * Only the following values are supported for LHS info: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * Only the following values are supported for RHS info: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 - * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 - * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. - * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. 
- * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr, - const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32. - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info. - * Only the following values are supported for LHS info: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * Only the following values are supported for RHS info: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same as lhs_info.k0 - * rhs_info.transpose: true - * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32 - * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32 - * @param[in] bias (Optional) Biases tensor info. 
Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32. - * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr, - const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr, - const ITensorInfo *output_shifts = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - const ICLTensor *_vector_sum_col; - const ICLTensor *_vector_sum_row; - const ICLTensor *_bias; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _is_quantized_per_channel; - bool _fuse_output_stage; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */
\ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h deleted file mode 100644 index bc982c6120..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to add the offset contribution after the matrix multiplication. 
The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), - * and adds to it the offset contribution of matrix A and matrix B in-place. - * - * The final result is: - * - * mm_result[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - * - */ -class CLGEMMLowpOffsetContributionKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpOffsetContributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. 
Data type supported: Same as @p input. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - */ - void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, - int32_t b_offset); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. 
Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_vector_sum_col; - const ICLTensor *_vector_sum_row; - ICLTensor *_mm_result; - const ICLTensor *_bias; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h deleted file mode 100644 index 583b388d45..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage. - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution - * of matrix A and matrix B and performs the output stage defined by the output_stage argument - * - * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo. 
- */ -class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpOffsetContributionOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. 
- * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info - * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - */ - void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset, - const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. 
- * @param[in] output_stage GEMMLowp output stage info - * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, - int32_t a_offset, int32_t b_offset, - const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. 
- * @param[in] output_stage GEMMLowp output stage info - * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM). - * Supported data types: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, - int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_mm_result; - const ICLTensor *_vector_sum_col; - const ICLTensor *_vector_sum_row; - const ICLTensor *_bias; - ICLTensor *_output; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _is_quantized_per_channel; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h deleted file mode 100644 index 1e9fde8376..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
- * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Requantize - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to - * - to the [0..255] range and cast to QASYMM8. - * - to the [-128..127] range and cast to QASYMM8_SIGNED. - */ -class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info Output stage info. 
Used to pass the quantized output data type - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info Output stage info. Used to pass the quantized output data type - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] info Output stage info. 
Used to pass the quantized output data type - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h deleted file mode 100644 index 766ef9a820..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. - * The following computations will be performed by the kernel: - * - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Add bias to final result if bias tensor is not a nullptr - * -# Shift the int32 accumulator by result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values: - * -# -to the [0..255] range and cast to QASYMM8. - * -# -to the [-128..127] range and cast to QASYMM8/SIGNED. - * - */ -class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. 
Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] output_stage GEMMLowp output stage metadata. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] output_stage GEMMLowp output stage metadata. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] output_stage GEMMLowp output stage metadata. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; - const GEMMLowpOutputStageInfo *_output_stage; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */
\ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h deleted file mode 100644 index 6f58150037..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** CL kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QSYMM16 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. - * - */ -class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - 
/** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] input Input tensor info. Data type supported: S32 - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h deleted file mode 100644 index 0c237be34c..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8_SIGNED value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED. 
- */ -class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. 
Defaults to 0 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0 - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = 0, int max = 0); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. 
Defaults to 0 - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h deleted file mode 100644 index cb3e12e34d..0000000000 --- 
a/arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8 value. 
- * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. - */ -class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = 0, int max = 0); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, - int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h deleted file mode 100644 index 857b1c7952..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H -#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; -struct GEMMLowpReductionKernelInfo; - -/** Common interface for all OpenCL reduction kernels */ -class ICLGEMMLowpReductionKernel : public ICLKernel -{ -public: - /** Constructor */ - ICLGEMMLowpReductionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete; - /** Allow instances of this class to be moved */ - ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default; - /** Allow instances of this class to be moved */ - ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S8 - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. 
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data type supported: S8 - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - -protected: - const ICLTensor *_input; - ICLTensor *_output; -}; - -/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. 
- */ - void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. 
- * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel - * - * @param[in] mtx_b Input tensor. 
Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h deleted file mode 100644 index df2f6f4ad1..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface to add a bias to each row of the input tensor - * - */ -class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixAccumulateBiasesKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input - */ - void configure(ICLTensor *accum, const ICLTensor *biases); - /** Set the accumulate buffer and the biases of the kernel. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel - * - * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input - * @param[in] gpu_target GPU target - * - * @return a status - */ - static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_accum; - const ICLTensor *_biases; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h deleted file mode 100644 index 6085b34bcb..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply two input matrices "A" and "B" and add a martix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result. 
- * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object - * - * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel" and @ref CLGEMMReshapeRHSMatrixKernel, - * the flag @p is_interleaved_transposed must be set to true - * - * @attention @p input1 tensor must have at least 2 dimensions (matrix) - * - */ -class CLGEMMMatrixMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. 
- * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, - bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and alpha - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info (Optional) GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, - bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] gpu_target GPU Target - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, - bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h deleted file mode 100644 index c711a3d1f9..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */ -class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMMatrixMultiplyNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. 
Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same of lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. 
Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same of lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, - const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. 
Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: same of lhs_info.k0 - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h deleted file mode 100644 index ee8e57fa8c..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped - * - * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - */ -class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMMatrixMultiplyReshapedKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default; - /** Initialise the kernel's input and output. 
- * - * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. - * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the - * multiplications. i.e. float c = (half)a * (half)b - * - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4 - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag. 
- * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the - * multiplications. i.e. float c = (half)a * (half)b - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4 - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, - const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel - * - * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4 - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3 - * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.transpose: false - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.transpose: true - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @note lhs_info.k0 must be equal to rhs_info.k0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_output_as_3d; - unsigned int _k; - bool _use_dummy_work_items; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/
\ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h deleted file mode 100644 index f7d314a039..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped - * - * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel - */ -class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel -{ -public: - /** Default Constructor */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported: - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.transpose: true,false - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0. - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.transpose: true,false - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, - const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - * - * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4. - * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3. - * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0. - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the matrix bias - * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported: - * lhs_info.m0: 1,2,3,4,5,6,7,8 - * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. 
Only the following values are supported: - * rhs_info.k0: 2,3,4,8,16 - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.transpose: true,false - * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info, - const GEMMRHSMatrixInfo &rhs_info, - const GEMMKernelInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _use_dummy_work_items; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h deleted file mode 100644 index 6d70b4b0c2..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the GEMM matrix vector multiply kernel. **/ -class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixVectorMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. 
- */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel - * - * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input. - * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - int _num_rows_read_per_iteration; - BorderSize _border_size; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h deleted file mode 100644 index fe77fcb428..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H -#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication. 
- * In particular, this function splits the input matrix in blocks of size M0xK0 (defined through GEMMLHSInfo) and - * stores each one in the output matrix unrolling the values - */ -class CLGEMMReshapeLHSMatrixKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMReshapeLHSMatrixKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.v0: greater than 0 - * lhs_info.transpose: true, false - * lhs_info.interleave: true, false - * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor - */ - void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. 
Data type supported: same as @p input - * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.v0: greater than 0 - * lhs_info.transpose: true, false - * lhs_info.interleave: true, false - * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. - * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * lhs_info.m0: 2,3,4,5,6,7,8 - * lhs_info.k0: 2,3,4,8,16 - * lhs_info.v0: greater than 0 - * lhs_info.transpose: true, false - * lhs_info.interleave: true, false - * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as 3D tensor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - bool _reinterpret_input_as_3d; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */
\ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h b/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h deleted file mode 100644 index 0e6352bdbb..0000000000 --- a/arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H -#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication - * In particular, this kernel splits the input matrix in blocks of size K0xN0 and stores each one in - * the output matrix unrolling the values */ -class CLGEMMReshapeRHSMatrixKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMReshapeRHSMatrixKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false) - * rhs_info.h0: greater than 0 - * rhs_info.transpose: true, false - * rhs_info.interleave: true, false - */ - void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false) - * rhs_info.h0: greater than 0 - * rhs_info.transpose: true, false - * rhs_info.interleave: true, false - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. - * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary - * information to reshape the input tensor. Only the following values are supported: - * rhs_info.n0: 2,3,4,8,16 - * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false) - * rhs_info.h0: greater than 0 - * rhs_info.transpose: true, false - * rhs_info.interleave: true, false - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */
\ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLGatherKernel.h b/arm_compute/core/CL/kernels/CLGatherKernel.h deleted file mode 100644 index b7539536e9..0000000000 --- a/arm_compute/core/CL/kernels/CLGatherKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGATHERKERNEL_H -#define ARM_COMPUTE_CLGATHERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform tensor reshaping */ -class CLGatherKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGatherKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGatherKernel(const CLGatherKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGatherKernel &operator=(const CLGatherKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGatherKernel(CLGatherKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGatherKernel &operator=(CLGatherKernel &&) = default; - /** Default destructor */ - ~CLGatherKernel() = default; - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. 
Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All. - * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_indices; /**< Indices tensor */ - ICLTensor *_output; /**< Destination tensor */ - int _axis; /**< Axis index */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h deleted file mode 100644 index 6a9d3eaa4d..0000000000 --- a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H -#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Gaussian 3x3 filter kernel. - * - */ -class CLGaussian3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. 
Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h deleted file mode 100644 index d8730e0c92..0000000000 --- a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H -#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H - -#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ -class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel -{ -public: - /** Initialise the kernel's source, destination and border. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - -private: - //Make the configure method of the parent class private - using CLSeparableConvolution5x5HorKernel::configure; -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ -class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel -{ -public: - /** Initialise the kernel's source, destination and border. - * - * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - -private: - //Make the configure method of the parent class private - using CLSeparableConvolution5x5VertKernel::configure; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h deleted file mode 100644 index 34cd062dae..0000000000 --- a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H - -#include "arm_compute/core/CL/ICLSimpleKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ -class CLGaussianPyramidHorKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~CLGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ -class CLGaussianPyramidVertKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~CLGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U16. - * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16. - * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h deleted file mode 100644 index 46dc16d6d5..0000000000 --- a/arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H -#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -namespace arm_compute -{ -class ICLTensor; - -/** Interface for Compute All Anchors kernel */ -class CLComputeAllAnchorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLComputeAllAnchorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default; - /** Default destructor */ - ~CLComputeAllAnchorsKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. 
Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_anchors; - ICLTensor *_all_anchors; -}; -} // arm_compute -#endif // ARM_COMPUTE_CLGENERATEPROSPOSALSLAYERKERNEL_H diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h deleted file mode 100644 index 046950551d..0000000000 --- a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** OpenCL kernel to perform HOG Orientation Binning */ -class CLHOGOrientationBinningKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~CLHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input_magnitude; - const ICLTensor *_input_phase; - ICLTensor *_output; - Size2D _cell_size; -}; - -/** OpenCL kernel to perform HOG block normalization */ -class CLHOGBlockNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~CLHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _num_cells_per_block_stride; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h deleted file mode 100644 index 681c212cc5..0000000000 --- a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H -#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLHOG.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/OpenCL.h" - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform HOG detector kernel using linear SVM */ -class CLHOGDetectorKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; - /** Default destructor */ - ~CLHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] num_detection_windows Number of detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
- * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, - uint16_t idx_class = 0); - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] num_detection_windows Number of detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
- * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, - const Size2D &detection_window_stride, float threshold = 0.0f, - uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLDetectionWindowArray *_detection_windows; - cl::Buffer *_num_detection_windows; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h deleted file mode 100644 index a13119b82c..0000000000 --- a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the harris score kernel. - * - * @note The implementation supports 3, 5, and 7 for the block_size. - */ -class CLHarrisScoreKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; - /** Default destructor */ - ~CLHarrisScoreKernel() = default; - - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) - * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) - * @param[out] output Destination image (harris score). 
Data types supported F32 - * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined); - /** Setup the kernel parameters - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) - * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) - * @param[out] output Destination image (harris score). Data types supported F32 - * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -protected: - const ICLImage *_input1; /**< Source image - Gx component */ - const ICLImage *_input2; /**< Source image - Gy component */ - ICLImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h deleted file mode 100644 index 524e5ea997..0000000000 --- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLHeightConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output); - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _height_offset; - unsigned int _num_elems_processed_per_iteration; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h deleted file mode 100644 index 9cd374711b..0000000000 --- a/arm_compute/core/CL/kernels/CLHistogramKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H -#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLDistribution1D; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16. 
- * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel - */ -class CLHistogramKernel : public ICLKernel -{ -public: - /** Constructor */ - CLHistogramKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramKernel(const CLHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHistogramKernel(CLHistogramKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHistogramKernel &operator=(CLHistogramKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const ICLImage *input, ICLDistribution1D *output); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. 
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; - ICLDistribution1D *_output; -}; - -/** Interface to run the histogram kernel to handle the leftover part of image - * - */ -class CLHistogramBorderKernel : public ICLKernel -{ -public: - /** Constructor */ - CLHistogramBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const ICLImage *input, ICLDistribution1D *output); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. 
- */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; - ICLDistribution1D *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h deleted file mode 100644 index 61f2a3d489..0000000000 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H -#define ARM_COMPUTE_CLIM2COLKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the im2col reshape kernel. - * - * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. - * It is used to transform a convolution to a plain matrix multiplication. - * - * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * = - * \left( \begin{array}{ccccccccc} - * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ - * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ - * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ - * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - */ -class CLIm2ColKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIm2ColKernel(const CLIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - CLIm2ColKernel(CLIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * This is valid only for non-quantized inputs. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. - * Number of groups other than 1 is only supported for NCHW data layout. - * Number of groups should be multiple to the number of channels. - */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, - const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * This is valid only for non-quantized inputs. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. - * Number of groups other than 1 is only supported for NCHW data layout. - * Number of groups should be multiple to the number of channels. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - DataLayout _data_layout; - std::pair<unsigned int, unsigned int> _convolved_dims; - unsigned int _num_elems_processed_per_iteration; - Size2D _kernel_dims; - PadStrideInfo _conv_info; - unsigned int _num_groups; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h deleted file mode 100644 index 014dce1759..0000000000 --- a/arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for performing an instance normalization */ -class CLInstanceNormalizationLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLInstanceNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~CLInstanceNormalizationLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. 
- * @param[in] info Kernel meta-data descriptor - */ - void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h deleted file mode 100644 index 6b6076a917..0000000000 --- a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to run the horizontal pass of the integral image kernel. */ -class CLIntegralImageHorKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output Destination tensor, Data types supported: U32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. 
Data types supported: U8 - * @param[out] output Destination tensor, Data types supported: U32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); -}; - -/** Interface to run the vertical pass of the integral image kernel. */ -class CLIntegralImageVertKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLIntegralImageVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in,out] in_out The input/output tensor. Data types supported: U32 - */ - void configure(ICLTensor *in_out); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] in_out The input/output tensor. Data types supported: U32 - */ - void configure(const CLCompileContext &compile_context, ICLTensor *in_out); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_in_out; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h deleted file mode 100644 index 169910b70d..0000000000 --- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H -#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ -class CLL2NormalizeLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLL2NormalizeLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default; - /** Default destructor */ - ~CLL2NormalizeLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 
Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); - - /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] sum Sum values tensor info. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_sum; - ICLTensor *_output; - unsigned int _actual_axis; - float _epsilon; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h deleted file mode 100644 index f94602c381..0000000000 --- a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H -#define ARM_COMPUTE_CLLKTRACKERKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** Internal keypoint structure for Lucas-Kanade Optical Flow */ -struct CLLKInternalKeypoint -{ - float x{ 0.f }; /**< x coordinate of the keypoint */ - float y{ 0.f }; /**< y coordinate of the keypoint */ - float tracking_status{ 0.f }; /**< the tracking status of the keypoint */ - float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */ -}; - -/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */ -struct CLCoefficientTable -{ - float A11; /**< iA11 * FLT_SCALE */ - float A12; /**< iA11 * FLT_SCALE */ - float A22; /**< iA11 * FLT_SCALE */ - float min_eig; /**< Minimum eigenvalue */ -}; - -/** Structure for storing ival, ixval and iyval for each point inside the window */ -struct CLOldValue -{ - int16_t ival; /**< ival extracts from old image */ - int16_t ixval; /**< ixval extracts from scharr Gx image */ - int16_t iyval; /**< iyval extracts from scharr Gy image */ - int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */ -}; - -/** Interface for OpenCL Array of Internal Key Points. */ -using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>; -/** Interface for OpenCL Array of Coefficient Tables. */ -using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>; -/** Interface for OpenCL Array of Old Values. 
*/ -using ICLOldValArray = ICLArray<CLOldValue>; - -/** Interface to run the initialization step of LKTracker */ -class CLLKTrackerInitKernel : public ICLKernel -{ -public: - /** Initialise the kernel input and output - * - * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points - * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points - * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points - * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points - * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */ -class CLLKTrackerFinalizeKernel : public ICLKernel -{ -public: - /** Initialise the kernel input and output - * - * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points - */ - void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points - * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points - */ - void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; -}; - -/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */ -class CLLKTrackerStage0Kernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLKTrackerStage0Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default; - /** Initialise the kernel input and output - * - * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 - * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points - * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[out] old_ival Pointer to the array holding internal values - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - size_t window_dimension, size_t level); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] old_input Pointer to the input old tensor. Data types supported: U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. 
Data types supported: S16 - * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points - * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[out] old_ival Pointer to the array holding internal values - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy, - ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal, - ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - size_t window_dimension, size_t level); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_old_input; - const ICLTensor *_old_scharr_gx; - const ICLTensor *_old_scharr_gy; -}; - -/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */ -class CLLKTrackerStage1Kernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLKTrackerStage1Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default; - /** Initialise the kernel input and output - * - * @param[in] new_input Pointer to the input 
new tensor. Data types supported: U8 - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points - * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[in] old_ival Pointer to the array holding internal values - * @param[in] termination The criteria to terminate the search of each keypoint. - * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminating the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); - /** Initialise the kernel input and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] new_input Pointer to the input new tensor. Data types supported: U8 - * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points - * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients - * @param[in] old_ival Pointer to the array holding internal values - * @param[in] termination The criteria to terminate the search of each keypoint. 
- * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminating the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival, - Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_new_input; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h deleted file mode 100644 index e68160f96d..0000000000 --- a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. - * - * @attention The second input tensor must have at least 2 dimensions (matrix) - * - */ -class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLocallyConnectedMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 First input tensor. Data types supported: F32 - * @param[in] input1 Second input tensor. 
Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Initialise the kernel's input, output and alpha - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 First input tensor. Data types supported: F32 - * @param[in] input1 Second input tensor. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel - * - * @param[in] input0 First input tensor info. Data types supported: F32 - * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h deleted file mode 100644 index e0de3e7636..0000000000 --- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Template interface for the kernel to compute magnitude and phase. - * - */ -class CLMagnitudePhaseKernel : public ICLKernel -{ -public: - /** Default constructor. 
*/ - CLMagnitudePhaseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; - /** Initialise the kernel's input, output. - * - * @note At least one of output1 or output2 must be set. - * - * @param[in] gx The input gradient X tensor. Data types supported: S16. - * @param[in] gy The input gradient Y tensor. Data types supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. - * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); - /** Initialise the kernel's input, output. - * - * @note At least one of output1 or output2 must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] gx The input gradient X tensor. Data types supported: S16. - * @param[in] gy The input gradient Y tensor. Data types supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16. - * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. 
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_gx; /**< Input gradient X. */ - const ICLTensor *_gy; /**< Input gradient Y. */ - ICLTensor *_magnitude; /**< Output - Magnitude. */ - ICLTensor *_phase; /**< Output - Phase. */ - bool _run_mag; /**< Calculate magnitude ? */ - bool _run_phase; /**< Calculate phase ? */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h deleted file mode 100644 index 96b4c4ea60..0000000000 --- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ -class CLMeanStdDevKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). - */ - void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - /** Initialise the kernel's input and outputs. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. - * - * @param[in] input Input image info. Data types supported: U8. - * @param[in] mean Input average pixel value. - * @param[in] global_sum Keeps global sum of pixel values. - * @param[in] stddev (Optional) Output standard deviation of pixel values. - * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLImage *_input; - float *_mean; - float *_stddev; - cl::Buffer *_global_sum; - cl::Buffer *_global_sum_squared; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h deleted file mode 100644 index ff0c96e168..0000000000 --- a/arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ -class CLMeanStdDevNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMeanStdDevNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default; - /** Default destructor */ - ~CLMeanStdDevNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. 
Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); - /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h deleted file mode 100644 index c68ab07781..0000000000 --- a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H -#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the median 3x3 filter kernel. - * - */ -class CLMedian3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMemsetKernel.h b/arm_compute/core/CL/kernels/CLMemsetKernel.h deleted file mode 100644 index 430bc1d4f2..0000000000 --- a/arm_compute/core/CL/kernels/CLMemsetKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H -#define ARM_COMPUTE_CLMEMSETKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for filling the planes of a tensor */ -class CLMemsetKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMemsetKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMemsetKernel(const CLMemsetKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMemsetKernel &operator=(const CLMemsetKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMemsetKernel(CLMemsetKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMemsetKernel &operator=(CLMemsetKernel &&) = default; - /** Default destructor */ - ~CLMemsetKernel() = default; - - /** Initialise the kernel's tensor and filling value - * - * @param[in,out] tensor Input tensor to fill. Supported data types: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - */ - void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); - /** Initialise the kernel's tensor and filling value - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Input tensor to fill. Supported data types: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. 
- */ - void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel - * - * @param[in] tensor Source tensor info. Data types supported: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - * - * @return a status - */ - static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_tensor; - Window _full_window; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMEMSETRKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h deleted file mode 100644 index 5f9685f303..0000000000 --- a/arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform min max search on a 3D tensor. - */ -class CLMinMaxLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel - * - * @param[in] input Input tensor info. Data types supported: F32. - * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - /** Resets global minimum and maximum - * - * @param[in,out] queue Command queue on which to map and unmap the min_max tensor - */ - void reset(cl::CommandQueue &queue); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h deleted file mode 100644 index afb134fa59..0000000000 --- a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" - -#include <array> - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the kernel to perform min max search on an image. 
- */ -class CLMinMaxKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxKernel(const CLMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxKernel(CLMinMaxKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const ICLImage *input, cl::Buffer *min_max); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Input image. */ - cl::Buffer *_min_max; /**< Minimum/maximum value. */ - std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */ -}; - -/** Interface for the kernel to find min max locations of an image. 
- */ -class CLMinMaxLocationKernel : public ICLKernel -{ -public: - /** Constructor */ - CLMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default; - /** Initialise the kernel's input and outputs. - * - * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. - * - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 - * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. - * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. - */ - void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, - ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); - /** Initialise the kernel's input and outputs. - * - * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input image. 
Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 - * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. - * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, - ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; /**< Input image. */ - cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h deleted file mode 100644 index 1f337356e9..0000000000 --- a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to apply a non-linear filter */ -class CLNonLinearFilterKernel : public ICLSimple2DKernel -{ -public: - /** Default constructor */ - CLNonLinearFilterKernel(); - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. 
Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined); - /** Set the source, destination and border mode of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; - -private: - BorderSize _border_size; /**< Border size */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h deleted file mode 100644 index a256bc798d..0000000000 --- a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H -#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL - * - * @note Used by @ref CLFastCorners and @ref CLHarrisCorners - */ -class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) - * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) - * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h deleted file mode 100644 index 2511818ef2..0000000000 --- a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the normalization layer kernel. - */ -class CLNormalizationLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. - * Data layouts supported: same as @p input. 
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - */ - void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. - * Data layouts supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input. - * Data layouts supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - BorderSize _border_size; - bool _is_norm_across_width; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h deleted file mode 100644 index d247e1fddc..0000000000 --- a/arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H -#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the NormalizePlanarYUV layer kernel. */ -class CLNormalizePlanarYUVLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLNormalizePlanarYUVLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete; - /** Default Move Constructor. */ - CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default; - /** Default move assignment operator */ - CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default; - /** Default destructor */ - ~CLNormalizePlanarYUVLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. - * Data types supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 
3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. - * Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); - /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel - * - * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor info. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. 
- * Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_mean; - const ICLTensor *_std; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h deleted file mode 100644 index 166c202335..0000000000 --- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H -#define ARM_COMPUTE_CLPADLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the PadLayer function. */ -class CLPadLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPadLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPadLayerKernel(const CLPadLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPadLayerKernel(CLPadLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default; - /** Default destructor */ - ~CLPadLayerKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding. - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, - * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). - */ - void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); - /** Set the input and output tensor. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 
Data types supported: All. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding. - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, - * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), - PaddingMode mode = PaddingMode::CONSTANT); - /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel - * - * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32. - * @param[in] output Output tensor info. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding. - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, - * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). 
- */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - int _input_start_x; - int _input_start_y; - bool _4d_enabled; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPermuteKernel.h b/arm_compute/core/CL/kernels/CLPermuteKernel.h deleted file mode 100644 index 1a9240ef6b..0000000000 --- a/arm_compute/core/CL/kernels/CLPermuteKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H -#define ARM_COMPUTE_CLPERMUTEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform tensor permutation. - * - * Permutes given a permutation vector - */ -class CLPermuteKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPermuteKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteKernel(const CLPermuteKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPermuteKernel &operator=(const CLPermuteKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPermuteKernel(CLPermuteKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPermuteKernel &operator=(CLPermuteKernel &&) = default; - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All. - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to permute. Data types supported: All. - * @param[in] output The output tensor. 
Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input First tensor input info. Data types supported: All. - * @param[in] output Output tensor info. Data types supported: same as @p input. - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - PermutationVector _perm; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h deleted file mode 100644 index 52a09d9a49..0000000000 --- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H -#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the pixelwise multiplication kernel. */ -class CLPixelWiseMultiplicationKernel : public ICLKernel -{ -public: - /** Default constructor.*/ - CLPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. 
- * @param[out] output The output tensor, Data types supported: - * - U8, only if both input are U8 - * - QASYMM8, only if both inputs are QASYMM8 - * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED - * - S16 - * - QSYMM16, only if both inputs are QSYMM16 - * - S32, only if both inputs are QSYMM16 - * - F16 - * - F32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: - * - U8, only if both input are U8 - * - QASYMM8, only if both inputs are QASYMM8 - * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED - * - S16 - * - QSYMM16, only if both inputs are QSYMM16 - * - S32, only if both inputs are QSYMM16 - * - F16 - * - F32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; - -/** Interface for the complex pixelwise multiplication kernel. 
*/ -class CLComplexPixelWiseMultiplicationKernel : public ICLKernel -{ -public: - /** Default constructor.*/ - CLComplexPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. 
Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h deleted file mode 100644 index 395750440c..0000000000 --- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/Error.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the pooling layer kernel */ -class CLPoolingLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default; - /** Default destructor */ - ~CLPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); - /** Set the input and output tensors. - * - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
- * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - ICLTensor *_indices; - PoolingLayerInfo _pool_info; - DataLayout _data_layout; - BorderSize _border_size; - unsigned int _num_elems_processed_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h b/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h deleted file mode 100644 index 5fd27d9233..0000000000 --- a/arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H -#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the PriorBox layer kernel. */ -class CLPriorBoxLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLPriorBoxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default; - /** Default move assignment operator */ - CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default; - /** Default destructor */ - ~CLPriorBoxLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 - * @param[in] info Prior box layer info. - * @param[in] min Minimum prior box values - * @param[in] max Maximum prior box values - * @param[in] aspect_ratios Aspect ratio values - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1 - * @param[in] info Prior box layer info. 
- * @param[in] min Minimum prior box values - * @param[in] max Maximum prior box values - * @param[in] aspect_ratios Aspect ratio values - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, - cl::Buffer *aspect_ratios); - /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel - * - * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 - * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1 - * @param[in] info Prior box layer info. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - PriorBoxLayerInfo _info; - int _num_priors; - cl::Buffer *_min; - cl::Buffer *_max; - cl::Buffer *_aspect_ratios; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h deleted file mode 100644 index 2d4707245f..0000000000 --- a/arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to do layer normalization. 
*/ -class CLQLSTMLayerNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLQLSTMLayerNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default; - /** Default destructor */ - ~CLQLSTMLayerNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32. - * - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); - /** Initialise the kernel's input and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); - /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16. - * @param[in] output Destination info tensor. Data type supported: same as @p input - * @param[in] weight Weight info tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor info. Data types supported: S32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_weight; - const ICLTensor *_bias; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h b/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h deleted file mode 100644 index de30447e17..0000000000 --- a/arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the quantization layer kernel. - * - * @note The implementation supports only 3D input tensors. - */ -class CLQuantizationLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLQuantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default; - /** Default move assignment operator */ - CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default; - /** Default destructor */ - ~CLQuantizationLayerKernel() = default; - /** Set the input, output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h b/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h deleted file mode 100644 index 30bdbb1844..0000000000 --- a/arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H -#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the RoIAlign kernel. - */ -class CLROIAlignLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLROIAlignLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete; - /** Default Move Constructor. */ - CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default; - /** Default move assignment operator. */ - CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default; - /** Default destructor */ - ~CLROIAlignLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
- */ - void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, - * otherwise same as @p input - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
- * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_rois; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/ diff --git a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h deleted file mode 100644 index ea70a58188..0000000000 --- a/arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -#include "arm_compute/core/CL/ICLArray.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the ROI pooling layer kernel */ -class CLROIPoolingLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLROIPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default; - /** Default destructor */ - ~CLROIPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
- * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_rois; - ICLTensor *_output; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLRangeKernel.h b/arm_compute/core/CL/kernels/CLRangeKernel.h deleted file mode 100644 index fc8db98bf9..0000000000 --- a/arm_compute/core/CL/kernels/CLRangeKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLRANGEKERNEL_H -#define ARM_COMPUTE_CLRANGEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Kernel class for Range - * - * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments - * of 'step' up to but not including 'end'. - */ -class CLRangeKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLRangeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRangeKernel(const CLRangeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRangeKernel &operator=(const CLRangeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLRangeKernel(CLRangeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLRangeKernel &operator=(CLRangeKernel &&) = default; - /** Default destructor */ - ~CLRangeKernel() = default; - /** Initialize the kernel's output tensor, start, end and step of the sequence. - * - * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - */ - void configure(ICLTensor *output, float start, float end, float step); - /** Initialize the kernel's output tensor, start, end and step of the sequence. - * - * @param[in] compile_context The compile context to be used. - * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. 
- */ - void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step); - /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel - * - * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - * - * @return a status - */ - static Status validate(const ITensorInfo *output, float start, float end, float step); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - float _start; /**< Start of sequence */ - float _end; /**< End of sequence */ - float _step; /**< Increment/step value */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h deleted file mode 100644 index 0b0b4ae9b0..0000000000 --- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H -#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reduction operation kernel - */ -class CLReductionOperationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReductionOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReductionOperationKernel(const CLReductionOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReductionOperationKernel(CLReductionOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default; - /** Default destructor */ - ~CLReductionOperationKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. 
Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX - * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. - */ - void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX - * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32. - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 - * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX - * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h deleted file mode 100644 index f3d1511905..0000000000 --- a/arm_compute/core/CL/kernels/CLRemapKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLREMAPKERNEL_H -#define ARM_COMPUTE_CLREMAPKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a remap on a tensor */ -class CLRemapKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLRemapKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRemapKernel(const CLRemapKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLRemapKernel &operator=(const CLRemapKernel &) = delete; - /** Allow instances of this class to be moved */ - CLRemapKernel(CLRemapKernel &&) = default; - /** Allow instances of this class to be moved */ - CLRemapKernel &operator=(CLRemapKernel &&) = default; - /** Initialize the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] map_x Map for X coordinates. Data types supported: F32. - * @param[in] map_y Map for Y coordinates. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); - /** Initialize the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] map_x Map for X coordinates. Data types supported: F32. - * @param[in] map_y Map for Y coordinates. Data types supported: F32. 
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_map_x; - const ICLTensor *_map_y; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h deleted file mode 100644 index 9c064858af..0000000000 --- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H -#define ARM_COMPUTE_CLREORGLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a reorg layer */ -class CLReorgLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReorgLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLReorgLayerKernel(const CLReorgLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReorgLayerKernel(CLReorgLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. - * @param[out] output Destination tensor with tensor shape: - * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has - * the same number of input elements. Data types supported: same as @p input. - * @param[in] stride Stride value to use for reorganizing the values in the output tensor. 
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t stride); - /** Initialize the kernel's input, output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32. - * @param[out] output Destination tensor with tensor shape: - * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has - * the same number of input elements. Data types supported: same as @p input. - * @param[in] stride Stride value to use for reorganizing the values in the output tensor. - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride); - /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel - * - * @param[in] input Source tensor. Data types supported: All. - * @param[in] output Destination tensor with tensor shape: - * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has - * the same number of input elements. Data types supported: same as @p input. Data types supported: same as @p input. 
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREORGLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h b/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h deleted file mode 100644 index 3ea74114d0..0000000000 --- a/arm_compute/core/CL/kernels/CLReshapeLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H -#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to perform tensor reshaping */ -class CLReshapeLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReshapeLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReshapeLayerKernel(const CLReshapeLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReshapeLayerKernel &operator=(const CLReshapeLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReshapeLayerKernel(CLReshapeLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReshapeLayerKernel &operator=(CLReshapeLayerKernel &&) = default; - /** Default destructor */ - ~CLReshapeLayerKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Data type supported: Same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. 
Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLReverseKernel.h b/arm_compute/core/CL/kernels/CLReverseKernel.h deleted file mode 100644 index e8f4507969..0000000000 --- a/arm_compute/core/CL/kernels/CLReverseKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H -#define ARM_COMPUTE_CLREVERSEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the reverse kernel */ -class CLReverseKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReverseKernel(const CLReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLReverseKernel &operator=(const CLReverseKernel &) = delete; - /** Allow instances of this class to be moved */ - CLReverseKernel(CLReverseKernel &&) = default; - /** Allow instances of this class to be moved */ - CLReverseKernel &operator=(CLReverseKernel &&) = default; - /** Default destructor */ - ~CLReverseKernel() = default; - /** Initialise the kernel's inputis and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); - /** Initialise the kernel's inputis and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); - - /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] output Output tensor info. 
Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_axis; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLREVERSEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h deleted file mode 100644 index 328578d88c..0000000000 --- a/arm_compute/core/CL/kernels/CLScaleKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLSCALEKERNEL_H -#define ARM_COMPUTE_CLSCALEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the scale kernel */ -class CLScaleKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's inputs, output and interpolation policy - * - * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[out] output Destination tensor. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy Interpolation type to use - * @param[in] border_mode Selected border mode. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER - * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. - */ - void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false); - /** Initialise the kernel's inputs, output and interpolation policy - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[out] output Destination tensor. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy Interpolation type to use - * @param[in] border_mode Selected border mode. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. 
Defaults to @ref SamplingPolicy::CENTER - * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, - SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false); - - /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel - * - * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32 - * @param[in] output Destination tensor info. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy Interpolation type to use - * @param[in] border_mode Selected border mode. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER - * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER, - bool align_corners = false); - /** Input tensor accessor. - * - * @return Pointer to input tensor. - */ - const ICLTensor *input() const; - /** Output tensor accessor. - * - * @return Pointer to output tensor. 
- */ - const ICLTensor *output() const; - - // Inherited methods overridden: - BorderSize border_size() const override; - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - InterpolationPolicy _interpolationPolicy = InterpolationPolicy::BILINEAR; - DataLayout _data_layout = DataLayout::UNKNOWN; - bool _align_corners = false; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSCALEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h deleted file mode 100644 index 209a150a67..0000000000 --- a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H -#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. - * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -3 & 0 & +3\\ - * -10& 0 & +10\\ - * -3 & 0 & +3 - * \end{vmatrix} - * @f] - * @f[ - * \mathbf{G}_y=\begin{vmatrix} - * -3 & -10 & -3\\ - * 0 & 0 & 0\\ - * +3 & +10 & +3 - * \end{vmatrix} - * @f] - */ -class CLScharr3x3Kernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. 
- * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ - const ICLTensor *_input; /**< Input image */ - ICLTensor *_output_x; /**< Output image for scharr X */ - ICLTensor *_output_y; /**< Output image for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSelectKernel.h b/arm_compute/core/CL/kernels/CLSelectKernel.h deleted file mode 100644 index 5cbd985cda..0000000000 --- a/arm_compute/core/CL/kernels/CLSelectKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSELECTKERNEL_H -#define ARM_COMPUTE_CLSELECTKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** OpenCL interface for executing the select kernel - * - * Select is computed by: - * @f[ output(i) = condition(i) ? 
x(i) : y(i) @f] - **/ -class CLSelectKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSelectKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSelectKernel(const CLSelectKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSelectKernel &operator=(const CLSelectKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSelectKernel(CLSelectKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSelectKernel &operator=(CLSelectKernel &&) = default; - /** Default destructor */ - ~CLSelectKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[out] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[out] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. 
Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - * - * @return a status - */ - static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_c; /**< Condition tensor */ - const ICLTensor *_x; /**< Source tensor 1 */ - const ICLTensor *_y; /**< Source tensor 2 */ - ICLTensor *_output; /**< Destination tensor */ - bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWHEREKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h deleted file mode 100644 index 4240fe80b3..0000000000 --- a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H -#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ -class CLSobel3x3Kernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; - /** Default destructor */ - ~CLSobel3x3Kernel() = default; - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. 
- * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< Output tensor for Sobel X */ - ICLTensor *_output_y; /**< Output tensor for Sobel Y */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h deleted file mode 100644 index ef30f0ec93..0000000000 --- a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H -#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ -class CLSobel5x5HorKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
*/ - CLSobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; - /** Default destructor */ - ~CLSobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< X output of horizontal pass */ - ICLTensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ -class CLSobel5x5VertKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; - /** Default destructor */ - ~CLSobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. 
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ - const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ - ICLTensor *_output_x; /**< X output of sobel */ - ICLTensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? 
*/ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h deleted file mode 100644 index 4eda5a40d4..0000000000 --- a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H -#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ -class CLSobel7x7HorKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. 
*/ - CLSobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; - /** Default destructor */ - ~CLSobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< X output of horizontal pass */ - ICLTensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ -class CLSobel7x7VertKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; - /** Default destructor */ - ~CLSobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. 
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ - const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ - ICLTensor *_output_x; /**< X output of sobel */ - ICLTensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? 
*/ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h deleted file mode 100644 index b174f493b5..0000000000 --- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLSimple3DKernel.h" -#include "arm_compute/core/KernelDescriptors.h" - -#include <tuple> - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the identifying the max value of 1D Logits */ -class CLLogits1DMaxKernel : public ICLSimple3DKernel -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 
Data types supported: QASYMM8/F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] output Destination tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; - -/** Interface for shifting, exponentiating and summing the logits */ -class CLLogits1DShiftExpSumKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLogits1DShiftExpSumKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. 
Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0 - */ - void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[in] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input - * @param[in] sum Sum of 1D logits tensor. 
Data types supported: S32 for QASYMM8 @p input, or same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_max; - ICLTensor *_output; - ICLTensor *_sum; -}; - -/** Interface for max, shifting, exponentiating and summing the logits */ -class CLLogits1DMaxShiftExpSumKernel : public ICLKernel -{ -public: - /** Info for whether a parallel reduction will be run and the vector size of the execution. */ - using ParallelReductionInfo = std::tuple<bool, unsigned int>; - -public: - /** Default constructor */ - CLLogits1DMaxShiftExpSumKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[in,out] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. 
- */ - void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[in,out] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel - * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[in] output Destination tensor. Data types supported: same as @p input - * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum); - /** Checks if the given size is eligible for parallel reduction - * - * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size). - * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4. - * - * @param[in] size Size to check - * - * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run, - * while the second element is the vector size of the execution. 
- */ - static ParallelReductionInfo is_parallel_reduction(size_t size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_max; - ICLTensor *_output; - ICLTensor *_sum; - -private: - static const unsigned int _grid_size; - static const unsigned int _serial_vector_size; - static const unsigned int _parallel_vector_size; -}; -/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ -class CLLogits1DNormKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLogits1DNormKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: S32/F16/F32 - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. 
Data types supported: S32/F16/F32 - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel - * - * @param[in] input Source tensor. Data types supported: S32/F16/F32 - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_sum; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h deleted file mode 100644 index 799b7b16c3..0000000000 --- a/arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H -#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the space to batch kernel */ -class CLSpaceToBatchLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToBatchLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default; - /** Default destructor */ - ~CLSpaceToBatchLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. 
Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. 
Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - const ICLTensor *_block_shape; /**< Block shape tensor */ - const ICLTensor *_paddings; /**< Paddings tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h deleted file mode 100644 index f2371e7d87..0000000000 --- a/arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H -#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the space to depth kernel */ -class CLSpaceToDepthLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLSpaceToDepthLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default; - /** Default destructor */ - ~CLSpaceToDepthLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Initialise the kernel's inputs and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel. 
- * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLStackLayerKernel.h b/arm_compute/core/CL/kernels/CLStackLayerKernel.h deleted file mode 100644 index e11c0a30d6..0000000000 --- a/arm_compute/core/CL/kernels/CLStackLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H -#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ -class CLStackLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLStackLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStackLayerKernel(const CLStackLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLStackLayerKernel(CLStackLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default; - /** Default destructor */ - ~CLStackLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h b/arm_compute/core/CL/kernels/CLStridedSliceKernel.h deleted file mode 100644 index ebe1b38878..0000000000 --- a/arm_compute/core/CL/kernels/CLStridedSliceKernel.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H -#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the kernel to perform tensor strided slicing */ -class CLStridedSliceKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLStridedSliceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStridedSliceKernel(const CLStridedSliceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLStridedSliceKernel &operator=(const CLStridedSliceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLStridedSliceKernel(CLStridedSliceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLStridedSliceKernel &operator=(CLStridedSliceKernel &&) = default; - /** Default destructor */ - ~CLStridedSliceKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. 
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. - */ - void configure(const ICLTensor *input, ICLTensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All. - * @param[in] output Destination tensor. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
- */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h deleted file mode 100644 index 24e333f164..0000000000 --- a/arm_compute/core/CL/kernels/CLTableLookupKernel.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H -#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; -class ICLLut; - -/** Interface for the kernel to perform table lookup calculations. */ -class CLTableLookupKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, lut and output. - * - * @param[in] input An input tensor. Data types supported: U8, S16. - * @param[in] lut The input LUT. Data types supported: U8, S16. - * @param[out] output The output tensor. Data types supported: U8, S16. - */ - void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); - /** Initialise the kernel's input, lut and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8, S16. - * @param[in] lut The input LUT. Data types supported: U8, S16. - * @param[out] output The output tensor. Data types supported: U8, S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h deleted file mode 100644 index 3db48706a3..0000000000 --- a/arm_compute/core/CL/kernels/CLThresholdKernel.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H -#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the thresholding kernel. - * - */ -class CLThresholdKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. - * @param[in] false_value value to set when the condition is not respected. - * @param[in] true_value value to set when the condition is respected. - * @param[in] type Thresholding type. 
Either RANGE or BINARY. - * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. - */ - void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); - /**Initialise the kernel's input, output and threshold parameters. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. - * @param[in] false_value value to set when the condition is not respected. - * @param[in] true_value value to set when the condition is respected. - * @param[in] type Thresholding type. Either RANGE or BINARY. - * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTileKernel.h b/arm_compute/core/CL/kernels/CLTileKernel.h deleted file mode 100644 index 68f3c929a6..0000000000 --- a/arm_compute/core/CL/kernels/CLTileKernel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLTILEKERNEL_H -#define ARM_COMPUTE_CLTILEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a Tile operation */ -class CLTileKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLTileKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTileKernel(const CLTileKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLTileKernel &operator=(const CLTileKernel &) = delete; - /** Allow instances of this class to be moved */ - CLTileKernel(CLTileKernel &&) = default; - /** Allow instances of this class to be moved */ - CLTileKernel &operator=(CLTileKernel &&) = default; - /** Default destructor */ - ~CLTileKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: All. - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). - * @param[out] output Destination tensor. Same as @p input - * - */ - void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples); - /** Set the source, destination of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data type supported: All. - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). - * @param[out] output Destination tensor. 
Same as @p input - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); - /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel - * - * @param[in] input Source tensor info. Data type supported: All. - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported). - * @param[in] output Destination tensor info. Same as @p input - * - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLTILEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h deleted file mode 100644 index 09c9e3babf..0000000000 --- a/arm_compute/core/CL/kernels/CLTransposeKernel.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H -#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel which transposes the elements of a matrix. - * - * [width, height, batch] -> [height, width, batch] - * - */ -class CLTransposeKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] output Output tensor. 
Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h deleted file mode 100644 index e6b4209501..0000000000 --- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H -#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the UpsampleLayer kernel on OpenCL. 
*/ -class CLUpsampleLayerKernel : public ICLKernel -{ -public: - /** Constructor */ - CLUpsampleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete; - /** Default Move Constructor. */ - CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default; - /** Default move assignment operator */ - CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default; - /** Default destructor */ - ~CLUpsampleLayerKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. - */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _info; - DataLayout _data_layout; - unsigned int _num_elems_processed_per_iteration_input_x; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h deleted file mode 100644 index a21325e1c4..0000000000 --- a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H -#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the warp affine kernel.*/ -class CLWarpAffineKernel : public ICLSimple2DKernel -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 2x3 of type float - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 2x3 of type float - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h deleted file mode 100644 index bb1a018a2b..0000000000 --- a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H -#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H - -#include "arm_compute/core/CL/ICLSimple2DKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; -/** Interface for the warp perspective kernel.*/ -class CLWarpPerspectiveKernel : public ICLSimple2DKernel -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - */ - void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. 
- */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h deleted file mode 100644 index 47e987b09b..0000000000 --- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H -#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer - * - * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. - * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication. - * - * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: - * @f[ - * \left( \begin{array}{ccc} - * a000 & a001 & a002 \\ - * a010 & a011 & a012 \\ - * a020 & a021 & a022 \\ - * \end{array} \right) - * \left( \begin{array}{ccc} - * a100 & a101 & a102 \\ - * a110 & a111 & a112 \\ - * a120 & a121 & a122 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ - * \end{array} \right) - * @f] - */ -class CLWeightsReshapeKernel : public ICLKernel -{ -public: - /** Constructor.*/ - CLWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~CLWeightsReshapeKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 
Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. - * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. - */ - void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. 
- * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr. - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. - * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout - * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_biases; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */
\ No newline at end of file diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h deleted file mode 100644 index a39ccc2869..0000000000 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the width concatenate kernel of 2 tensors. - * The input1 and input2 tensors will be concatenated into the output tensor. 
- */ -class CLWidthConcatenate2TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate2TensorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate2TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. 
Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h deleted file mode 100644 index 0e0eae6e85..0000000000 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the width concatenate kernel of 4 tensors. - * All input tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate4TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate4TensorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate4TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. 
- */ - void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output); - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] input3 Third tensor info. Data types supported: same as @p input1 - * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - const ICLTensor *_input3; - const ICLTensor *_input4; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h deleted file mode 100644 index ef5851fa9a..0000000000 --- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLWidthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output); - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _width_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h deleted file mode 100644 index 5b2dc8cfc9..0000000000 --- a/arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Winograd filter transform kernel. */ -class CLWinogradFilterTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWinogradFilterTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default; - /** Default destructor */ - ~CLWinogradFilterTransformKernel() = default; - /** Set the input and output tensor. 
- * - * @note Winograd filter transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd filter transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - */ - void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Set the input and output tensor. - * - * @note Winograd filter transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd filter transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. - * @param[out] output The output tensor. 
The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel - * - * @note Winograd filter transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd filter transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32. - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. 
Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h deleted file mode 100644 index a305126f2d..0000000000 --- a/arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform Winograd input transform.*/ -class CLWinogradInputTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWinogradInputTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default; - /** Set the input and output of the kernel. - * - * @note Winograd input transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd input transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input The input tensor to transform. Data types supported: F16/F32 - * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. 
- */ - void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Set the input and output of the kernel. - * - * @note Winograd input transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd input transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to transform. Data types supported: F16/F32 - * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info); - /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel - * - * @note Winograd input transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd input transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input The input tensor to transform. Data types supported: F16/F32 - * @param[in] output The output tensor. 
The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>; - - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; - DataLayout _data_layout; - int _num_tiles_x; - int _num_tiles_y; - unsigned int _step_z; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h b/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h deleted file mode 100644 index 512b352637..0000000000 --- a/arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H -#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Winograd output transform kernel. */ -class CLWinogradOutputTransformKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWinogradOutputTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default; - /** Default destructor */ - ~CLWinogradOutputTransformKernel() = default; - /** Set the input and output tensor. 
- * - * @note Winograd output transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd output transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. - * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Set the input and output tensor. - * - * @note Winograd output transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd output transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. - * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel - * - * @note Winograd output transform supports the following configurations for NCWH data layout - * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3), - * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * @note Winograd output transform supports the following configurations for NHWC data layout - * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3), - * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5) - * - * Strides: only unit strides - * - * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32. - * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. 
Data types supported: Same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>; - - const ICLTensor *_input; - const ICLTensor *_bias; - ICLTensor *_output; - bool _is_nhwc; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h b/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h deleted file mode 100644 index d0c4a9e417..0000000000 --- a/arm_compute/core/CL/kernels/CLYOLOLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H -#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the YOLO layer kernel that performs partial activation. - * For each box, activate only: - * - x and y position (channel 0 and 1 of each box) - * - objectiveness (channel 4 of each box) - * - classes (channel 5 to (classes - 5) of each box) - */ -class CLYOLOLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLYOLOLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default; - /** Default destructor */ - ~CLYOLOLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. 
- * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_input; - ICLTensor *_output; - bool _run_in_place; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h deleted file mode 100644 index f0f7754960..0000000000 --- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H -#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. - */ -class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel -{ -public: - /** Default constructor */ - ICLDepthwiseConvolutionLayer3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false) - { - } - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Default Move Constructor. */ - ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Default move assignment operator */ - ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. 
- * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - */ - virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. - * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. 
- * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32 - * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization, - * the number of multipliers must be equal to the number of filters (IFM). 
Supported data types: S32 - */ - virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), - const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0; - -protected: - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_weights; - const ICLTensor *_biases; - unsigned int _conv_stride_y; - const ICLTensor *_output_multipliers; - const ICLTensor *_output_shifts; - bool _is_quantized; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */ diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h index c7b40baf22..f6f36596c4 100644 --- a/arm_compute/core/CPP/CPPKernels.h +++ b/arm_compute/core/CPP/CPPKernels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,11 +26,8 @@ /* Header regrouping all the CPP kernels */ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h" #include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" #include "arm_compute/core/CPP/kernels/CPPTopKVKernel.h" #include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h" diff --git a/arm_compute/core/CPP/CPPTypes.h b/arm_compute/core/CPP/CPPTypes.h index d3f6fc944d..e5322bdcb1 100644 --- a/arm_compute/core/CPP/CPPTypes.h +++ b/arm_compute/core/CPP/CPPTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,111 +21,123 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPP_TYPES_H -#define ARM_COMPUTE_CPP_TYPES_H +#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H +#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H #include "arm_compute/core/Error.h" -#include <array> -#include <string> -#include <vector> +#include <memory> namespace arm_compute { -/** CPU models - we only need to detect CPUs we have - * microarchitecture-specific code for. - * - * Architecture features are detected via HWCAPs. - */ -enum class CPUModel +namespace cpuinfo { - GENERIC, - GENERIC_FP16, - GENERIC_FP16_DOT, - A53, - A55r0, - A55r1 -}; - -/** Global memory policy. - * The functions in the runtime will use different strategies based on the policy currently set. - * - * MINIMIZE will try to reduce the amount allocated by the functions at the expense of performance normally. 
- * NORMAL won't try to save any memory and will favor speed over memory consumption +struct CpuIsaInfo; +} // namespace cpuinfo + +#define ARM_COMPUTE_CPU_MODEL_LIST \ + X(GENERIC) \ + X(GENERIC_FP16) \ + X(GENERIC_FP16_DOT) \ + X(A53) \ + X(A55r0) \ + X(A55r1) \ + X(A35) \ + X(A73) \ + X(A76) \ + X(A510) \ + X(X1) \ + X(V1) \ + X(A64FX) \ + X(N1) + +/** CPU models types * + * @note We only need to detect CPUs we have microarchitecture-specific code for. + * @note Architecture features are detected via HWCAPs. */ -enum class MemoryPolicy +enum class CPUModel { - MINIMIZE, - NORMAL +#define X(model) model, + ARM_COMPUTE_CPU_MODEL_LIST +#undef X }; -/** Convert a cpumodel value to a string - * - * @param val CPUModel value to be converted - * - * @return String representing the corresponding CPUModel. - */ -inline std::string cpu_model_to_string(CPUModel val) -{ - switch(val) - { - case CPUModel::GENERIC: - { - return std::string("GENERIC"); - } - case CPUModel::GENERIC_FP16: - { - return std::string("GENERIC_FP16"); - } - case CPUModel::GENERIC_FP16_DOT: - { - return std::string("GENERIC_FP16_DOT"); - } - case CPUModel::A53: - { - return std::string("A53"); - } - case CPUModel::A55r0: - { - return std::string("A55r0"); - } - case CPUModel::A55r1: - { - return std::string("A55r1"); - } - default: - { - ARM_COMPUTE_ERROR("Invalid CPUModel."); - return std::string("GENERIC"); - } - } -} - class CPUInfo final { -public: - /** Constructor */ +protected: CPUInfo(); + ~CPUInfo(); - /** Disable copy constructor and assignment operator to avoid copying the vector of CPUs each time - * CPUInfo is initialized once in the IScheduler and ThreadInfo will get a pointer to it. +public: + /** Access the KernelLibrary singleton. + * This method has been deprecated and will be removed in future releases + * @return The KernelLibrary instance. 
*/ - CPUInfo &operator=(const CPUInfo &cpuinfo) = delete; - CPUInfo(const CPUInfo &cpuinfo) = delete; - CPUInfo &operator=(CPUInfo &&cpuinfo) = default; - CPUInfo(CPUInfo &&cpuinfo) = default; + static CPUInfo &get(); + + /* Delete move and copy constructors and assignment operator + s */ + CPUInfo(CPUInfo const &) = delete; // Copy construct + CPUInfo(CPUInfo &&) = delete; // Move construct + CPUInfo &operator=(CPUInfo const &) = delete; // Copy assign + CPUInfo &operator=(CPUInfo &&) = delete; // Move assign /** Checks if the cpu model supports fp16. * - * @return true of the cpu supports fp16, false otherwise + * @return true if the cpu supports fp16, false otherwise */ bool has_fp16() const; + /** Checks if the cpu model supports bf16. + * + * @return true if the cpu supports bf16, false otherwise + */ + bool has_bf16() const; + /** Checks if the cpu model supports bf16. + * + * @return true if the cpu supports bf16, false otherwise + */ + bool has_svebf16() const; /** Checks if the cpu model supports dot product. * - * @return true of the cpu supports dot product, false otherwise + * @return true if the cpu supports dot product, false otherwise */ bool has_dotprod() const; + /** Checks if the cpu model supports floating-point matrix multiplication. + * + * @return true if the cpu supports floating-point matrix multiplication, false otherwise + */ + bool has_svef32mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_i8mm() const; + /** Checks if the cpu model supports integer matrix multiplication. + * + * @return true if the cpu supports integer matrix multiplication, false otherwise + */ + bool has_svei8mm() const; + /** Checks if the cpu model supports sve. + * + * @return true if the cpu supports sve, false otherwise + */ + bool has_sve() const; + /** Checks if the cpu model supports sve2. 
+ * + * @return true if the cpu supports sve2, false otherwise + */ + bool has_sve2() const; + /** Checks if the cpu model supports sme. + * + * @return true if the cpu supports sme, false otherwise + */ + bool has_sme() const; + /** Checks if the cpu model supports sme2. + * + * @return true if the cpu supports sme2, false otherwise + */ + bool has_sme2() const; /** Gets the cpu model for a given cpuid. * * @param[in] cpuid the id of the cpu core to be retrieved, @@ -138,6 +150,11 @@ public: * @return Current thread's @ref CPUModel */ CPUModel get_cpu_model() const; + /** Gets the current cpu's ISA information + * + * @return Current cpu's ISA information + */ + cpuinfo::CpuIsaInfo get_isa() const; /** Gets the L1 cache size * * @return the size of the L1 cache @@ -148,85 +165,41 @@ public: * @return the size of the L1 cache */ unsigned int get_L2_cache_size() const; - /** Set the L1 cache size - * - * @param[in] size the new size to be set. - */ - void set_L1_cache_size(unsigned int size); - /** Set the L2 cache size - * - * @param[in] size the new size to be set. - */ - void set_L2_cache_size(unsigned int size); - /** Set fp16 support - * - * @param[in] fp16 whether the cpu supports fp16. - */ - void set_fp16(const bool fp16); - /** Set dot product support - * - * @param[in] dotprod whether the cpu supports dot product. - */ - void set_dotprod(const bool dotprod); - /** Set the cpumodel for a given cpu core - * - * @param[in] cpuid the id of the core to be set. - * @param[in] model the @ref CPUModel to be set. - */ - void set_cpu_model(unsigned int cpuid, CPUModel model); - /** Set max number of cpus - * - * @param[in] cpu_count the number of CPUs in the system. 
- */ - void set_cpu_num(unsigned int cpu_count); - /** Return the maximum number of CPUs present * * @return Number of CPUs */ unsigned int get_cpu_num() const; - -private: - std::vector<CPUModel> _percpu = {}; - bool _fp16 = false; - bool _dotprod = false; - unsigned int _L1_cache_size = 32768; - unsigned int _L2_cache_size = 262144; -}; - -class MEMInfo final -{ -public: - MEMInfo(); - - /** Return the total amount of RAM memory in the system expressed in KB. + /** Return the maximum number of CPUs present excluding the little cores + * in case of an Android device * - * @return Total memory + * @return Number of CPUs excluding little */ - size_t get_total_in_kb() const; - - static void set_policy(MemoryPolicy policy); - static MemoryPolicy get_policy(); + unsigned int get_cpu_num_excluding_little() const; + /** Return whether the device has little, medium and big CPUs in case + * of an Android device, returns false otherwise + * + * @return Whether the device has little, medium and big CPUs + */ + bool cpu_has_little_mid_big() const; - /** Common memory sizes expressed in Kb to avoid having them - * duplicated throughout the code. + /** Return the vector length in bytes for sme2 + * + * @return Vector length if sme2 is enabled, otherwise returns 0. */ - static const size_t ONE_GB_IN_KB = { 1035842 }; - static const size_t TWO_GB_IN_KB = { ONE_GB_IN_KB * 2 }; + unsigned long get_sme2_vector_length() const; private: - size_t _total; - size_t _free; - size_t _buffer; - static MemoryPolicy _policy; + struct Impl; + std::unique_ptr<Impl> _impl; }; /** Information about executing thread and CPU. 
*/ struct ThreadInfo { - int thread_id{ 0 }; - int num_threads{ 1 }; - const CPUInfo *cpu_info{ nullptr }; + int thread_id{0}; + int num_threads{1}; + const CPUInfo *cpu_info{nullptr}; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index ec05af20bd..03967a536d 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,16 +25,21 @@ #define ARM_COMPUTE_ICPPKERNEL_H #include "arm_compute/core/CPP/CPPTypes.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/IKernel.h" +#include "arm_compute/core/Types.h" namespace arm_compute { class Window; +class ITensor; /** Common interface for all kernels implemented in C++ */ class ICPPKernel : public IKernel { public: + static constexpr size_t default_mws = 1; /* Default minimum workload size value - no impact */ + /** Default destructor */ virtual ~ICPPKernel() = default; @@ -51,8 +56,7 @@ public: */ virtual void run(const Window &window, const ThreadInfo &info) { - ARM_COMPUTE_UNUSED(window); - ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_UNUSED(window, info); ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked"); } @@ -69,6 +73,37 @@ public: run(window, info); } + /** Execute the kernel on the passed window + * + * @warning If is_parallelisable() returns false then the passed window must be equal to window() + * + * @note The window has to be a region within the window returned by the window() method + * + * @note The width of the window has to be a multiple of num_elems_processed_per_iteration(). + * + * @param[in] tensors A vector containing the tensors to operate on. + * @param[in] window Region on which to execute the kernel. 
(Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + */ + virtual void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) + { + ARM_COMPUTE_UNUSED(tensors, window, info); + } + + /** Return minimum workload size of the relevant kernel + * + * @param[in] platform The CPU platform used to create the context. + * @param[in] thread_count Number of threads in the execution. + * + * @return Minimum workload size for requested configuration. + */ + virtual size_t get_mws(const CPUInfo &platform, size_t thread_count) const + { + ARM_COMPUTE_UNUSED(platform, thread_count); + + return default_mws; + } + /** Name of the kernel * * @return Kernel name diff --git a/arm_compute/core/CPP/ICPPSimpleKernel.h b/arm_compute/core/CPP/ICPPSimpleKernel.h deleted file mode 100644 index acdd054c0e..0000000000 --- a/arm_compute/core/CPP/ICPPSimpleKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ICPPSIMPLEKERNEL_H -#define ARM_COMPUTE_ICPPSIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for simple C++ kernels having 1 tensor input and 1 tensor output */ -class ICPPSimpleKernel : public ICPPKernel -{ -public: - /** Constructor */ - ICPPSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel(const ICPPSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ICPPSimpleKernel &operator=(const ICPPSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel(ICPPSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - ICPPSimpleKernel &operator=(ICPPSimpleKernel &&) = default; - /** Default destructor */ - ~ICPPSimpleKernel() = default; - -protected: - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - /** Static function to check if given info will lead to a valid configuration of @ref ICPPSimpleKernel. - * - * @param[in] input Source tensor info. - * @param[in] output Destination tensor info. 
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_elems_processed_per_iteration, - bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_ICPPSIMPLEKERNEL_H */ diff --git a/arm_compute/core/CPP/Validate.h b/arm_compute/core/CPP/Validate.h deleted file mode 100644 index dfee9de86e..0000000000 --- a/arm_compute/core/CPP/Validate.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CPP_VALIDATE_H -#define ARM_COMPUTE_CPP_VALIDATE_H - -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -/** Return an error if the data type of the passed tensor info is FP16 and FP16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor_info Tensor info to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); -#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::F16, - function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - return Status {}; -} - -/** Return an error if the data type of the passed tensor info is BFLOAT16 and BFLOAT16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor_info Tensor info to validate. 
- * - * @return Status - */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); -#if !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(tensor_info->data_type() == DataType::BFLOAT16, - function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above"); -#endif /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)) */ - return Status {}; -} - -/** Return an error if the data type of the passed tensor is FP16 and FP16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor Tensor to validate. - * - * @return Status - */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensor *tensor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); - return Status{}; -} - -/** Return an error if the data type of the passed tensor is BFLOAT16 and BFLOAT16 support is not compiled in. - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] tensor Tensor to validate. 
- * - * @return Status - */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensor *tensor) -{ - ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info())); - return Status{}; -} - -#define ARM_COMPUTE_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) - -#define ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(tensor) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(__func__, __FILE__, __LINE__, tensor)) -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPP_VALIDATE_H */ diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h index 3fa83a6d6d..dd91595ea6 100644 --- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h +++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,7 @@ #ifndef ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H #define ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute @@ -62,11 +60,19 @@ public: * @param[out] classes The classes output tensor of size [N]. Data types supported: Same as @p scores_in * @param[out] batch_splits_out (Optional) The batch splits output tensor [batch_size]. Data types supported: Same as @p scores_in * @param[out] keeps (Optional) The keeps output tensor of size [N]. Data types supported: Same as@p scores_in - * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: Same as @p scores_in + * @param[out] keeps_size (Optional) Number of filtered indices per class tensor of size [num_classes]. Data types supported: U32 * @param[in] info (Optional) BoxNMSLimitInfo information. 
*/ - void configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, - ITensor *batch_splits_out = nullptr, ITensor *keeps = nullptr, ITensor *keeps_size = nullptr, const BoxNMSLimitInfo info = BoxNMSLimitInfo()); + void configure(const ITensor *scores_in, + const ITensor *boxes_in, + const ITensor *batch_splits_in, + ITensor *scores_out, + ITensor *boxes_out, + ITensor *classes, + ITensor *batch_splits_out = nullptr, + ITensor *keeps = nullptr, + ITensor *keeps_size = nullptr, + const BoxNMSLimitInfo info = BoxNMSLimitInfo()); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -76,9 +82,9 @@ public: void run_nmslimit(); private: - const ITensor *_scores_in; - const ITensor *_boxes_in; - const ITensor *_batch_splits_in; + const ITensor *_scores_in; + const ITensor *_boxes_in; + const ITensor *_batch_splits_in; ITensor *_scores_out; ITensor *_boxes_out; ITensor *_classes; diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h deleted file mode 100644 index eeb6a65525..0000000000 --- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H -#define ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include "support/Mutex.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -/** Interface for CPP Images. */ -using IImage = ITensor; - -/** CPP kernel to perform corner candidates - */ -class CPPCornerCandidatesKernel : public INEKernel -{ -public: - const char *name() const override - { - return "CPPCornerCandidatesKernel"; - } - /** Default constructor */ - CPPCornerCandidatesKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPCornerCandidatesKernel(const CPPCornerCandidatesKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPCornerCandidatesKernel &operator=(const CPPCornerCandidatesKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - CPPCornerCandidatesKernel(CPPCornerCandidatesKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - CPPCornerCandidatesKernel &operator=(CPPCornerCandidatesKernel &&) = delete; - /** Default destructor */ - ~CPPCornerCandidatesKernel() = default; - - /** Setup the kernel parameters - * - * @param[in] input Source image (harris score). 
Format supported F32 - * @param[out] output Destination array of InternalKeypoint - * @param[out] num_corner_candidates Number of corner candidates - */ - void configure(const IImage *input, InternalKeypoint *output, int32_t *num_corner_candidates); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - int32_t *_num_corner_candidates; /**< Number of corner candidates */ - arm_compute::Mutex _corner_candidates_mutex; /**< Mutex to preventing race conditions */ - const IImage *_input; /**< Source image - Harris score */ - InternalKeypoint *_output; /**< Array of NEInternalKeypoint */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h deleted file mode 100644 index cf8e4f00b9..0000000000 --- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H -#define ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** CPP kernel to perform in-place computation of euclidean distance on IDetectionWindowArray - * - * @note This kernel is meant to be used alongside HOG or other object detection algorithms to perform a non-maxima suppression on a - * IDetectionWindowArray - */ -class CPPDetectionWindowNonMaximaSuppressionKernel : public ICPPKernel -{ -public: - const char *name() const override - { - return "CPPDetectionWindowNonMaximaSuppressionKernel"; - } - /** Default constructor */ - CPPDetectionWindowNonMaximaSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPDetectionWindowNonMaximaSuppressionKernel(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPDetectionWindowNonMaximaSuppressionKernel &operator=(const CPPDetectionWindowNonMaximaSuppressionKernel &) = delete; - /** Allow instances of this class to be moved */ - CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; - /** Allow instances of this class to be moved */ - CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; - /** Initialise the kernel's input, output and the euclidean minimum distance - * - * @attention: If @ref 
IDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref IDetectionWindowArray must be called respectively before and after - * the run() method of @ref CPPDetectionWindowNonMaximaSuppressionKernel - * - * @param[in, out] input_output Input/Output array of @ref DetectionWindow - * @param[in] min_distance Radial Euclidean distance for non-maxima suppression - */ - void configure(IDetectionWindowArray *input_output, float min_distance); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - IDetectionWindowArray *_input_output; - float _min_distance; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h index cb416af070..d1f7f8670f 100644 --- a/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H #define ARM_COMPUTE_CPP_NONMAXIMUMSUPPRESSIONKERNEL_LAYER_H -#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" - #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" namespace arm_compute { @@ -65,7 +64,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. 
* */ - void configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, unsigned int max_output_size, const float score_threshold, const float iou_threshold); + void configure(const ITensor *input_bboxes, + const ITensor *input_scores, + ITensor *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); /** Static function to check if given arguments will lead to a valid configuration of @ref CPPNonMaximumSuppressionKernel * @@ -77,8 +81,12 @@ public: * @param[in] iou_threshold The threshold used in non maximum suppression. * */ - static Status validate(const ITensorInfo *input_bboxes, const ITensorInfo *input_scores, const ITensorInfo *output_indices, unsigned int max_output_size, - const float score_threshold, const float iou_threshold); + static Status validate(const ITensorInfo *input_bboxes, + const ITensorInfo *input_scores, + const ITensorInfo *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h index e75152f4ea..d141c2fb70 100644 --- a/arm_compute/core/CPP/kernels/CPPPermuteKernel.h +++ b/arm_compute/core/CPP/kernels/CPPPermuteKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,15 +56,15 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32 - * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[out] output The output tensor. 
Data types supported: same as @p input * @param[in] perm Permutation vector */ void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); /** Static function to check if given info will lead to a valid configuration of @ref CPPPermuteKernel * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32 - * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to permute. Data types supported: All. + * @param[in] output The output tensor. Data types supported: same as @p input * @param[in] perm Permutation vector * * @return a status diff --git a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h b/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h deleted file mode 100644 index d127ef8d8a..0000000000 --- a/arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H -#define ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" -#include "arm_compute/core/IArray.h" - -#include <cstdint> -#include <mutex> - -namespace arm_compute -{ -/** CPP kernel to perform sorting and euclidean distance */ -class CPPSortEuclideanDistanceKernel : public ICPPKernel -{ -public: - const char *name() const override - { - return "CPPSortEuclideanDistanceKernel"; - } - /** Default constructor */ - CPPSortEuclideanDistanceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPSortEuclideanDistanceKernel(const CPPSortEuclideanDistanceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPSortEuclideanDistanceKernel &operator=(const CPPSortEuclideanDistanceKernel &) = delete; - /** Allow instances of this class to be moved */ - CPPSortEuclideanDistanceKernel(CPPSortEuclideanDistanceKernel &&) = default; - /** Allow instances of this class to be moved */ - CPPSortEuclideanDistanceKernel &operator=(CPPSortEuclideanDistanceKernel &&) = default; - /** Initialise the kernel's source, destination and border mode. - * - * @param[in,out] in_out Input internal keypoints. Marked as out as the kernel writes 0 in the strength member. - * @param[out] output Output keypoints. 
- * @param[in] num_corner_candidates Pointer to the number of corner candidates in the input array - * @param[in] min_distance Radial Euclidean distance to use - */ - void configure(InternalKeypoint *in_out, IKeyPointArray *output, const int32_t *num_corner_candidates, float min_distance); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const int32_t *_num_corner_candidates; /**< Number of corner candidates */ - float _min_distance; /**< Radial Euclidean distance */ - InternalKeypoint *_in_out; /**< Source array of InternalKeypoint */ - IKeyPointArray *_output; /**< Destination array of IKeyPointArray */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPPSORTEUCLIDEANDISTANCEKERNEL_H */ diff --git a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h index 4b9bfdd3c9..7326a10e2f 100644 --- a/arm_compute/core/CPP/kernels/CPPTopKVKernel.h +++ b/arm_compute/core/CPP/kernels/CPPTopKVKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ public: /** Set the input and output of the kernel. * * @param[in] predictions A batch_size x classes tensor. Data types supported: F16/S32/F32/QASYMM8/QASYMM8_SIGNED - * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: S32 + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 * @param[out] output Computed precision at @p k as a bool 1D tensor. Data types supported: U8 * @param[in] k Number of top elements to look at for computing precision. */ @@ -63,13 +63,14 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CPPTopKVKernel * * @param[in] predictions A batch_size x classes tensor info. 
Data types supported: F16/S32/F32/QASYMM8/QASYMM8_SIGNED - * @param[in] targets A batch_size 1D tensor info of class ids. Data types supported: S32 + * @param[in] targets A batch_size 1D tensor info of class ids. Data types supported: U32 * @param[in] output Computed precision at @p k as a bool 1D tensor info. Data types supported: U8 * @param[in] k Number of top elements to look at for computing precision. * * @return a status */ - static Status validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h index 9fbc9b697c..dd7e07c390 100644 --- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h +++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,8 +55,8 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED - * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] input The input tensor to upsample. Data types supported: All. + * @param[out] output The output tensor. Data types supported: same as @p input. * @param[in] info Padding info. */ void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info); diff --git a/arm_compute/core/Coordinates.h b/arm_compute/core/Coordinates.h index 78ca5250ab..d1240bb10a 100644 --- a/arm_compute/core/Coordinates.h +++ b/arm_compute/core/Coordinates.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. 
+ * Copyright (c) 2016-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,8 +42,7 @@ public: * @param[in] coords Values to initialize the dimensions. */ template <typename... Ts> - constexpr Coordinates(Ts... coords) - : Dimensions{ coords... } + constexpr Coordinates(Ts... coords) : Dimensions{coords...} { } /** Allow instances of this class to be copy constructed */ @@ -57,5 +56,5 @@ public: /** Default destructor */ ~Coordinates() = default; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_COORDINATES_H*/ diff --git a/arm_compute/core/CoreTypes.h b/arm_compute/core/CoreTypes.h new file mode 100644 index 0000000000..1a9db1937c --- /dev/null +++ b/arm_compute/core/CoreTypes.h @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_ARM_COMPUTE_CORE_CORETYPES +#define ACL_ARM_COMPUTE_CORE_CORETYPES + +#include "arm_compute/core/Strides.h" + +#include "support/Half.h" + +/** CoreTypes.h groups together essential small types that are used across functions */ + +namespace arm_compute +{ +/** 16-bit floating point type */ +using half = half_float::half; +/** Permutation vector */ +using PermutationVector = Strides; + +/** Available channels */ +enum class Channel +{ + UNKNOWN, /** Unknown channel format */ + C0, /**< First channel (used by formats with unknown channel types). */ + C1, /**< Second channel (used by formats with unknown channel types). */ + C2, /**< Third channel (used by formats with unknown channel types). */ + C3, /**< Fourth channel (used by formats with unknown channel types). */ + R, /**< Red channel. */ + G, /**< Green channel. */ + B, /**< Blue channel. */ + A, /**< Alpha channel. */ + Y, /**< Luma channel. */ + U, /**< Cb/U channel. */ + V /**< Cr/V/Value channel. */ +}; + +/** Image colour formats */ +enum class Format +{ + UNKNOWN, /**< Unknown image format */ + U8, /**< 1 channel, 1 U8 per channel */ + S16, /**< 1 channel, 1 S16 per channel */ + U16, /**< 1 channel, 1 U16 per channel */ + S32, /**< 1 channel, 1 S32 per channel */ + U32, /**< 1 channel, 1 U32 per channel */ + S64, /**< 1 channel, 1 S64 per channel */ + U64, /**< 1 channel, 1 U64 per channel */ + BFLOAT16, /**< 16-bit brain floating-point number */ + F16, /**< 1 channel, 1 F16 per channel */ + F32, /**< 1 channel, 1 F32 per channel */ + UV88, /**< 2 channel, 1 U8 per channel */ + RGB888, /**< 3 channels, 1 U8 per channel */ + RGBA8888, /**< 4 channels, 1 U8 per channel */ + YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ + YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ + NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ + NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 
sampling */ + IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ + UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ +}; + +/** Available data types */ +enum class DataType +{ + UNKNOWN, /**< Unknown data type */ + U8, /**< unsigned 8-bit number */ + S8, /**< signed 8-bit number */ + QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ + QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ + QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ + QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ + U16, /**< unsigned 16-bit number */ + S16, /**< signed 16-bit number */ + QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ + QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ + U32, /**< unsigned 32-bit number */ + S32, /**< signed 32-bit number */ + U64, /**< unsigned 64-bit number */ + S64, /**< signed 64-bit number */ + BFLOAT16, /**< 16-bit brain floating-point number */ + F16, /**< 16-bit floating-point number */ + F32, /**< 32-bit floating-point number */ + F64, /**< 64-bit floating-point number */ + SIZET /**< size_t */ +}; + +/** [DataLayout enum definition] **/ + +/** Supported tensor data layouts */ +enum class DataLayout +{ + UNKNOWN, /**< Unknown data layout */ + NCHW, /**< Num samples, channels, height, width */ + NHWC, /**< Num samples, height, width, channels */ + NCDHW, /**< Num samples, channels, depth, height, width */ + NDHWC /**< Num samples, depth, height, width, channels */ +}; +/** [DataLayout enum definition] **/ + +/** Supported tensor data layout dimensions */ +enum class DataLayoutDimension +{ + CHANNEL, /**< channel */ + HEIGHT, /**< height */ + WIDTH, /**< width */ + DEPTH, /**< depth */ + BATCHES /**< batches */ +}; + +/** Dimension rounding type when down-scaling on CNNs + * @note Used in pooling and convolution layer + */ +enum class DimensionRoundingType +{ + FLOOR, /**< Floor rounding 
*/ + CEIL /**< Ceil rounding */ +}; + +class PadStrideInfo +{ +public: + /** Constructor + * + * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. + * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. + * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. + * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. + * @param[in] round (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR. + */ + PadStrideInfo(unsigned int stride_x = 1, + unsigned int stride_y = 1, + unsigned int pad_x = 0, + unsigned int pad_y = 0, + DimensionRoundingType round = DimensionRoundingType::FLOOR) + : _stride(std::make_pair(stride_x, stride_y)), + _pad_left(pad_x), + _pad_top(pad_y), + _pad_right(pad_x), + _pad_bottom(pad_y), + _round_type(round) + { + } + /** Constructor + * + * @param[in] stride_x Stride, in elements, across x. + * @param[in] stride_y Stride, in elements, across y. + * @param[in] pad_left Padding across x on the left, in elements. + * @param[in] pad_right Padding across x on the right, in elements. + * @param[in] pad_top Padding across y on the top, in elements. + * @param[in] pad_bottom Padding across y on the bottom, in elements. + * @param[in] round Dimensions rounding. + */ + PadStrideInfo(unsigned int stride_x, + unsigned int stride_y, + unsigned int pad_left, + unsigned int pad_right, + unsigned int pad_top, + unsigned int pad_bottom, + DimensionRoundingType round) + : _stride(std::make_pair(stride_x, stride_y)), + _pad_left(pad_left), + _pad_top(pad_top), + _pad_right(pad_right), + _pad_bottom(pad_bottom), + _round_type(round) + { + } + /** Get the stride. + * + * @return a pair: stride x, stride y. + */ + std::pair<unsigned int, unsigned int> stride() const + { + return _stride; + } + /** Check whether the padding is symmetric. + * + * @return True if the padding is symmetric. 
+ */ + bool padding_is_symmetric() const + { + return (_pad_left == _pad_right) && (_pad_top == _pad_bottom); + } + /** Get the padding. + * + * @note This should only be used when the padding is symmetric. + * + * @return a pair: padding left/right, padding top/bottom + */ + std::pair<unsigned int, unsigned int> pad() const + { + //this accessor should be used only when padding is symmetric + ARM_COMPUTE_ERROR_ON(!padding_is_symmetric()); + return std::make_pair(_pad_left, _pad_top); + } + + /** Get the left padding */ + unsigned int pad_left() const + { + return _pad_left; + } + /** Get the right padding */ + unsigned int pad_right() const + { + return _pad_right; + } + /** Get the top padding */ + unsigned int pad_top() const + { + return _pad_top; + } + /** Get the bottom padding */ + unsigned int pad_bottom() const + { + return _pad_bottom; + } + + /** Get the rounding type */ + DimensionRoundingType round() const + { + return _round_type; + } + + /** Check whether this has any padding */ + bool has_padding() const + { + return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0); + } + +private: + std::pair<unsigned int, unsigned int> _stride; + unsigned int _pad_left; + unsigned int _pad_top; + unsigned int _pad_right; + unsigned int _pad_bottom; + + DimensionRoundingType _round_type; +}; + +/** Memory layouts for the weights tensor. + * + * * UNSPECIFIED is used to select kernels that do not run in + * variable weights mode. + * + * * ANY is used to query the kernel database to retrieve any of the + * kernels that runs in variable weights mode. Once a kernel is + * found, the specific format expected by the kernel can be + * retrieved by the user for reordering the weights tensor + * accordingly. 
+ * + * The other values OHWIo{interleave_by}i{block_by} describe the + * memory layout of a 4D tensor with layout OHWI that has been + * transformed into a 4D tensor with dimensions O'HWI' where: + * + * O' = first multiple of {interleave_by} s.t. O<=O' + * I' = first multiple of {block_by} s.t. I<=I' + * + * The total size of the dst tensor is O' x H x W x I' + * + * The access function of the tensor with layout + * OHWIo{interleave_by}i{block_by} and size O'HWI' is a 6-parameter + * access function, where the 6 parameters are computed as follows: + * + * x5 = floor(o/{interleave_by}) RANGE [0, O'/{interleave_by} -1] SIZE: O'/{interleave_by} + * + * x4 = h RANGE [0, H-1] SIZE: H + * x3 = w RANGE [0, W-1] SIZE: W + * x2 = floor(i/{block_by}) RANGE [0, I'/{block_by} -1] SIZE: I'/{block_by} + * x1 = o%{interleave_by} RANGE [0, {interleave_by} -1] SIZE: {interleave_by} + * x0 = i%{block_by} RANGE [0, {block_by} -1] SIZE: {block_by} + * TOTAL SIZE: O' * H * W * I' + * + * 4D 6D + * ----------------- ----------------------------------- + * value(o, h, w, i) = x5 * H * W * I' * {interleave_by} + * + x4 * W * I' * {interleave_by} + * + x3 * I' * {interleave_by} + * + x2 * {interleave_by} * {block_by} + * + x1 * {block_by} + * + x0 + * + * Notice that in arm_gemm the 4D tensor of dimension O'HWI' created + * for the OHWIo{interleave_by}i{block_by} format is in reality seen + * as a 2D tensor, where the number of rows is O'/{interleave_by} + * and the number of columns is {interleave_by} * H * W * I'. + * + * The postfix *_bf16 is for the memory layout needed for the + * fast-mode kernels, in which the weights are passed in bfloat16 + * format. 
+ */ +enum class WeightFormat +{ + UNSPECIFIED = 0x1, + ANY = 0x2, + OHWI = 0x100100, + OHWIo2 = 0x100200, + OHWIo4 = 0x100400, + OHWIo8 = 0x100800, + OHWIo16 = 0x101000, + OHWIo32 = 0x102000, + OHWIo64 = 0x104000, + OHWIo128 = 0x108000, + OHWIo4i2 = 0x200400, + OHWIo4i2_bf16 = 0x200410, + OHWIo8i2 = 0x200800, + OHWIo8i2_bf16 = 0x200810, + OHWIo16i2 = 0x201000, + OHWIo16i2_bf16 = 0x201010, + OHWIo32i2 = 0x202000, + OHWIo32i2_bf16 = 0x202010, + OHWIo64i2 = 0x204000, + OHWIo64i2_bf16 = 0x204010, + OHWIo4i4 = 0x400400, + OHWIo4i4_bf16 = 0x400410, + OHWIo8i4 = 0x400800, + OHWIo8i4_bf16 = 0x400810, + OHWIo16i4 = 0x401000, + OHWIo16i4_bf16 = 0x401010, + OHWIo32i4 = 0x402000, + OHWIo32i4_bf16 = 0x402010, + OHWIo64i4 = 0x404000, + OHWIo64i4_bf16 = 0x404010, + OHWIo2i8 = 0x800200, + OHWIo4i8 = 0x800400, + OHWIo8i8 = 0x800800, + OHWIo16i8 = 0x801000, + OHWIo32i8 = 0x802000, + OHWIo64i8 = 0x804000 +}; + +} // namespace arm_compute +#endif /* ACL_ARM_COMPUTE_CORE_CORETYPES */ diff --git a/arm_compute/core/Dimensions.h b/arm_compute/core/Dimensions.h index fbaef3a8f0..bb8692d70a 100644 --- a/arm_compute/core/Dimensions.h +++ b/arm_compute/core/Dimensions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include <algorithm> #include <array> #include <functional> +#include <limits> #include <numeric> namespace arm_compute @@ -49,8 +50,7 @@ public: * @param[in] dims Values to initialize the dimensions. */ template <typename... Ts> - explicit Dimensions(Ts... dims) - : _id{ { static_cast<T>(dims)... } }, _num_dimensions{ sizeof...(dims) } + explicit Dimensions(Ts... dims) : _id{{static_cast<T>(dims)...}}, _num_dimensions{sizeof...(dims)} { } @@ -68,14 +68,19 @@ public: /** Accessor to set the value of one of the dimensions. * - * @param[in] dimension Dimension for which the value is set. - * @param[in] value Value to be set for the dimension. 
+ * @param[in] dimension Dimension for which the value is set. + * @param[in] value Value to be set for the dimension. + * @param[in] increase_dim_unit (Optional) Set to true if new unit dimensions increase the number of dimensions (e.g. for Coordinates), false otherwise (e.g. for TensorShapes) */ - void set(size_t dimension, T value) + void set(size_t dimension, T value, bool increase_dim_unit = true) { ARM_COMPUTE_ERROR_ON(dimension >= num_max_dimensions); - _id[dimension] = value; - _num_dimensions = std::max(_num_dimensions, dimension + 1); + _id[dimension] = value; + // Don't increase the number of dimensions if the new dimension is 1 + if (increase_dim_unit || value != 1) + { + _num_dimensions = std::max(_num_dimensions, dimension + 1); + } } /** Alias to access the size of the first dimension */ T x() const @@ -92,6 +97,21 @@ public: { return _id[2]; } + /** Increments the given dimension by a step size, avoiding overflows + * + * @note Precondition: dim < _num_dimensions + * + * @param[in] dim Dimension to increment. + * @param[in] step Step to increment @p dim by. 
+ */ + void increment(size_t dim, T step = 1) + { + ARM_COMPUTE_ERROR_ON(dim >= _num_dimensions); + if ((std::numeric_limits<T>::max() - _id[dim]) >= step) + { + _id[dim] += step; + } + } /** Generic accessor to get the size of any dimension * * @note Precondition: dimension < Dimensions::num_max_dimensions @@ -141,7 +161,7 @@ public: const size_t last = std::min(_num_dimensions, first + n); - if(last > (first + 1)) + if (last > (first + 1)) { // Collapse dimensions into the first _id[first] = std::accumulate(&_id[first], &_id[last], 1, std::multiplies<T>()); @@ -175,7 +195,7 @@ public: void remove(size_t idx) { ARM_COMPUTE_ERROR_ON(_num_dimensions < 1); - if(idx >= _num_dimensions) + if (idx >= _num_dimensions) { return; } @@ -241,7 +261,7 @@ protected: ~Dimensions() = default; std::array<T, num_max_dimensions> _id; - size_t _num_dimensions{ 0 }; + size_t _num_dimensions{0}; }; /** Check that given dimensions are equal. @@ -268,5 +288,5 @@ inline bool operator!=(const Dimensions<T> &lhs, const Dimensions<T> &rhs) { return !(lhs == rhs); } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_DIMENSIONS_H*/ diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h index dd3e8889bc..7a7033805a 100644 --- a/arm_compute/core/Error.h +++ b/arm_compute/core/Error.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2019, 2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,8 +53,7 @@ class Status { public: /** Default Constructor **/ - Status() - : _code(ErrorCode::OK), _error_description(" ") + Status() : _code(ErrorCode::OK), _error_description(" ") { } /** Default Constructor @@ -101,7 +100,7 @@ public: /** Throws a runtime exception in case it contains a valid error status */ void throw_if_error() const { - if(!bool(*this)) + if (!bool(*this)) { internal_throw_on_error(); } @@ -119,7 +118,7 @@ private: /** Creates an error containing the error message * * @param[in] error_code Error code - * @param[in] msg Message to display before aborting. + * @param[in] msg Message to display before abandoning. * * @return status containing the error */ @@ -131,7 +130,7 @@ Status create_error(ErrorCode error_code, std::string msg); * @param[in] func Function in which the error occurred. * @param[in] file File in which the error occurred. * @param[in] line Line in which the error occurred. - * @param[in] msg Message to display before aborting. + * @param[in] msg Message to display before abandoning. * * @return status containing the error */ @@ -141,7 +140,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] err Error status */ [[noreturn]] void throw_error(Status err); -} +} // namespace arm_compute /** To avoid unused variables warnings * * This is useful if for example a variable is only used @@ -156,7 +155,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] error_code Error code. * @param[in] msg Message to encapsulate. 
*/ -#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg) +#define ARM_COMPUTE_CREATE_ERROR(error_code, msg) \ + arm_compute::create_error_msg(error_code, __func__, __FILE__, __LINE__, msg) /** Creates an error on location with a given message * @@ -164,9 +164,10 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] func Function in which the error occurred. * @param[in] file File in which the error occurred. * @param[in] line Line in which the error occurred. - * @param[in] msg Message to display before aborting. + * @param[in] msg Message to display before abandoning. */ -#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) arm_compute::create_error_msg(error_code, func, file, line, msg) +#define ARM_COMPUTE_CREATE_ERROR_LOC(error_code, func, file, line, msg) \ + arm_compute::create_error_msg(error_code, func, file, line, msg) /** Creates an error on location with a given message. Accepts a message format * and a variable list of arguments matching the format description. @@ -178,14 +179,14 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) \ - do \ - { \ - std::array<char, 512> out{ 0 }; \ - int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ - snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ - arm_compute::create_error(error_code, std::string(out.data())); \ - } while(false) +#define ARM_COMPUTE_CREATE_ERROR_LOC_VAR(error_code, func, file, line, msg, ...) 
\ + do \ + { \ + std::array<char, 512> out{0}; \ + int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ + snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ + arm_compute::create_error(error_code, std::string(out.data())); \ + } while (false) /** An error is returned with the given description. * @@ -195,7 +196,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file do \ { \ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, __VA_ARGS__); \ - } while(false) + } while (false) /** Checks if a status contains an error and returns it * @@ -204,18 +205,18 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_RETURN_ON_ERROR(status) \ do \ { \ - if(!bool(status)) \ + const auto s = status; \ + if (!bool(s)) \ { \ - return status; \ + return s; \ } \ - } while(false) + } while (false) /** Checks if an error value is valid if not throws an exception with the error * * @param[in] error Error value to check. */ -#define ARM_COMPUTE_THROW_ON_ERROR(error) \ - error.throw_if_error(); +#define ARM_COMPUTE_THROW_ON_ERROR(error) error.throw_if_error(); /** If the condition is true, an error is returned. Accepts a message format * and a variable list of arguments matching the format description. @@ -227,28 +228,29 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_RETURN_ERROR_ON_MSG_VAR(cond, msg, ...) 
\ do \ { \ - if(cond) \ + if (cond) \ { \ - std::array<char, 512> out{ 0 }; \ + std::array<char, 512> out{0}; \ int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", __func__, __FILE__, __LINE__); \ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ return arm_compute::create_error(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data())); \ } \ - } while(false) + } while (false) /** If the condition is true, an error is returned * * @param[in] cond Condition to evaluate. * @param[in] msg Error description message */ -#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \ - do \ - { \ - if(cond) \ - { \ - return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, msg); \ - } \ - } while(false) +#define ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, msg) \ + do \ + { \ + if (cond) \ + { \ + return arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, __func__, __FILE__, __LINE__, \ + msg); \ + } \ + } while (false) /** If the condition is true, an error is thrown. Accepts a message format * and a variable list of arguments matching the format description. @@ -260,17 +262,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) \ - do \ - { \ - if(cond) \ - { \ - std::array<char, 512> out{ 0 }; \ - int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ - snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ - return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \ - } \ - } while(false) +#define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(cond, func, file, line, msg, ...) 
\ + do \ + { \ + if (cond) \ + { \ + std::array<char, 512> out{0}; \ + int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ + snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ + return arm_compute::create_error(ErrorCode::RUNTIME_ERROR, std::string(out.data())); \ + } \ + } while (false) /** If the condition is true, an error is thrown. * @@ -283,18 +285,17 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(cond, func, file, line, msg) \ do \ { \ - if(cond) \ + if (cond) \ { \ return arm_compute::create_error_msg(ErrorCode::RUNTIME_ERROR, func, file, line, msg); \ } \ - } while(false) + } while (false) /** If the condition is true, an error is returned * * @param[in] cond Condition to evaluate */ -#define ARM_COMPUTE_RETURN_ERROR_ON(cond) \ - ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond) +#define ARM_COMPUTE_RETURN_ERROR_ON(cond) ARM_COMPUTE_RETURN_ERROR_ON_MSG(cond, #cond) /** If the condition is true, an error is returned * @@ -313,11 +314,12 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] line Line in which the error occurred. * @param[in] msg Message to display. */ -#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \ - do \ - { \ - arm_compute::throw_error(arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \ - } while(false) +#define ARM_COMPUTE_THROW_ERROR(func, file, line, msg) \ + do \ + { \ + arm_compute::throw_error( \ + arm_compute::create_error_msg(arm_compute::ErrorCode::RUNTIME_ERROR, func, file, line, msg)); \ + } while (false) /** Print the given message then throw an std::runtime_error. Accepts a message format * and a variable list of arguments matching the format description. 
@@ -331,11 +333,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, ...) \ do \ { \ - std::array<char, 512> out{ 0 }; \ - int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ + std::array<char, 512> out{0}; \ + int offset = snprintf(out.data(), out.size(), "in %s %s:%d: ", func, file, line); \ snprintf(out.data() + offset, out.size() - offset, msg, __VA_ARGS__); \ arm_compute::throw_error(arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, std::string(out.data()))); \ - } while(false) + } while (false) /** Print the given message then throw an std::runtime_error. Accepts a message format * and a variable list of arguments matching the format description. @@ -360,7 +362,8 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT +#define ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, msg, ...) \ + ARM_COMPUTE_THROW_ERROR_VAR(func, file, line, msg, __VA_ARGS__) // NOLINT /** Print the given message then throw an std::runtime_error. * @@ -379,11 +382,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_EXIT_ON_MSG(cond, msg) \ do \ { \ - if(cond) \ + if (cond) \ { \ ARM_COMPUTE_ERROR(msg); \ } \ - } while(false) + } while (false) /** If the condition is true, the given message is printed and program exits. Accepts a message format * and a variable list of arguments matching the format description. @@ -395,27 +398,25 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, ...) 
\ do \ { \ - if(cond) \ + if (cond) \ { \ ARM_COMPUTE_ERROR_VAR(msg, __VA_ARGS__); \ } \ - } while(false) + } while (false) #ifdef ARM_COMPUTE_ASSERTS_ENABLED /** Checks if a status value is valid if not throws an exception with the error * * @param[in] status Status value to check. */ -#define ARM_COMPUTE_ERROR_THROW_ON(status) \ - status.throw_if_error() +#define ARM_COMPUTE_ERROR_THROW_ON(status) status.throw_if_error() /** If the condition is true, the given message is printed and an exception is thrown * * @param[in] cond Condition to evaluate. * @param[in] msg Message to display. */ -#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) \ - ARM_COMPUTE_EXIT_ON_MSG(cond, msg) +#define ARM_COMPUTE_ERROR_ON_MSG(cond, msg) ARM_COMPUTE_EXIT_ON_MSG(cond, msg) /** If the condition is true, the given message is printed and an exception is thrown. Accepts a message format * and a variable list of arguments matching the format description. @@ -424,8 +425,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * @param[in] msg Error description message format. * @param[in] ... List of arguments matching the format description. */ -#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) \ - ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__) +#define ARM_COMPUTE_ERROR_ON_MSG_VAR(cond, msg, ...) ARM_COMPUTE_EXIT_ON_MSG_VAR(cond, msg, __VA_ARGS__) /** If the condition is true, the given message is printed and an exception is thrown. * @@ -438,11 +438,11 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file #define ARM_COMPUTE_ERROR_ON_LOC_MSG(cond, func, file, line, ...) 
\ do \ { \ - if(cond) \ + if (cond) \ { \ ARM_COMPUTE_ERROR_LOC_VAR(func, file, line, __VA_ARGS__); \ } \ - } while(false) + } while (false) /** If the condition is true, the given message is printed and an exception is thrown, otherwise value is returned * @@ -463,8 +463,7 @@ Status create_error_msg(ErrorCode error_code, const char *func, const char *file * * @param[in] cond Condition to evaluate. */ -#define ARM_COMPUTE_ERROR_ON(cond) \ - ARM_COMPUTE_ERROR_ON_MSG(cond, #cond) +#define ARM_COMPUTE_ERROR_ON(cond) ARM_COMPUTE_ERROR_ON_MSG(cond, #cond) /** If the condition is true then an error message is printed and an exception thrown * diff --git a/arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h b/arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h deleted file mode 100644 index 9706c9b3a6..0000000000 --- a/arm_compute/core/GLES_COMPUTE/GCCoreRuntimeContext.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCCORERUNTIME_CONTEXT_H -#define ARM_COMPUTE_GCCORERUNTIME_CONTEXT_H - -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" - -namespace arm_compute -{ -// Forward declarations -class GCKernelLibrary; - -/** Core runtime context for OpenGL ES */ -class GCCoreRuntimeContext final -{ -public: - /** Legacy constructor */ - GCCoreRuntimeContext(); - - /** Constructor */ - GCCoreRuntimeContext(GCKernelLibrary *kernel_lib); - /** Destructor */ - ~GCCoreRuntimeContext() = default; - /** Default copy constructor */ - GCCoreRuntimeContext(const GCCoreRuntimeContext &) = default; - /** Default move constructor */ - GCCoreRuntimeContext(GCCoreRuntimeContext &&) = default; - /** Default copy assignment */ - GCCoreRuntimeContext &operator=(const GCCoreRuntimeContext &) = default; - /** Default move assignment operator */ - GCCoreRuntimeContext &operator=(GCCoreRuntimeContext &&) = default; - /** Kernel Library accessor - * - * @return The kernel library instance used by the core context - */ - GCKernelLibrary *kernel_library() const; - -private: - GCKernelLibrary *_kernel_lib{ nullptr }; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCCORERUNTIME_CONTEXT_H */ diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h deleted file mode 100644 index 0f6daf786b..0000000000 --- a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCKERNELLIBRARY_H -#define ARM_COMPUTE_GCKERNELLIBRARY_H - -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Utils.h" - -#include <map> -#include <set> -#include <string> -#include <utility> -#include <vector> - -namespace arm_compute -{ -/** GCProgram class */ -class GCProgram final -{ -public: - /** Default constructor. */ - GCProgram(); - /** Construct program from source file. - * - * @param[in] name Program name. - * @param[in] source Program source. - */ - GCProgram(std::string name, std::string source); - /** Default Copy Constructor. */ - GCProgram(const GCProgram &) = default; - /** Default Move Constructor. 
*/ - GCProgram(GCProgram &&) = default; - /** Default copy assignment operator */ - GCProgram &operator=(const GCProgram &) = default; - /** Default move assignment operator */ - GCProgram &operator=(GCProgram &&) = default; - /** Returns program name. - * - * @return Program's name. - */ - std::string name() const - { - return _name; - } - /** Link program. - * - * @param[in] shader Shader used to link program. - * - * @return linked program id . - */ - GLuint link_program(GLuint shader); - /** Compile shader. - * - * @param[in] build_options Shader build options. - * - * @return GLES shader object. - */ - GLuint compile_shader(const std::string &build_options); - -private: - std::string _name; /**< Program name. */ - std::string _source; /**< Source code for the program. */ -}; - -/** GCKernel class */ -class GCKernel final -{ -public: - /** Default Constructor. */ - GCKernel(); - /** Default Copy Constructor. */ - GCKernel(const GCKernel &) = default; - /** Default Move Constructor. */ - GCKernel(GCKernel &&) = default; - /** Default copy assignment operator */ - GCKernel &operator=(const GCKernel &) = default; - /** Default move assignment operator */ - GCKernel &operator=(GCKernel &&) = default; - /** Constructor. - * - * @param[in] name Kernel name. - * @param[in] program Built program. - */ - GCKernel(std::string name, GLuint program); - /** Destructor. - */ - ~GCKernel(); - /** Returns kernel name. - * - * @return Kernel's name. - */ - std::string name() const - { - return _name; - } - /** Get program id. - * - * @return program id. - */ - GLuint get_program() const - { - return _program; - } - /** Use current program. - * - * @return program id. - */ - void use(); - /** Unuse current program. - * - * @return program id. - */ - void unuse(); - /** Set argument value at index of shader params. - * - * @param[in] idx Index in shader params. - * @param[in] value Argument value to be set. 
- */ - template <class T> - void set_argument(unsigned int idx, T value) - { - if(idx >= _shader_arguments.size()) - { - _shader_arguments.resize(idx + 1, 0); - } - - unsigned int *p = reinterpret_cast<unsigned int *>(&value); - _shader_arguments[idx] = *p; - } - /** Clear shader arguments. - * - */ - void clear_arguments() - { - _shader_arguments.clear(); - } - /** Set shader params binding point. - * - * @param[in] binding Shader params binding point. - */ - void set_shader_params_binding_point(unsigned int binding) - { - _shader_params_binding_point = binding; - } - /** Update shader params. - * - */ - void update_shader_params(); - /** Clean up program and ubo. - * - */ - void cleanup(); - -private: - std::string _name; /**< Kernel name */ - GLuint _program; /**< Linked program id */ - std::vector<unsigned int> _shader_arguments; /**< Store all the values of the shader arguments */ - GLuint _shader_params_ubo_name; /**< Uniform buffer object name for shader parameters */ - GLuint _shader_params_binding_point; /**< The binding point of the uniform block for shader parameters */ - GLuint _shader_params_index; /**< The index of the uniform block */ - GLint _shader_params_size; /**< The uniform block data size in the shader */ - static constexpr const char *_shader_params_name = "shader_params"; /**< The uniform block name in the shader */ -}; - -/** GCKernelLibrary class */ -class GCKernelLibrary final -{ - using StringSet = std::set<std::string>; - -public: - /** Default Constructor. */ - GCKernelLibrary(); - /** Default Destructor */ - ~GCKernelLibrary(); - /** Prevent instances of this class from being copied */ - GCKernelLibrary(const GCKernelLibrary &) = delete; - /** Prevent instances of this class from being copied */ - const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete; - /** Get the static instance of @ref GCKernelLibrary. - * This method has been deprecated and will be removed in the next release. 
- * @return The static instance of GCKernelLibrary. - */ - static GCKernelLibrary &get(); - /** Initialises the kernel library. - * - * @param[in] shader_path (Optional) Path of the directory from which shader sources are loaded. - * @param[in] dpy (Optional) EGLdisplay set by external application. - * @param[in] ctx (Optional) EGLContext set by external application. - */ - void init(std::string shader_path = "./", EGLDisplay dpy = EGL_NO_DISPLAY, EGLContext ctx = EGL_NO_CONTEXT); - /** Sets the path that the shaders reside in. - * - * @param[in] shader_path Path of the shader. - */ - void set_shader_path(const std::string &shader_path); - /** Sets display and context to create kernel. - * - * @param[in] dpy EGLdisplay set by external application. - * @param[in] ctx EGLContext set by external application. - */ - void set_context(EGLDisplay dpy, EGLContext ctx); - /** Creates a kernel from the kernel library. - * - * @param[in] shader_name Shader name. - * @param[in] build_options_set Shader build options as a set. - * - * @return The created kernel. - */ - GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set = {}) const; - /** Serializes and saves programs to a binary. */ - void save_binary(); - /** Load serialized binary with all the programs. */ - void load_binary(); - /** Setup a dummy fbo to workaround an issue on Galaxy S8. */ - void setup_dummy_fbo(); - -private: - /** Preprocess GLES shader - * - * @param[in] shader_source Source code of the shader to preprocess. - * - * @return Preprocessed GLES shader object. - */ - std::string preprocess_shader(const std::string &shader_source) const; - /** Load program and its dependencies. - * - * @param[in] program_name Name of the program to load. - */ - const GCProgram &load_program(const std::string &program_name) const; - /** Concatenates contents of a set into a single string. - * - * @param[in] s Input set to concatenate. - * - * @return Concatenated string. 
- */ - std::string stringify_set(const StringSet &s) const; - - EGLDisplay _display; /**< Underlying EGL Display. */ - EGLContext _context; /**< Underlying EGL Context. */ - GLuint _frame_buffer; /**< Dummy fbo */ - GLuint _tex_rt; /**< Dummy texture for render target */ - std::string _shader_path; /**< Path to the shaders folder. */ - mutable std::map<std::string, const GCProgram> _programs_map; /**< Map with all already loaded program data. */ - mutable std::map<std::string, const GCKernel> _built_programs_map; /**< Map with all already built program data. */ - static const std::map<std::string, std::string> _shader_program_map; /**< Map that associates kernel names with programs. */ - static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs. - Used for compile-time shader inclusion. */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_GCKERNELLIBRARY_H */ diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h deleted file mode 100644 index a1537ec152..0000000000 --- a/arm_compute/core/GLES_COMPUTE/GCKernels.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCKERNELS_H -#define ARM_COMPUTE_GCKERNELS_H - -/* Header regrouping all the GLES compute kernels */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" -#include 
"arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h" - -#endif /* ARM_COMPUTE_GCKERNELS_H */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h deleted file mode 100644 index 7b2aad7cec..0000000000 --- a/arm_compute/core/GLES_COMPUTE/IGCKernel.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IGCKERNEL_H -#define ARM_COMPUTE_IGCKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/GPUTarget.h" - -#include "arm_compute/core/IKernel.h" - -namespace arm_compute -{ -class IGCTensor; -class Window; - -/** Common interface for all the GLES kernels */ -class IGCKernel : public IKernel -{ -public: - /** Constructor */ - IGCKernel(); - /** Returns a reference to the GLES kernel of this object. - * - * @return A reference to the GLES kernel of this object. - */ - GCKernel &kernel(); - - /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] binding_point Tensor's binding point in this kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); - - /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] binding_point Tensor's binding point in this kernel. 
- * @param[in] window Window the kernel will be executed on. - */ - void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); - - /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] binding_point Tensor's binding point in this kernel. - * @param[in] window Window the kernel will be executed on. - */ - void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); - - /** Returns the number of arguments enqueued per 1D tensor object. - * - * @return The number of arguments enqueues per 1D tensor object. - */ - unsigned int num_arguments_per_1D_tensor() const; - /** Returns the number of arguments enqueued per 2D tensor object. - * - * @return The number of arguments enqueues per 2D tensor object. - */ - unsigned int num_arguments_per_2D_tensor() const; - /** Returns the number of arguments enqueued per 3D tensor object. - * - * @return The number of arguments enqueues per 3D tensor object. - */ - unsigned int num_arguments_per_3D_tensor() const; - /** Enqueue the OpenGL ES shader to process the given window - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
- */ - virtual void run(const Window &window) = 0; - - /** Set the Local-Workgroup-Size hint - * - * @note This method should be called after the configuration of the kernel - * - * @param[in] lws_hint Local-Workgroup-Size to use - */ - void set_lws_hint(gles::NDRange &lws_hint) - { - _lws_hint = lws_hint; - } - - /** Set the targeted GPU architecture - * - * @param[in] target The targeted GPU architecture - */ - void set_target(GPUTarget target) - { - _target = target; - } - - /** Get the targeted GPU architecture - * - * @return The targeted GPU architecture. - */ - GPUTarget get_target() const - { - return _target; - } - -private: - /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. - * - * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. - * @param[in] tensor Tensor to set as an argument of the object's kernel. - * @param[in] binding_point Tensor's binding point in this kernel. - * @param[in] window Window the kernel will be executed on. - */ - template <unsigned int dimension_size> - void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); - - /** Returns the number of arguments enqueued per tensor object. - * - * @return The number of arguments enqueued per tensor object. - */ - template <unsigned int dimension_size> - unsigned int num_arguments_per_tensor() const; - -protected: - GCKernel _kernel; /**< GLES kernel to run */ - gles::NDRange _lws_hint; /**< Local workgroup size hint for the GLES kernel */ - GPUTarget _target; /**< The targeted GPU */ -}; - -/** Add the kernel to the command queue with the given window. - * - * @note Depending on the size of the window, this might translate into several jobs being enqueued. 
- * - * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. - * - * @param[in] kernel Kernel to enqueue - * @param[in] window Window the kernel has to process. - * @param[in] lws Local workgroup size requested, by default (1, 1, 1) - * - * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. - */ -void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws = gles::NDRange(1U, 1U, 1U)); -} -#endif /*ARM_COMPUTE_IGCKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h deleted file mode 100644 index ae8fd40888..0000000000 --- a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_IGCSIMPLE2DKERNEL_H -#define ARM_COMPUTE_IGCSIMPLE2DKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */ -class IGCSimple2DKernel : public IGCSimpleKernel -{ -public: - // Inherited methods overridden: - void run(const Window &window) override; -}; -} -#endif /*ARM_COMPUTE_IGCSIMPLE2DKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h deleted file mode 100644 index 40a21ee147..0000000000 --- a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_IGCSIMPLE3DKERNEL_H -#define ARM_COMPUTE_IGCSIMPLE3DKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for simple GLES kernels having 1 tensor input and 1 tensor output. - * Both input tensor and output tensor must have at least 3 dimensions. - */ -class IGCSimple3DKernel : public IGCSimple2DKernel -{ -public: - // Inherited methods overridden: - void run(const Window &window) override; -}; -} -#endif /*ARM_COMPUTE_IGCSIMPLE3DKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h deleted file mode 100644 index c0f561ab5d..0000000000 --- a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_IGCSIMPLEKERNEL_H -#define ARM_COMPUTE_IGCSIMPLEKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/Helpers.h" - -namespace arm_compute -{ -/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output */ -class IGCSimpleKernel : public IGCKernel -{ -public: - /** Constructor. */ - IGCSimpleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - IGCSimpleKernel(const IGCSimpleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete; - /** Allow instances of this class to be moved */ - IGCSimpleKernel(IGCSimpleKernel &&) = default; - /** Allow instances of this class to be moved */ - IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default; - /** Default destructor */ - ~IGCSimpleKernel() = default; - - /** Configure the kernel - * - * @param[in] input Source tensor. - * @param[out] output Destination tensor. - * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. - * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. - * @param[in] border_size (Optional) Size of the border. - */ - void configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); - -protected: - const IGCTensor *_input; - IGCTensor *_output; -}; -} - -#endif /*ARM_COMPUTE_IGCSIMPLEKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h deleted file mode 100644 index c382095846..0000000000 --- a/arm_compute/core/GLES_COMPUTE/IGCTensor.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IGCTENSOR_H -#define ARM_COMPUTE_IGCTENSOR_H - -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/ITensor.h" - -#include <cstdint> - -namespace arm_compute -{ -/** Interface for GLES Compute tensor */ -class IGCTensor : public ITensor -{ -public: - /** Default constructor. 
*/ - IGCTensor(); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - IGCTensor(const IGCTensor &) = delete; - - /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ - IGCTensor &operator=(const IGCTensor &) = delete; - - /** Allow instances of this class to be moved */ - IGCTensor(IGCTensor &&) = default; - - /** Allow instances of this class to be moved */ - IGCTensor &operator=(IGCTensor &&) = default; - - /** Virtual destructor */ - virtual ~IGCTensor() = default; - - /** Map on an allocated buffer. - * - * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - */ - void map(bool blocking = true); - /** Unmap an allocated and mapped buffer. - */ - void unmap(); - /** Clear the contents of the tensor synchronously. - */ - void clear(); - - // Inherited methods overridden: - uint8_t *buffer() const override; - /** Interface to be implemented by the child class to return the tensor's gles compute buffer id. - * - * @return A SSBO buffer id. - */ - virtual GLuint gc_buffer() const = 0; - - /** Flag indicating whether the tensor has been left aligned by a kernel and therefore needs shifting. - * - * @return True if the tensor is left aligned. - */ - bool needs_shifting() const; - /** Set the flag indicating whether or not a tensor needs shifting. - * - * @param[in] needs_shifting Indicates if the tensor is left aligned or not. - * - */ - void set_needs_shifting(bool needs_shifting); - -protected: - /** Method to be implemented by the child class to map the SSBO. 
- * - * @param[in] blocking If true, then the mapping will be ready to use by the time - * this method returns, else it is the caller's responsibility - * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. - */ - virtual uint8_t *do_map(bool blocking) = 0; - /** Method to be implemented by the child class to unmap the SSBO. - * - * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before - * the memory is accessed by the device. - */ - virtual void do_unmap() = 0; - -private: - uint8_t *_mapping; - bool _needs_shifting; -}; - -/** Interface for GLES Compute image */ -using IGCImage = IGCTensor; -} -#endif /*ARM_COMPUTE_IGCTENSOR_H */ diff --git a/arm_compute/core/GLES_COMPUTE/OpenGLES.h b/arm_compute/core/GLES_COMPUTE/OpenGLES.h deleted file mode 100644 index 445443602d..0000000000 --- a/arm_compute/core/GLES_COMPUTE/OpenGLES.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_OPENGLES_H -#define ARM_COMPUTE_OPENGLES_H - -#include "arm_compute/core/Log.h" - -#include <EGL/egl.h> -#include <EGL/eglext.h> -#include <EGL/eglplatform.h> -#include <GLES3/gl31.h> -#include <GLES3/gl3ext.h> -#include <cstddef> - -#ifdef ARM_COMPUTE_DEBUG_ENABLED -#define ARM_COMPUTE_GL_CHECK(x) \ - x; \ - { \ - GLenum error = glGetError(); \ - if(error != GL_NO_ERROR) \ - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("glGetError() = %i (0x%.8x)\n", error, error); \ - } -#else /* ARM_COMPUTE_DEBUG_ENABLED */ -#define ARM_COMPUTE_GL_CHECK(x) x -#endif /* ARM_COMPUTE_DEBUG_ENABLED */ - -namespace arm_compute -{ -namespace gles -{ -/** Class interface for specifying NDRange values. */ -class NDRange -{ -private: - size_t _sizes[3]; - size_t _dimensions; - -public: - /** Default constructor - resulting range has zero dimensions. */ - NDRange() - : _dimensions(0) - { - _sizes[0] = 0; - _sizes[1] = 0; - _sizes[2] = 0; - } - - /** Constructs one-dimensional range. - * - * @param[in] size0 Size of the first dimension. - */ - NDRange(size_t size0) - : _dimensions(1) - { - _sizes[0] = size0; - _sizes[1] = 1; - _sizes[2] = 1; - } - - /** Constructs two-dimensional range. - * - * @param[in] size0 Size of the first dimension. - * @param[in] size1 Size of the second dimension. - */ - NDRange(size_t size0, size_t size1) - : _dimensions(2) - { - _sizes[0] = size0; - _sizes[1] = size1; - _sizes[2] = 1; - } - - /** Constructs three-dimensional range. - * - * @param[in] size0 Size of the first dimension. - * @param[in] size1 Size of the second dimension. - * @param[in] size2 Size of the third dimension. 
- */ - NDRange(size_t size0, size_t size1, size_t size2) - : _dimensions(3) - { - _sizes[0] = size0; - _sizes[1] = size1; - _sizes[2] = size2; - } - - /** Conversion operator to const size_t *. - * - * @returns A pointer to the size of the first dimension. - */ - operator const size_t *() const - { - return _sizes; - } - - /** Queries the number of dimensions in the range. - * - * @returns The number of dimensions. - */ - size_t dimensions() const - { - return _dimensions; - } - - /** Returns the size of the object in bytes based on the runtime number of dimensions - * - * @returns The size of the object in bytes. - */ - size_t size() const - { - return _dimensions * sizeof(size_t); - } - - /** Returns the sizes array for each dimensions. - * - * @returns The sizes array - */ - size_t *get() - { - return _sizes; - } - - /** Returns the sizes array for each dimensions. - * - * @returns The sizes array - */ - const size_t *get() const - { - return _sizes; - } -}; - -static const NDRange NullRange; -static const NDRange Range_128_1 = NDRange(128, 1); -} // namespace gles - -/** Check if the OpenGL ES 3.1 API is available at runtime. - * - * @returns true if the OpenGL ES 3.1 API is available. - */ -bool opengles31_is_available(); -} // namespace arm_compute - -#endif /* ARM_COMPUTE_OPENGLES_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h deleted file mode 100644 index d55f98fa66..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the absolute difference kernel. - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class GCAbsoluteDifferenceKernel : public IGCKernel -{ -public: - /** Default constructor. 
*/ - GCAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~GCAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output images. - * - * @param[in] input1 Source tensor. Data types supported: U8 - * @param[in] input2 Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - */ - void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input1; /**< Source tensor 1. */ - const IGCTensor *_input2; /**< Source tensor 2. */ - IGCTensor *_output; /**< Destination tensor. */ -}; -} -#endif /* ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h deleted file mode 100644 index 65e018a50a..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -// Forward declarations -class IGCTensor; -class GCCoreRuntimeContext; - -/** Interface for the activation layer kernel. 
*/ -class GCActivationLayerKernel : public IGCKernel -{ -public: - /** Default constructor - * - * @param[in, out] ctx Core context to use - */ - explicit GCActivationLayerKernel(GCCoreRuntimeContext *ctx = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCActivationLayerKernel(const GCActivationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - GCActivationLayerKernel(GCActivationLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default; - /** Default destructor */ - ~GCActivationLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type should match the input data type. - * @param[in] act_info Activation layer information. 
- */ - void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - IGCTensor *_input; - IGCTensor *_output; - GCCoreRuntimeContext *_ctx; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h deleted file mode 100644 index 7e8159c638..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCArithmeticAdditionKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCARITHMETICADDITIONKERNEL_H -#define ARM_COMPUTE_GCARITHMETICADDITIONKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the arithmetic addition kernel - * - * Arithmetic addition is computed by: - * @f[ output(x,y) = input1(x,y) + input2(x,y) @f] - */ -class GCArithmeticAdditionKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCArithmeticAdditionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCArithmeticAdditionKernel(const GCArithmeticAdditionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCArithmeticAdditionKernel &operator=(const GCArithmeticAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - GCArithmeticAdditionKernel(GCArithmeticAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - GCArithmeticAdditionKernel &operator=(GCArithmeticAdditionKernel &&) = default; - /** Default destructor */ - ~GCArithmeticAdditionKernel() = default; - /** Initialise the kernel's inputs, output and convertion policy. - * - * @param[in] input1 First tensor input. Data types supported: F16. - * @param[in] input2 Second tensor input. Data types supported: F16. - * @param[out] output Output tensor. Data types supported: F16. - * @param[in] policy Policy to use to handle overflow. - */ - void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref GCArithmeticAdditionKernel - * - * @param[in] input1 First tensor input info. Data types supported: F16. - * @param[in] input2 Second tensor input info. Data types supported: F16. - * @param[out] output Output tensor info. Data types supported: F16. 
- * @param[in] policy Policy to use to handle overflow. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input1; /**< Source tensor 1 */ - const IGCTensor *_input2; /**< Source tensor 2 */ - IGCTensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_GCARITHMETICADDITIONKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h deleted file mode 100644 index eb7a99c59e..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the BatchNormalization layer kernel. - */ -class GCBatchNormalizationLayerKernel : public IGCKernel -{ -public: - /** Constructor */ - GCBatchNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~GCBatchNormalizationLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. 
Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (optional) Small value to avoid division with zero. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - */ - void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta = nullptr, const IGCTensor *gamma = nullptr, float epsilon = 0.001f, - ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref GCBatchNormalizationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. 
Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_output; - const IGCTensor *_mean; - const IGCTensor *_var; - const IGCTensor *_beta; - const IGCTensor *_gamma; - float _epsilon; -}; -} -#endif /*ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h deleted file mode 100644 index d96fb56771..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_GCCOL2IMKERNEL_H -#define ARM_COMPUTE_GCCOL2IMKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the col2im reshaping kernel. - * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref GCIm2ColKernel. 
- * - * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: - * - * @f[ - * \left( \begin{array}{ccccccccc} - * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccc} - * a0 & a1 & a2 \\ - * a3 & a4 & a5 \\ - * a6 & a7 & a8 \\ - * \end{array} \right) - * @f] - */ -class GCCol2ImKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCCol2ImKernel(); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCCol2ImKernel(const GCCol2ImKernel &) = delete; - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete; - - /** Allow instances of this class to be moved */ - GCCol2ImKernel(GCCol2ImKernel &&) = default; - - /** Allow instances of this class to be moved */ - GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default; - - /** Default destructor */ - ~GCCol2ImKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Data types supported: F16/F32 - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. 
- */ - void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> convolved_dims); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_output; - std::pair<unsigned int, unsigned int> _convolved_dims; -}; -} - -#endif /*ARM_COMPUTE_GCCOL2IMKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h deleted file mode 100644 index 9c7754947a..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class GCDepthConcatenateLayerKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDepthConcatenateLayerKernel(const GCDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDepthConcatenateLayerKernel &operator=(const GCDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - GCDepthConcatenateLayerKernel(GCDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - GCDepthConcatenateLayerKernel &operator=(GCDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~GCDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
- * - */ - void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_output; - int _depth_offset; -}; -} -#endif /* ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h deleted file mode 100644 index 8faa54a205..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCDEPTHWISECONVOLUTIONKERNEL3x3_H -#define ARM_COMPUTE_GCDEPTHWISECONVOLUTIONKERNEL3x3_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. - */ -class GCDepthwiseConvolutionLayer3x3Kernel : public IGCKernel -{ -public: - /** Default constructor */ - GCDepthwiseConvolutionLayer3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDepthwiseConvolutionLayer3x3Kernel(const GCDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDepthwiseConvolutionLayer3x3Kernel &operator=(const GCDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Default Move Constructor. */ - GCDepthwiseConvolutionLayer3x3Kernel(GCDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Default move assignment operator */ - GCDepthwiseConvolutionLayer3x3Kernel &operator=(GCDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: F16. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
- */ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); - - // Inherited methods overridden: - void run(const Window &window) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; - const IGCTensor *_input; - IGCTensor *_output; - const IGCTensor *_weights; - const IGCTensor *_biases; - unsigned int _conv_stride_x; - unsigned int _conv_stride_y; - unsigned int _conv_pad_left; - unsigned int _conv_pad_top; - gles::NDRange _lws; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCDEPTHWISECONVOLUTIONKERNEL3x3_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h deleted file mode 100644 index 43f94f8662..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the direct convolution kernel. - */ -template <unsigned int kernel_size> -class GCDirectConvolutionLayerKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCDirectConvolutionLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default; - /** Default destructor */ - ~GCDirectConvolutionLayerKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. - * @param[in] bias Biases tensor. Shared bias supported. 
Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - BorderSize border_size() const override; - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - const IGCTensor *_bias; - const IGCTensor *_weights; - IGCTensor *_output; - BorderSize _border_size; - int _conv_stride_x; - int _conv_stride_y; - int _conv_pad_x; - int _conv_pad_y; - gles::NDRange _lws; -}; - -/** Interface for the 1x1 direct convolution kernel */ -using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>; -/** Interface for the 3x3 direct convolution kernel */ -using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>; -/** Interface for the 5x5 direct convolution kernel */ -using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>; -} -#endif /*ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h deleted file mode 100644 index e3dda67a8a..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutLayerKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H -#define ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the dropout layer kernel. - * - * Dropout is used to improve over-fit on neural networks. 
- * - */ -class GCDropoutLayerKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCDropoutLayerKernel(); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDropoutLayerKernel(const GCDropoutLayerKernel &) = delete; - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCDropoutLayerKernel &operator=(const GCDropoutLayerKernel &) = delete; - - /** Allow instances of this class to be moved */ - GCDropoutLayerKernel(GCDropoutLayerKernel &&) = default; - - /** Allow instances of this class to be moved */ - GCDropoutLayerKernel &operator=(GCDropoutLayerKernel &&) = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor for this op. Data types supported: F16/F32 - * @param[out] mask The mask tensor. Data types supported: Same as @p input - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] ratio Dropout ratio - * @param[in] forward Forward or backward propagation - * - */ - void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_mask; - IGCTensor *_output; - unsigned int _num_elems_processed_per_iteration; -}; -} - -#endif /*ARM_COMPUTE_GCDROPOUTLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h deleted file mode 100644 index 4dd7aa0ec1..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCFILLBORDERKERNEL_H -#define ARM_COMPUTE_GCFILLBORDERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for filling the border of a kernel */ -class GCFillBorderKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCFillBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCFillBorderKernel(const GCFillBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - GCFillBorderKernel(GCFillBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default; - /** Default destructor */ - ~GCFillBorderKernel() = default; - - /** Initialise the kernel's input, output and border mode. - * - * @param[in,out] tensor Tensor to process Data types supported: F16/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - */ - void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - - /** Function to set the constant value on fill border kernel depending on type. - * - * @param[in] idx Index of the kernel argument to set. - * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. 
- */ - template <class T> - void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); - - // Inherited methods overridden: - void run(const Window &window) override; - bool is_parallelisable() const override; - -private: - const IGCTensor *_tensor; -}; -} -#endif /*ARM_COMPUTE_GCFILLBORDERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h deleted file mode 100644 index cbc60da443..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H -#define ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** OpenGL ES kernel which interleaves the elements of a matrix A in chunk of 4x4 - * - * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ - * \end{array} \right) - * @f] - * - * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] - */ -class GCGEMMInterleave4x4Kernel : public IGCKernel -{ -public: - /** Default constructor */ - GCGEMMInterleave4x4Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete; - /** Allow instances of this class to be moved */ - GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default; - /** Allow instances of this class to be moved */ - GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: F16, F32 - * @param[out] output Output tensor. 
Data type supported: same as @p input - */ - void configure(const IGCTensor *input, IGCTensor *output); - - // Inherited methods overridden - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_output; -}; -} -#endif /* ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h deleted file mode 100644 index 95f991ee73..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H -#define ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -/** Interface to add a bias to each row of the input tensor - * - */ -class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCGEMMMatrixAccumulateBiasesKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Allow instances of this class to be moved */ - GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Allow instances of this class to be moved */ - GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 - * @param[in] biases The shared biases tensor to append. It must be 1D tensor. 
Data types supported: Same as @p input - */ - void configure(IGCTensor *accum, const IGCTensor *biases); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - IGCTensor *_accum; - const IGCTensor *_biases; - gles::NDRange _lws; -}; -} - -#endif /*ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h deleted file mode 100644 index e4157a1327..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H -#define ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta. - * The matrices must have the same dimensions - * - * @note This kernel is computed if and only if beta != 0.0. - */ -class GCGEMMMatrixAdditionKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCGEMMMatrixAdditionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default; - /** Initialise the kernel's input, output and beta value - * - * @note The input and output tensors must have the same dimensions - * - * @param[in] input Input tensor (Matrix C). Data types supported: F32 - * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. 
Data type supported: same as @p input - * @param[in] beta Weight of matrix C - */ - void configure(const IGCTensor *input, IGCTensor *output, float beta); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_output; -}; -} - -#endif /* ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h deleted file mode 100644 index 4dcae2e536..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GPUTarget.h" - -namespace arm_compute -{ -class IGCTensor; - -/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha - * - * @attention The second input tensor must have at least 2 dimensions (matrix) - * - */ -class GCGEMMMatrixMultiplyKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCGEMMMatrixMultiplyKernel(); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete; - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete; - - /** Allow instances of this class to be moved */ - GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default; - - /** Allow instances of this class to be moved */ - GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default; - - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - */ - void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref GCGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel - * @param[in] reshape_info GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] gpu_target GPU Target - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, - GPUTarget gpu_target); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input0; - const IGCTensor *_input1; - IGCTensor *_output; -}; -} -#endif /* ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h deleted file mode 100644 index 29a4c8d209..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H -#define ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** OpenGLES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) - * - * Following an example of how the transposition1xW works when the input data type is F32 - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - * - * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) - * - */ -class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: F16, F32 - * @param[out] output Output tensor. 
Data type supported: same as @p input - */ - void configure(const IGCTensor *input, IGCTensor *output); - - // Inherited methods overridden: - void run(const Window &window) override; -}; -} -#endif /* ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h deleted file mode 100644 index 7d1a53c4c3..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_GCIM2COLKERNEL_H -#define ARM_COMPUTE_GCIM2COLKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; -class Size2D; - -/** Interface for the im2col reshape kernel. - * - * Rearranges image blocks into columns. 
It is used to strip out each convolution block to a single column. - * It is used to transform a convolution to a plain matrix multiplication. - * - * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * = - * \left( \begin{array}{ccccccccc} - * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ - * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ - * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ - * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - */ -class GCIm2ColKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCIm2ColKernel(const GCIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - GCIm2ColKernel(GCIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 - * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
- * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - */ - void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); - - // Inherited methods overridden: - void run(const Window &window) override; - - /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 - * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, - * while every dimension above represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); - -private: - /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel. 
- */ - void run_reduced(const Window &window); - /** run the generic convolution layer input reshape kernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel. - */ - void run_generic(const Window &window); - - /** Common signature for the kernel to run */ - using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &); - -private: - const IGCTensor *_input; - IGCTensor *_output; - std::pair<unsigned int, unsigned int> _convolved_dims; - std::pair<unsigned int, unsigned int> _kernel_dims; - unsigned int _num_elems_processed_per_iteration; - Im2ColFunction _run_func; -}; -} - -#endif /*ARM_COMPUTE_GCIM2COLKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h deleted file mode 100644 index dd00caecfb..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the normalization layer kernel. - */ -class GCNormalizationLayerKernel : public IGCKernel -{ -public: - /** Constructor */ - GCNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default; - /** Default destrutor */ - ~GCNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F32. - * @param[in] squared_input Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data types should match the input type. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type. 
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - */ - void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window) override; - BorderSize border_size() const override; - -private: - const IGCTensor *_input; - const IGCTensor *_squared_input; - IGCTensor *_output; - BorderSize _border_size; -}; -} -#endif /*ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h deleted file mode 100644 index 5156da8b2c..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizePlanarYUVLayerKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTEH -#define ARM_COMPUTE_GCNORMALIZEPLANARYUVLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the NormalizePlanarYUV layer kernel. - */ -class GCNormalizePlanarYUVLayerKernel : public IGCKernel -{ -public: - /** Constructor */ - GCNormalizePlanarYUVLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCNormalizePlanarYUVLayerKernel(const GCNormalizePlanarYUVLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCNormalizePlanarYUVLayerKernel &operator=(const GCNormalizePlanarYUVLayerKernel &) = delete; - /** Default Move Constructor. */ - GCNormalizePlanarYUVLayerKernel(GCNormalizePlanarYUVLayerKernel &&) = default; - /** Default move assignment operator */ - GCNormalizePlanarYUVLayerKernel &operator=(GCNormalizePlanarYUVLayerKernel &&) = default; - /** Default destructor */ - ~GCNormalizePlanarYUVLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: F16. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the feature maps [FM]. 
- * Data types supported: same as @p input - */ - void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *std); - /** Static function to check if given info will lead to a valid configuration of @ref GCNormalizePlanarYUVLayerKernel - * - * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. - * Data types supported: F16. - * @param[out] output Destination tensor info. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input - * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels. - * Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - IGCTensor *_output; - const IGCTensor *_mean; - const IGCTensor *_std; -}; -} -#endif /*ARM_COMPUTE_GCNORMALIZEPLANARYUVLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h deleted file mode 100644 index 0c4b656175..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H -#define ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the pixelwise multiplication kernel. 
- * - */ -class GCPixelWiseMultiplicationKernel : public IGCKernel -{ -public: - /** Default constructor.*/ - GCPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - */ - void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input1; - const IGCTensor *_input2; - IGCTensor *_output; -}; -} - -#endif /*ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h deleted file mode 100644 index 7a2fb84f34..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -#include "arm_compute/core/Error.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the pooling layer kernel */ -class GCPoolingLayerKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default; - /** Default destructor */ - ~GCPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices = nullptr); - - /** Static function to check if given info will lead to a valid configuration of @ref GCPoolingLayerKernel - * - * @param[in] input Source tensor info. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. 
Data type supported: U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); - - // Inherited methods overridden: - void run(const Window &window) override; - BorderSize border_size() const override; - -private: - const IGCTensor *_input; - IGCTensor *_output; - IGCTensor *_indices; - PoolingLayerInfo _pool_info; - BorderSize _border_size; - unsigned int _num_elems_processed_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h deleted file mode 100644 index 754f15cbd8..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCScaleKernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCSCALEKERNEL_H -#define ARM_COMPUTE_GCSCALEKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the scale kernel */ -class GCScaleKernel : public IGCSimple3DKernel -{ -public: - /** Initialise the kernel's inputs, output and interpolation policy - * - * @param[in] input Source tensor. Data types supported: F16 - * @param[out] output Destination tensor. Data types supported: Same as @p input - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy Interpolation type to use - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER - */ - void configure(const IGCTensor *input, IGCTensor *output, InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy = SamplingPolicy::CENTER); - - // Inherited methods overridden: - void run(const Window &window) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCSCALEKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h deleted file mode 100644 index 280efe11f8..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** Interface for the identifying the max value of 1D Logits */ -class GCLogits1DMaxKernel : public IGCSimple3DKernel -{ -public: - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[out] output Destination tensor. 
Data types supported: same as @p input - */ - void configure(const IGCTensor *input, IGCTensor *output); -}; - -/** Interface for shifting the logits values around the max value and exponentiating the result */ -class GCLogits1DShiftExpSumKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCLogits1DShiftExpSumKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete; - /** Allow instances of this class to be moved */ - GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default; - /** Allow instances of this class to be moved */ - GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[in] max Max values tensor. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input - */ - void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - const IGCTensor *_max; - IGCTensor *_output; - IGCTensor *_sum; -}; - -/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. 
*/ -class GCLogits1DNormKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCLogits1DNormKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete; - /** Allow instances of this class to be moved */ - GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default; - /** Allow instances of this class to be moved */ - GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - const IGCTensor *_sum; - IGCTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h deleted file mode 100644 index 5243e54daf..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCTensorShiftKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCTENSORSHIFTKERNEL_H -#define ARM_COMPUTE_GCTENSORSHIFTKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" - -namespace arm_compute -{ -class IGCTensor; -/** Interface for the kernel to shift valid data on a tensor. 
- * - * For example shifting 3x3 valid data with padding of 1 to right: - * @f[ - * \left( \begin{array}{ccccc} - * 0 & 0 & 0 & 0 & 0 \\ - * a00 & a01 & a02 & 0 & 0 \\ - * a10 & a11 & a12 & 0 & 0 \\ - * a20 & a21 & a22 & 0 & 0 \\ - * 0 & 0 & 0 & 0 & 0 \\ - * \end{array} \right) - * = - * \left( \begin{array}{ccccc} - * 0 & 0 & 0 & 0 & 0 \\ - * 0 & a00 & a01 & a02 & 0 \\ - * 0 & a10 & a11 & a12 & 0 \\ - * 0 & a20 & a21 & a22 & 0 \\ - * 0 & 0 & 0 & 0 & 0 \\ - * \end{array} \right) - * @f] - */ -class GCTensorShiftKernel : public IGCKernel -{ -public: - /** Default constructor */ - GCTensorShiftKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCTensorShiftKernel(const GCTensorShiftKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCTensorShiftKernel &operator=(const GCTensorShiftKernel &) = delete; - /** Allow instances of this class to be moved */ - GCTensorShiftKernel(GCTensorShiftKernel &&) = default; - /** Allow instances of this class to be moved */ - GCTensorShiftKernel &operator=(GCTensorShiftKernel &&) = default; - /** Default destructor */ - ~GCTensorShiftKernel() = default; - /** Set the input of the kernel. - * - * @param[in,out] input Source tensor. Data types supported: F16/F32 - */ - void configure(IGCTensor *input); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - IGCTensor *_input; - gles::NDRange _lws; - int _left_padding; -}; -} -#endif /*ARM_COMPUTE_GCTENSORSHIFTKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h deleted file mode 100644 index a981ae6d1f..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GCTRANSPOSEKERNEL_H -#define ARM_COMPUTE_GCTRANSPOSEKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" - -namespace arm_compute -{ -class IGCTensor; - -/** OpenGL ES kernel which transposes the elements of a matrix. - * - * [width, height, batch] -> [height, width, batch] - * - */ -class GCTransposeKernel : public IGCSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: F16/F32 - * @param[out] output Output tensor. 
Data type supported: Same as @p input - */ - void configure(const IGCTensor *input, IGCTensor *output); - - // Inherited methods overridden: - void run(const Window &window) override; -}; -} -#endif /* ARM_COMPUTE_GCTRANSPOSEKERNEL_H */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h deleted file mode 100644 index 134346b8da..0000000000 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCWeightsReshapeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_GCWEIGHTSRESHAPEKERNEL_H -#define ARM_COMPUTE_GCWEIGHTSRESHAPEKERNEL_H - -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" - -namespace arm_compute -{ -/** GLES Compute kernel to perform reshaping on the weights used by convolution and locally connected layer - * - * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. - * In combination with the @ref GCIm2ColKernel can transform a convolution to a matrix multiplication. - * - * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: - * @f[ - * \left( \begin{array}{ccc} - * a000 & a001 & a002 \\ - * a010 & a011 & a012 \\ - * a020 & a021 & a022 \\ - * \end{array} \right) - * \left( \begin{array}{ccc} - * a100 & a101 & a102 \\ - * a110 & a111 & a112 \\ - * a120 & a121 & a122 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ - * \end{array} \right) - * @f] - */ -class GCWeightsReshapeKernel : public IGCKernel -{ -public: - /** Constructor.*/ - GCWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCWeightsReshapeKernel(const GCWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - GCWeightsReshapeKernel &operator=(const GCWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - GCWeightsReshapeKernel(GCWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - GCWeightsReshapeKernel &operator=(GCWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~GCWeightsReshapeKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 
Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, batches] if unshared. Data types supported: F16, F32 - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, batches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input - */ - void configure(const IGCTensor *input, const IGCTensor *biases, IGCTensor *output); - - // Inherited methods overridden: - void run(const Window &window) override; - -private: - const IGCTensor *_input; - const IGCTensor *_biases; - IGCTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_GCWEIGHTSRESHAPEKERNEL_H */
\ No newline at end of file diff --git a/arm_compute/core/GPUTarget.h b/arm_compute/core/GPUTarget.h index 4959ee5e8a..b107a52d9f 100644 --- a/arm_compute/core/GPUTarget.h +++ b/arm_compute/core/GPUTarget.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,10 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_GPUTARGET_H -#define ARM_COMPUTE_GPUTARGET_H +#ifndef ACL_ARM_COMPUTE_CORE_GPUTARGET_H +#define ACL_ARM_COMPUTE_CORE_GPUTARGET_H -#include "arm_compute/core/Helpers.h" +#include "support/Traits.h" #include <string> @@ -33,25 +33,38 @@ namespace arm_compute /** Available GPU Targets */ enum class GPUTarget { - UNKNOWN = 0x101, - GPU_ARCH_MASK = 0xF00, - MIDGARD = 0x100, - BIFROST = 0x200, - VALHALL = 0x300, - T600 = 0x110, - T700 = 0x120, - T800 = 0x130, - G71 = 0x210, - G72 = 0x220, - G51 = 0x230, - G51BIG = 0x231, - G51LIT = 0x232, - G52 = 0x240, - G52LIT = 0x241, - G76 = 0x250, - G77 = 0x310, - TBOX = 0x320, - TODX = 0x330, + UNKNOWN = 0x101, + GPU_ARCH_MASK = 0xF00, + GPU_GENERATION_MASK = 0x0F0, + MIDGARD = 0x100, + BIFROST = 0x200, + VALHALL = 0x300, + FIFTHGEN = 0X400, + T600 = 0x110, + T700 = 0x120, + T800 = 0x130, + G71 = 0x210, + G72 = 0x220, + G51 = 0x221, + G51BIG = 0x222, + G51LIT = 0x223, + G31 = 0x224, + G76 = 0x230, + G52 = 0x231, + G52LIT = 0x232, + G77 = 0x310, + G57 = 0x311, + G78 = 0x320, + G68 = 0x321, + G78AE = 0x330, + G710 = 0x340, + G610 = 0x341, + G510 = 0x342, + G310 = 0x343, + G715 = 0x350, + G615 = 0x351, + G720 = 0x410, + G620 = 0X411 }; /** Enable bitwise operations on GPUTarget enumerations */ @@ -104,4 +117,4 @@ inline bool gpu_target_is_in(GPUTarget target_to_check, GPUTarget target) return target_to_check == target; } } // namespace arm_compute -#endif /* ARM_COMPUTE_GPUTARGET_H */ +#endif // ACL_ARM_COMPUTE_CORE_GPUTARGET_H diff --git 
a/arm_compute/core/HOGInfo.h b/arm_compute/core/HOGInfo.h deleted file mode 100644 index 3cc472b274..0000000000 --- a/arm_compute/core/HOGInfo.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_HOGINFO_H -#define ARM_COMPUTE_HOGINFO_H - -#include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" - -#include <cstddef> - -namespace arm_compute -{ -/** Store the HOG's metadata */ -class HOGInfo -{ -public: - /** Default constructor */ - HOGInfo(); - /** Default destructor */ - virtual ~HOGInfo() = default; - /** Allow instances of this class to be copy constructed */ - HOGInfo(const HOGInfo &) = default; - /** Allow instances of this class to be copied */ - HOGInfo &operator=(const HOGInfo &) = default; - /** Allow instances of this class to be move constructed */ - HOGInfo(HOGInfo &&) = default; - /** Allow instances of this class to be moved */ - HOGInfo &operator=(HOGInfo &&) = default; - /** Constructor - * - * @param[in] cell_size Cell size in pixels - * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. - * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. - * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size - * @param[in] num_bins Number of histogram bins for each cell - * @param[in] normalization_type (Optional) Normalization type to use for each block - * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method - * @param[in] phase_type (Optional) Type of @ref PhaseType - */ - HOGInfo(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, - HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); - /** Initialize the metadata structure with the given parameters - * - * @param[in] cell_size Cell size in pixels - * @param[in] block_size Block size in pixels. Must be a multiple of cell_size. 
- * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride. - * @param[in] block_stride Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size - * @param[in] num_bins Number of histogram bins for each cell - * @param[in] normalization_type (Optional) Normalization type to use for each block - * @param[in] l2_hyst_threshold (Optional) Threshold used for L2HYS_NORM normalization method - * @param[in] phase_type (Optional) Type of @ref PhaseType - */ - void init(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins, - HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED); - /** The cell size in pixels - * - * @return The cell size in pixels - */ - const Size2D &cell_size() const; - /** The block size in pixels - * - * @return The block size in pixels - */ - const Size2D &block_size() const; - /** The detection window size in pixels - * - * @return The detection window size in pixels - */ - const Size2D &detection_window_size() const; - /** The block stride in pixels. 
The block stride is the distance between 2 consecutive blocks - * - * @return The block stride in pixels - */ - const Size2D &block_stride() const; - /** The number of histogram bins for each cell - * - * @return The number of histogram bins for each cell - */ - size_t num_bins() const; - /** The normalization type - * - * @return The normalization type - */ - HOGNormType normalization_type() const; - /** Threshold used for L2HYS_NORM normalization type - * - * @return Threshold used for L2HYS_NORM normalization type - */ - float l2_hyst_threshold() const; - /** The type of @ref PhaseType - * - * @return The type of @ref PhaseType - */ - PhaseType phase_type() const; - /** The size of HOG descriptor - * - * @return The size of HOG descriptor - */ - size_t descriptor_size() const; - /** Calculates the number of cells for each block - * - * @return The Size2D data object which stores the number of cells along the x and y directions - */ - Size2D num_cells_per_block() const; - - /** Calculates the number of cells per block stride - * - * @return The Size2D data object which stores the number of cells per block stride along the x and y directions - */ - Size2D num_cells_per_block_stride() const; - /** Calculates the number of block positions for the given image size - * - * @param[in] image_size The input image size data object - * - * @return The Size2D data object which stores the number of block positions along the x and y directions - */ - Size2D num_block_positions_per_image(const Size2D &image_size) const; - -private: - Size2D _cell_size; - Size2D _block_size; - Size2D _detection_window_size; - Size2D _block_stride; - size_t _num_bins; - HOGNormType _normalization_type; - float _l2_hyst_threshold; - PhaseType _phase_type; - size_t _descriptor_size; -}; -} -#endif /*ARM_COMPUTE_HOGINFO_H */ diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index 09c672ecfa..960201510a 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,23 +24,17 @@ #ifndef ARM_COMPUTE_HELPERS_H #define ARM_COMPUTE_HELPERS_H -#include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Steps.h" -#include "arm_compute/core/Strides.h" -#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/ITensor.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "support/MemorySupport.h" #include <array> #include <cstddef> #include <cstdint> -#include <memory> #include <tuple> -#include <type_traits> -#include <utility> namespace arm_compute { @@ -48,307 +42,6 @@ class IKernel; class ITensor; class ITensorInfo; -/** Disable bitwise operations by default */ -template <typename T> -struct enable_bitwise_ops -{ - static constexpr bool value = false; /**< Disabled */ -}; - -#ifndef DOXYGEN_SKIP_THIS -template <typename T> -typename std::enable_if<enable_bitwise_ops<T>::value, T>::type operator&(T lhs, T rhs) -{ - using underlying_type = typename std::underlying_type<T>::type; - return static_cast<T>(static_cast<underlying_type>(lhs) & static_cast<underlying_type>(rhs)); -} -#endif /* DOXYGEN_SKIP_THIS */ - -/** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object - * It also calls the kernel's configuration. - * - * @param[in] args All the arguments that need pass to kernel's configuration. - * - * @return A unique pointer pointed to a CL/GLES kernel object - */ -template <typename Kernel, typename... T> -std::unique_ptr<Kernel> create_configure_kernel(T &&... 
args) -{ - std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>(); - k->configure(std::forward<T>(args)...); - return k; -} - -/** Helper function to create and return a unique_ptr pointed to a CL/GLES kernel object - * - * @return A unique pointer pointed to a Kernel kernel object - */ -template <typename Kernel> -std::unique_ptr<Kernel> create_kernel() -{ - std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>(); - return k; -} - -namespace traits -{ -/** Check if a type T is contained in a tuple Tuple of types */ -template <typename T, typename Tuple> -struct is_contained; - -template <typename T> -struct is_contained<T, std::tuple<>> : std::false_type -{ -}; - -template <typename T, typename... Ts> -struct is_contained<T, std::tuple<T, Ts...>> : std::true_type -{ -}; - -template <typename T, typename U, typename... Ts> -struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...>> -{ -}; -} - -/** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -template <typename T> -inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const T a00 = *pixel_ptr; - const T a01 = *(pixel_ptr + 1); - const T a10 = *(pixel_ptr + stride); - const T a11 = *(pixel_ptr + stride + 1); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - - return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); -} - -/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8 and in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * @param[in] iq_info Input QuantizationInfo - * @param[in] oq_info Output QuantizationInfo - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = dequantize_qasymm8(*pixel_ptr, iq_info); - const float a01 = dequantize_qasymm8(*(pixel_ptr + 1), iq_info); - const float a10 = dequantize_qasymm8(*(pixel_ptr + stride), iq_info); - const float a11 = dequantize_qasymm8(*(pixel_ptr + stride + 1), iq_info); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; - return static_cast<uint8_t>(quantize_qasymm8(res, oq_info)); -} - -/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be QASYMM8_SIGNED and in single channel format. - * - * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom-left and bottom-right pixel values - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * @param[in] iq_info Input QuantizationInfo - * @param[in] oq_info Output QuantizationInfo - * - * @note dx and dy must be in the range [0, 1.0] - * - * @return The bilinear interpolated pixel value - */ -inline int8_t delta_bilinear_c1_quantized(const int8_t *pixel_ptr, size_t stride, float dx, float dy, UniformQuantizationInfo iq_info, UniformQuantizationInfo oq_info) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = dequantize_qasymm8_signed(*pixel_ptr, iq_info); - const float a01 = dequantize_qasymm8_signed(*(pixel_ptr + 1), iq_info); - const float a10 = dequantize_qasymm8_signed(*(pixel_ptr + stride), iq_info); - const float a11 = dequantize_qasymm8_signed(*(pixel_ptr + stride + 1), iq_info); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; - return static_cast<int8_t>(quantize_qasymm8_signed(res, oq_info)); -} - -/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the top pixel value of a single channel input. 
- * @param[in] stride Stride to access the bottom pixel value - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer - * - * @note dy must be in the range [0, 1.0] - * - * @return The linear interpolated pixel value - */ -template <typename T> -inline T delta_linear_c1_y(const T *pixel_ptr, size_t stride, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dy1 = 1.0f - dy; - - const T a00 = *pixel_ptr; - const T a10 = *(pixel_ptr + stride); - - const float w1 = dy1; - const float w3 = dy; - - return static_cast<T>(a00 * w1 + a10 * w3); -} -/** Computes linear interpolation using the pointer to the left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. - * - * @param[in] pixel_ptr Pointer to the left pixel value of a single channel input. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer - * - * @note dx must be in the range [0, 1.0] - * - * @return The linear interpolated pixel value - */ -template <typename T> -inline T delta_linear_c1_x(const T *pixel_ptr, float dx) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const T a00 = *pixel_ptr; - const T a01 = *(pixel_ptr + 1); - - const float dx1 = 1.0f - dx; - - const float w1 = dx1; - const float w2 = dx; - - return static_cast<T>(a00 * w1 + a01 * w2); -} -/** Return the pixel at (x,y) using bilinear interpolation. - * - * @warning Only works if the iterator was created with an IImage - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input. - * @param[in] stride Stride in bytes of the image; - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using bilinear interpolation. 
- */ -template <typename T> -inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - const int32_t xi = std::floor(x); - const int32_t yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy); -} - -/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input - * - * @warning Only works if the iterator was created with an IImage - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image. - * @param[in] stride Stride in bytes of the image - * @param[in] width Width of the image - * @param[in] height Height of the image - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using bilinear interpolation. - */ -template <typename T> -inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - x = std::max(-1.f, std::min(x, static_cast<float>(width))); - y = std::max(-1.f, std::min(y, static_cast<float>(height))); - - const float xi = std::floor(x); - const float yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - if(dx == 0.0f) - { - if(dy == 0.0f) - { - return static_cast<T>(first_pixel_ptr[static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride]); - } - return delta_linear_c1_y(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dy); - } - if(dy == 0.0f) - { - return delta_linear_c1_x(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, dx); - } - return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy); -} - -/** Return the 
pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 - * - * @note The interpolation area depends on the width and height ration of the input and output images - * @note Currently average of the contributing pixels is calculated - * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. - * @param[in] stride Stride in bytes of the image - * @param[in] width Width of the image - * @param[in] height Height of the image - * @param[in] wr Width ratio among the input image width and output image width. - * @param[in] hr Height ratio among the input image height and output image height. - * @param[in] x X position of the wanted pixel - * @param[in] y Y position of the wanted pixel - * - * @return The pixel at (x, y) using area interpolation. - */ -inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y); - /** Iterator updated by @ref execute_window_loop for each window element */ class Iterator { @@ -362,6 +55,16 @@ public: */ Iterator(const ITensor *tensor, const Window &window); + /** Create a container iterator for the tensor with the specified number of dimensions, stride, buffer pointer and window. + * + * @param[in] num_dims The number of dimensions. + * @param[in] strides The strides in bytes. + * @param[in] buffer The data buffer. + * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor. + * @param[in] window The window which will be used to iterate over the tensor. + */ + Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window); + /** Increment the iterator along the specified dimension of the step value associated to the dimension. * * @warning It is the caller's responsibility to call increment(dimension+1) when reaching the end of a dimension, the iterator will not check for overflow. 
@@ -376,7 +79,7 @@ public: * * @return The current position of the iterator in bytes relative to the first element. */ - constexpr int offset() const; + constexpr size_t offset() const; /** Return a pointer to the current pixel. * @@ -393,18 +96,27 @@ public: void reset(size_t dimension); private: + /** Initialize a container iterator for the tensor with the specified number of dimensions, stride, buffer pointer and window. + * + * @param[in] num_dims The number of dimensions. + * @param[in] strides The strides in bytes. + * @param[in] buffer The data buffer. + * @param[in] offset The offset in bytes from the beginning of the buffer to the first element of the tensor. + * @param[in] window The window which will be used to iterate over the tensor. + */ + void initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &window); + uint8_t *_ptr; class Dimension { public: - constexpr Dimension() - : _dim_start(0), _stride(0) + constexpr Dimension() : _dim_start(0), _stride(0) { } - int _dim_start; - int _stride; + size_t _dim_start; + size_t _stride; }; std::array<Dimension, Coordinates::num_max_dimensions> _dims; @@ -419,180 +131,7 @@ private: * @param[in,out] iterators Tensor iterators which will be updated by this function before calling lambda_function. */ template <typename L, typename... Ts> -inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators); - -/** Update window and padding size for each of the access patterns. - * - * First the window size is reduced based on all access patterns that are not - * allowed to modify the padding of the underlying tensor. Then the padding of - * the remaining tensors is increased to match the window. - * - * @param[in] win Window that is used by the kernel. - * @param[in] patterns Access patterns used to calculate the final window and padding. - * - * @return True if the window has been changed. 
Changes to the padding do not - * influence the returned value. - */ -template <typename... Ts> -bool update_window_and_padding(Window &win, Ts &&... patterns) -{ - bool window_changed = false; - - utility::for_each([&](const IAccessWindow & w) - { - window_changed |= w.update_window_if_needed(win); - }, - patterns...); - - bool padding_changed = false; - - utility::for_each([&](IAccessWindow & w) - { - padding_changed |= w.update_padding_if_needed(win); - }, - patterns...); - - return window_changed; -} - -/** Calculate the maximum window for a given tensor shape and border setting - * - * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. - * - * @return The maximum window the kernel can be executed on. - */ -Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); - -/** Calculate the maximum window for a given tensor shape and border setting - * - * @param[in] info Tensor info object defining the shape of the object for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. - * - * @return The maximum window the kernel can be executed on. 
- */ -inline Window calculate_max_window(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) -{ - return calculate_max_window(info.valid_region(), steps, skip_border, border_size); -} - -/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting - * - * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. The border region will be excluded from the window. - * - * @return The maximum window the kernel can be executed on. - */ -Window calculate_max_window_horizontal(const ValidRegion &valid_region, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()); - -/** Calculate the maximum window used by a horizontal kernel for a given tensor shape and border setting - * - * @param[in] info Tensor info object defining the shape of the object for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] skip_border (Optional) If true exclude the border region from the window. - * @param[in] border_size (Optional) Border size. - * - * @return The maximum window the kernel can be executed on. - */ -inline Window calculate_max_window_horizontal(const ITensorInfo &info, const Steps &steps = Steps(), bool skip_border = false, BorderSize border_size = BorderSize()) -{ - return calculate_max_window_horizontal(info.valid_region(), steps, skip_border, border_size); -} - -/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. 
- * - * @param[in] valid_region Valid region object defining the shape of the tensor space for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] border_size (Optional) Border size. The border region will be included in the window. - * - * @return The maximum window the kernel can be executed on. - */ -Window calculate_max_enlarged_window(const ValidRegion &valid_region, const Steps &steps = Steps(), BorderSize border_size = BorderSize()); - -/** Calculate the maximum window for a given tensor shape and border setting. The window will also includes the border. - * - * @param[in] info Tensor info object defining the shape of the object for which the window is created. - * @param[in] steps (Optional) Number of elements processed for each step. - * @param[in] border_size (Optional) Border size. The border region will be included in the window. - * - * @return The maximum window the kernel can be executed on. - */ -inline Window calculate_max_enlarged_window(const ITensorInfo &info, const Steps &steps = Steps(), BorderSize border_size = BorderSize()) -{ - return calculate_max_enlarged_window(info.valid_region(), steps, border_size); -} - -/** Intersect multiple valid regions. - * - * @param[in] regions Valid regions. - * - * @return Intersection of all regions. - */ -template <typename... Ts> -ValidRegion intersect_valid_regions(const Ts &... 
regions) -{ - auto intersect = [](const ValidRegion & r1, const ValidRegion & r2) -> ValidRegion - { - ValidRegion region; - - for(size_t d = 0; d < std::min(r1.anchor.num_dimensions(), r2.anchor.num_dimensions()); ++d) - { - region.anchor.set(d, std::max(r1.anchor[d], r2.anchor[d])); - } - - for(size_t d = 0; d < std::min(r1.shape.num_dimensions(), r2.shape.num_dimensions()); ++d) - { - region.shape.set(d, std::min(r1.shape[d], r2.shape[d])); - } - - return region; - }; - - return utility::foldl(intersect, regions...); -} - -/** Create a strides object based on the provided strides and the tensor dimensions. - * - * @param[in] info Tensor info object providing the shape of the tensor for unspecified strides. - * @param[in] stride_x Stride to be used in X dimension (in bytes). - * @param[in] fixed_strides Strides to be used in higher dimensions starting at Y (in bytes). - * - * @return Strides object based on the specified strides. Missing strides are - * calculated based on the tensor shape and the strides of lower dimensions. - */ -template <typename T, typename... Ts> -inline Strides compute_strides(const ITensorInfo &info, T stride_x, Ts &&... fixed_strides) -{ - const TensorShape &shape = info.tensor_shape(); - - // Create strides object - Strides strides(stride_x, fixed_strides...); - - for(size_t i = 1 + sizeof...(Ts); i < info.num_dimensions(); ++i) - { - strides.set(i, shape[i - 1] * strides[i - 1]); - } - - return strides; -} - -/** Create a strides object based on the tensor dimensions. - * - * @param[in] info Tensor info object used to compute the strides. - * - * @return Strides object based on element size and tensor shape. - */ -template <typename... 
Ts> -inline Strides compute_strides(const ITensorInfo &info) -{ - return compute_strides(info, info.element_size()); -} +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators); /** Permutes given Dimensions according to a permutation vector * @@ -605,7 +144,7 @@ template <typename T> inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm) { auto dimensions_copy = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end()); - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { T dimension_val = (perm[i] < dimensions.num_dimensions()) ? dimensions_copy[perm[i]] : 0; dimensions.set(i, dimension_val); @@ -622,86 +161,13 @@ inline void permute(Dimensions<T> &dimensions, const PermutationVector &perm) inline void permute(TensorShape &shape, const PermutationVector &perm) { TensorShape shape_copy = shape; - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { size_t dimension_val = (perm[i] < shape.num_dimensions()) ? shape_copy[perm[i]] : 1; - shape.set(i, dimension_val, false); // Avoid changes in _num_dimension + shape.set(i, dimension_val, false, false); // Avoid changes in _num_dimension } } -/** Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] shape New shape. - * @param[in] num_channels New number of channels. - * @param[in] data_type New data type - * @param[in] quantization_info (Optional) New quantization info - * - * @return True if the tensor info has been initialized - */ -bool auto_init_if_empty(ITensorInfo &info, - const TensorShape &shape, - int num_channels, DataType data_type, - QuantizationInfo quantization_info = QuantizationInfo()); - -/** Auto initialize the tensor info using another tensor info. 
- * - * @param info_sink Tensor info used to check and assign - * @param info_source Tensor info used to assign - * - * @return True if the tensor info has been initialized - */ -bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source); - -/** Set the shape to the specified value if the current assignment is empty. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] shape New shape. - * - * @return True if the shape has been changed. - */ -bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape); - -/** Set the format, data type and number of channels to the specified value if - * the current data type is unknown. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] format New format. - * - * @return True if the format has been changed. - */ -bool set_format_if_unknown(ITensorInfo &info, Format format); - -/** Set the data type and number of channels to the specified value if - * the current data type is unknown. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] data_type New data type. - * - * @return True if the data type has been changed. - */ -bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type); - -/** Set the data layout to the specified value if - * the current data layout is unknown. - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] data_layout New data layout. - * - * @return True if the data type has been changed. - */ -bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout); - -/** Set the quantization info to the specified value if - * the current quantization info is empty and the data type of asymmetric quantized type - * - * @param[in,out] info Tensor info used to check and assign. - * @param[in] quantization_info Quantization info - * - * @return True if the quantization info has been changed. 
- */ -bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info); - /** Helper function to calculate the Valid Region for Scale. * * @param[in] src_info Input tensor info used to check. @@ -712,8 +178,11 @@ bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantiza * * @return The corresponding valid region */ -ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape, - InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined); +ValidRegion calculate_valid_region_scale(const ITensorInfo &src_info, + const TensorShape &dst_shape, + InterpolationPolicy interpolate_policy, + SamplingPolicy sampling_policy, + bool border_undefined); /** Convert a linear index into n-dimensional coordinates. * @@ -733,6 +202,22 @@ inline Coordinates index2coords(const TensorShape &shape, int index); */ inline int coords2index(const TensorShape &shape, const Coordinates &coord); +/** Returns a static map used to find an index or dimension based on a data layout + * + * *** Layouts *** + * + * *** 4D *** + * [N C H W] + * [3 2 1 0] + * [N H W C] + * + * * *** 5D *** + * [N C D H W] + * [4 3 2 1 0] + * [N D H W C] + */ +const std::map<DataLayout, std::vector<DataLayoutDimension>> &get_layout_map(); + /** Get the index of the given dimension. * * @param[in] data_layout The data layout. @@ -740,7 +225,8 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord); * * @return The int conversion of the requested data layout index. */ -inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension); +inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, + const DataLayoutDimension &data_layout_dimension); /** Get the DataLayoutDimension of a given index and layout. 
* @@ -749,22 +235,7 @@ inline size_t get_data_layout_dimension_index(const DataLayout data_layout, cons * * @return The dimension which this index is requested for. */ -inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index); - -/** Calculate the normalization dimension index for a given normalization type - * - * @param[in] layout Data layout of the input and output tensor - * @param[in] info Normalization info - * - * @return Normalization dimension index - */ -inline unsigned int get_normalization_dimension_index(DataLayout layout, const NormalizationLayerInfo &info) -{ - const unsigned int width_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH); - const unsigned int channel_idx = get_data_layout_dimension_index(layout, DataLayoutDimension::CHANNEL); - - return info.is_in_map() ? width_idx : channel_idx; -} +inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index); /** Calculate the number of output tiles required by Winograd Convolution layer. 
This utility function can be used by the Winograd input transform * to know the number of tiles on the x and y direction @@ -776,10 +247,17 @@ inline unsigned int get_normalization_dimension_index(DataLayout layout, const N * * @return the number of output tiles along the x and y directions of size "output_tile_size" */ -inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, const Size2D &kernel_size, const Size2D &output_tile_size, const PadStrideInfo &conv_info) +inline Size2D compute_winograd_convolution_tiles(const Size2D &in_dims, + const Size2D &kernel_size, + const Size2D &output_tile_size, + const PadStrideInfo &conv_info) { - int num_tiles_x = std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / static_cast<float>(output_tile_size.width)); - int num_tiles_y = std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / static_cast<float>(output_tile_size.height)); + int num_tiles_x = + std::ceil((in_dims.width - (kernel_size.width - 1) + conv_info.pad_left() + conv_info.pad_right()) / + static_cast<float>(output_tile_size.width)); + int num_tiles_y = + std::ceil((in_dims.height - (kernel_size.height - 1) + conv_info.pad_top() + conv_info.pad_bottom()) / + static_cast<float>(output_tile_size.height)); // Clamp in case we provide paddings but we have 1D convolution num_tiles_x = std::min(num_tiles_x, static_cast<int>(in_dims.width)); @@ -808,40 +286,12 @@ inline T wrap_around(T x, T m) */ inline Coordinates &convert_negative_axis(Coordinates &coords, int max_value) { - for(unsigned int i = 0; i < coords.num_dimensions(); ++i) + for (unsigned int i = 0; i < coords.num_dimensions(); ++i) { coords[i] = wrap_around(coords[i], max_value); } return coords; } - -/** Given an integer value, this function returns the next power of two - * - * @param[in] x Input value - * - * @return the next power of two - */ -inline unsigned int get_next_power_two(unsigned int 
x) -{ - // Decrement by 1 - x--; - - // Shift right by 1 - x |= x >> 1u; - // Shift right by 2 - x |= x >> 2u; - // Shift right by 4 - x |= x >> 4u; - // Shift right by 8 - x |= x >> 8u; - // Shift right by 16 - x |= x >> 16u; - - // Increment by 1 - x++; - - return x; -} } // namespace arm_compute #include "arm_compute/core/Helpers.inl" diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index 233d46bb86..60a21e9418 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,68 +22,19 @@ * SOFTWARE. */ #include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" #include <cmath> #include <numeric> namespace arm_compute { -inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - // Calculate sampling position - float in_x = (x + 0.5f) * wr - 0.5f; - float in_y = (y + 0.5f) * hr - 0.5f; - - // Get bounding box offsets - int x_from = std::floor(x * wr - 0.5f - in_x); - int y_from = std::floor(y * hr - 0.5f - in_y); - int x_to = std::ceil((x + 1) * wr - 0.5f - in_x); - int y_to = std::ceil((y + 1) * hr - 0.5f - in_y); - - // Clamp position to borders - in_x = std::max(-1.f, std::min(in_x, static_cast<float>(width))); - in_y = std::max(-1.f, std::min(in_y, static_cast<float>(height))); - - // Clamp bounding box offsets to borders - x_from = ((in_x + x_from) < -1) ? -1 : x_from; - y_from = ((in_y + y_from) < -1) ? -1 : y_from; - x_to = ((in_x + x_to) > width) ? (width - in_x) : x_to; - y_to = ((in_y + y_to) > height) ? 
(height - in_y) : y_to; - - // Get pixel index - const int xi = std::floor(in_x); - const int yi = std::floor(in_y); - - // Bounding box elements in each dimension - const int x_elements = (x_to - x_from + 1); - const int y_elements = (y_to - y_from + 1); - ARM_COMPUTE_ERROR_ON(x_elements == 0 || y_elements == 0); - - // Sum pixels in area - int sum = 0; - for(int j = yi + y_from, je = yi + y_to; j <= je; ++j) - { - const uint8_t *ptr = first_pixel_ptr + j * stride + xi + x_from; - sum = std::accumulate(ptr, ptr + x_elements, sum); - } - - // Return average - return sum / (x_elements * y_elements); -} - template <size_t dimension> struct IncrementIterators { template <typename T, typename... Ts> - static void unroll(T &&it, Ts &&... iterators) + static void unroll(T &&it, Ts &&...iterators) { - auto increment = [](T && it) - { - it.increment(dimension); - }; + auto increment = [](T &&it) { it.increment(dimension); }; utility::for_each(increment, std::forward<T>(it), std::forward<Ts>(iterators)...); } static void unroll() @@ -96,14 +47,14 @@ template <size_t dim> struct ForEachDimension { template <typename L, typename... Ts> - static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... iterators) + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators) { const auto &d = w[dim - 1]; - for(auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators < dim - 1 >::unroll(iterators...)) + for (auto v = d.start(); v < d.end(); v += d.step(), IncrementIterators<dim - 1>::unroll(iterators...)) { id.set(dim - 1, v); - ForEachDimension < dim - 1 >::unroll(w, id, lambda_function, iterators...); + ForEachDimension<dim - 1>::unroll(w, id, lambda_function, iterators...); } } }; @@ -112,7 +63,7 @@ template <> struct ForEachDimension<0> { template <typename L, typename... Ts> - static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&... 
iterators) + static void unroll(const Window &w, Coordinates &id, L &&lambda_function, Ts &&...iterators) { ARM_COMPUTE_UNUSED(w, iterators...); lambda_function(id); @@ -120,49 +71,60 @@ struct ForEachDimension<0> }; template <typename L, typename... Ts> -inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&... iterators) +inline void execute_window_loop(const Window &w, L &&lambda_function, Ts &&...iterators) { w.validate(); - for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) + for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_ERROR_ON(w[i].step() == 0); } Coordinates id; - ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), std::forward<Ts>(iterators)...); + ForEachDimension<Coordinates::num_max_dimensions>::unroll(w, id, std::forward<L>(lambda_function), + std::forward<Ts>(iterators)...); } -inline constexpr Iterator::Iterator() - : _ptr(nullptr), _dims() +inline constexpr Iterator::Iterator() : _ptr(nullptr), _dims() { } -inline Iterator::Iterator(const ITensor *tensor, const Window &win) - : Iterator() +inline Iterator::Iterator(const ITensor *tensor, const Window &win) : Iterator() { ARM_COMPUTE_ERROR_ON(tensor == nullptr); ARM_COMPUTE_ERROR_ON(tensor->info() == nullptr); - const ITensorInfo *info = tensor->info(); - const Strides &strides = info->strides_in_bytes(); + initialize(tensor->info()->num_dimensions(), tensor->info()->strides_in_bytes(), tensor->buffer(), + tensor->info()->offset_first_element_in_bytes(), win); +} + +inline Iterator::Iterator(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win) + : Iterator() +{ + initialize(num_dims, strides, buffer, offset, win); +} + +inline void +Iterator::initialize(size_t num_dims, const Strides &strides, uint8_t *buffer, size_t offset, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(buffer == nullptr); - _ptr = tensor->buffer() + 
info->offset_first_element_in_bytes(); + _ptr = buffer + offset; //Initialize the stride for each dimension and calculate the position of the first element of the iteration: - for(unsigned int n = 0; n < info->num_dimensions(); ++n) + for (unsigned int n = 0; n < num_dims; ++n) { _dims[n]._stride = win[n].step() * strides[n]; - std::get<0>(_dims)._dim_start += strides[n] * win[n].start(); + std::get<0>(_dims)._dim_start += static_cast<size_t>(strides[n]) * win[n].start(); } //Copy the starting point to all the dimensions: - for(unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = 1; n < Coordinates::num_max_dimensions; ++n) { _dims[n]._dim_start = std::get<0>(_dims)._dim_start; } - ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, info->num_dimensions()); + ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(win, num_dims); } inline void Iterator::increment(const size_t dimension) @@ -171,13 +133,13 @@ inline void Iterator::increment(const size_t dimension) _dims[dimension]._dim_start += _dims[dimension]._stride; - for(unsigned int n = 0; n < dimension; ++n) + for (unsigned int n = 0; n < dimension; ++n) { _dims[n]._dim_start = _dims[dimension]._dim_start; } } -inline constexpr int Iterator::offset() const +inline constexpr size_t Iterator::offset() const { return _dims.at(0)._dim_start; } @@ -193,100 +155,12 @@ inline void Iterator::reset(const size_t dimension) _dims[dimension]._dim_start = _dims[dimension + 1]._dim_start; - for(unsigned int n = 0; n < dimension; ++n) + for (unsigned int n = 0; n < dimension; ++n) { _dims[n]._dim_start = _dims[dimension]._dim_start; } } -inline bool auto_init_if_empty(ITensorInfo &info, - const TensorShape &shape, - int num_channels, - DataType data_type, - QuantizationInfo quantization_info) -{ - if(info.tensor_shape().total_size() == 0) - { - info.set_data_type(data_type); - info.set_num_channels(num_channels); - info.set_tensor_shape(shape); - info.set_quantization_info(quantization_info); - return true; 
- } - - return false; -} - -inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source) -{ - if(info_sink.tensor_shape().total_size() == 0) - { - info_sink.set_data_type(info_source.data_type()); - info_sink.set_num_channels(info_source.num_channels()); - info_sink.set_tensor_shape(info_source.tensor_shape()); - info_sink.set_quantization_info(info_source.quantization_info()); - info_sink.set_data_layout(info_source.data_layout()); - return true; - } - - return false; -} - -inline bool set_shape_if_empty(ITensorInfo &info, const TensorShape &shape) -{ - if(info.tensor_shape().total_size() == 0) - { - info.set_tensor_shape(shape); - return true; - } - - return false; -} - -inline bool set_format_if_unknown(ITensorInfo &info, Format format) -{ - if(info.data_type() == DataType::UNKNOWN) - { - info.set_format(format); - return true; - } - - return false; -} - -inline bool set_data_type_if_unknown(ITensorInfo &info, DataType data_type) -{ - if(info.data_type() == DataType::UNKNOWN) - { - info.set_data_type(data_type); - return true; - } - - return false; -} - -inline bool set_data_layout_if_unknown(ITensorInfo &info, DataLayout data_layout) -{ - if(info.data_layout() == DataLayout::UNKNOWN) - { - info.set_data_layout(data_layout); - return true; - } - - return false; -} - -inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) -{ - if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) - { - info.set_quantization_info(quantization_info); - return true; - } - - return false; -} - inline Coordinates index2coords(const TensorShape &shape, int index) { int num_elements = shape.total_size(); @@ -294,9 +168,9 @@ inline Coordinates index2coords(const TensorShape &shape, int index) ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements]!"); ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty 
shape!"); - Coordinates coord{ 0 }; + Coordinates coord{0}; - for(int d = shape.num_dimensions() - 1; d >= 0; --d) + for (int d = shape.num_dimensions() - 1; d >= 0; --d) { num_elements /= shape[d]; coord.set(d, index / num_elements); @@ -315,7 +189,7 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord) int index = 0; int stride = 1; - for(unsigned int d = 0; d < coord.num_dimensions(); ++d) + for (unsigned int d = 0; d < coord.num_dimensions(); ++d) { index += coord[d] * stride; stride *= shape[d]; @@ -324,61 +198,23 @@ inline int coords2index(const TensorShape &shape, const Coordinates &coord) return index; } -inline size_t get_data_layout_dimension_index(const DataLayout data_layout, const DataLayoutDimension data_layout_dimension) +inline size_t get_data_layout_dimension_index(const DataLayout &data_layout, + const DataLayoutDimension &data_layout_dimension) { - ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!"); - - /* Return the index based on the data layout - * [N C H W] - * [3 2 1 0] - * [N H W C] - */ - switch(data_layout_dimension) - { - case DataLayoutDimension::CHANNEL: - return (data_layout == DataLayout::NCHW) ? 2 : 0; - break; - case DataLayoutDimension::HEIGHT: - return (data_layout == DataLayout::NCHW) ? 1 : 2; - break; - case DataLayoutDimension::WIDTH: - return (data_layout == DataLayout::NCHW) ? 
0 : 1; - break; - case DataLayoutDimension::BATCHES: - return 3; - break; - default: - break; - } - ARM_COMPUTE_ERROR("Data layout index not supported!"); + ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, + "Cannot retrieve the dimension index for an unknown layout!"); + const auto &dims = get_layout_map().at(data_layout); + const auto &it = std::find(dims.cbegin(), dims.cend(), data_layout_dimension); + ARM_COMPUTE_ERROR_ON_MSG(it == dims.cend(), "Invalid dimension for the given layout."); + return it - dims.cbegin(); } -inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout data_layout, const size_t index) +inline DataLayoutDimension get_index_data_layout_dimension(const DataLayout &data_layout, const size_t index) { - ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, "Cannot retrieve the dimension index for an unknown layout!"); - - /* Return the index based on the data layout - * [N C H W] - * [3 2 1 0] - * [N H W C] - */ - switch(index) - { - case 0: - return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::WIDTH : DataLayoutDimension::CHANNEL; - break; - case 1: - return (data_layout == DataLayout::NCHW) ? DataLayoutDimension::HEIGHT : DataLayoutDimension::WIDTH; - break; - case 2: - return (data_layout == DataLayout::NCHW) ? 
DataLayoutDimension::CHANNEL : DataLayoutDimension::HEIGHT; - break; - case 3: - return DataLayoutDimension::BATCHES; - break; - default: - ARM_COMPUTE_ERROR("Index value not supported!"); - break; - } + ARM_COMPUTE_ERROR_ON_MSG(data_layout == DataLayout::UNKNOWN, + "Cannot retrieve the layout dimension for an unknown layout!"); + const auto &dims = get_layout_map().at(data_layout); + ARM_COMPUTE_ERROR_ON_MSG(index >= dims.size(), "Invalid index for the given layout."); + return dims[index]; } } // namespace arm_compute diff --git a/arm_compute/core/IAccessWindow.h b/arm_compute/core/IAccessWindow.h index 227d1c4bb2..9c9fb90915 100644 --- a/arm_compute/core/IAccessWindow.h +++ b/arm_compute/core/IAccessWindow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -100,7 +100,10 @@ public: * @return a valid region. * */ - virtual ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const = 0; + virtual ValidRegion compute_valid_region(const Window &window, + ValidRegion input_valid_region, + bool border_undefined, + BorderSize border_size) const = 0; }; /** Implementation of a rectangular access pattern. */ @@ -161,7 +164,10 @@ public: * @param[in] border_undefined (Optional) Undefined borders are excluded from the valid region. * @param[in] border_size (Optional) Size of the border around the XY-plane of the tensor. */ - void set_valid_region(const Window &window, const ValidRegion &input_valid_region, bool border_undefined = false, const BorderSize &border_size = BorderSize(0)); + void set_valid_region(const Window &window, + const ValidRegion &input_valid_region, + bool border_undefined = false, + const BorderSize &border_size = BorderSize(0)); /** Compute the valid region based on access pattern, valid region of the inputs and border mode. * @@ -189,7 +195,10 @@ public: * @return a valid region. 
* */ - ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override; + ValidRegion compute_valid_region(const Window &window, + ValidRegion input_valid_region, + bool border_undefined, + BorderSize border_size) const override; bool update_window_if_needed(Window &window) const override; bool update_padding_if_needed(const Window &window) override; diff --git a/arm_compute/core/IArray.h b/arm_compute/core/IArray.h index c6a1499698..3471fc9a86 100644 --- a/arm_compute/core/IArray.h +++ b/arm_compute/core/IArray.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,30 +25,24 @@ #define ARM_COMPUTE_IARRAY_H #include "arm_compute/core/Error.h" + #include <cstddef> #include <cstdint> namespace arm_compute { -struct KeyPoint; -struct Coordinates2D; -struct DetectionWindow; -class Size2D; - /** Array of type T */ template <class T> class IArray { public: /** Default constructor */ - IArray() - : _num_values(0), _max_size(0) {}; + IArray() : _num_values(0), _max_size(0){}; /** Constructor: initializes an array which can contain up to max_num_points values * * @param[in] max_num_values Maximum number of values the array will be able to stored */ - IArray(size_t max_num_values) - : _num_values(0), _max_size(max_num_values) + IArray(size_t max_num_values) : _num_values(0), _max_size(max_num_values) { } /** Maximum number of values which can be stored in this array @@ -78,7 +72,7 @@ public: bool push_back(const T &val) { ARM_COMPUTE_ERROR_ON(0 == _max_size); - if(_num_values >= max_num_values()) + if (_num_values >= max_num_values()) { _num_values = max_num_values() + 1; return false; @@ -135,14 +129,6 @@ private: size_t _num_values; size_t _max_size; }; -/** Interface for Array of Key Points. */ -using IKeyPointArray = IArray<KeyPoint>; -/** Interface for Array of 2D Coordinates. 
*/ -using ICoordinates2DArray = IArray<Coordinates2D>; -/** Interface for Array of Detection Windows. */ -using IDetectionWindowArray = IArray<DetectionWindow>; -/** Interface for Array of 2D Sizes. */ -using ISize2DArray = IArray<Size2D>; /** Interface for Array of uint8s. */ using IUInt8Array = IArray<uint8_t>; /** Interface for Array of uint16s. */ @@ -155,5 +141,5 @@ using IInt16Array = IArray<int16_t>; using IInt32Array = IArray<int32_t>; /** Interface for Array of floats. */ using IFloatArray = IArray<float>; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_IARRAY_H */ diff --git a/arm_compute/core/IDevice.h b/arm_compute/core/IDevice.h index 5cffe646d4..12efa91e19 100644 --- a/arm_compute/core/IDevice.h +++ b/arm_compute/core/IDevice.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,7 +34,6 @@ enum class DeviceType { NEON, CL, - GLES }; /** Interface for device object */ diff --git a/arm_compute/core/IDistribution.h b/arm_compute/core/IDistribution.h deleted file mode 100644 index cd6f25fd47..0000000000 --- a/arm_compute/core/IDistribution.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IDISTRIBUTION_H -#define ARM_COMPUTE_IDISTRIBUTION_H - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -/** Interface for distribution objects */ -class IDistribution -{ -public: - /** Default virtual destructor */ - virtual ~IDistribution() = default; - /** Returns the dimensions of the distribution. - * - * @note This is fixed to 1-dimensional distribution for now. - * @return Dimensions of the distribution. - */ - virtual size_t dimensions() const = 0; - /** Returns the total size in bytes of the distribution. - * - * @return Total size of the distribution in bytes. - */ - virtual size_t size() const = 0; - /** Returns a pointer to the start of the distribution. - * Other elements of the array can be accessed using buffer()[idx] for 0 <= idx < num_bins() - * - * @return Pointer to the start of the distribution. - */ - virtual uint32_t *buffer() const = 0; - /** Clears the distribution by setting every element to zero. */ - void clear() const; -}; -} -#endif /* ARM_COMPUTE_IDISTRIBUTION_H */ diff --git a/arm_compute/core/IDistribution1D.h b/arm_compute/core/IDistribution1D.h deleted file mode 100644 index 081ba580db..0000000000 --- a/arm_compute/core/IDistribution1D.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IDISTRIBUTION1D_H -#define ARM_COMPUTE_IDISTRIBUTION1D_H - -#include "arm_compute/core/IDistribution.h" - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -/** 1D Distribution interface */ -class IDistribution1D : public IDistribution -{ -public: - /** Constructor: Creates a 1D Distribution of a consecutive interval [offset, offset + range - 1] - * defined by a start offset and valid range, divided equally into num_bins parts. - * - * @param[in] num_bins The number of bins the distribution is divided in. - * @param[in] offset The start of the values to use. - * @param[in] range The total number of the consecutive values of the distribution interval. - */ - IDistribution1D(size_t num_bins, int32_t offset, uint32_t range); - /** Returns the number of bins that the distribution has. 
- * - * @return Number of bins of the distribution. - */ - size_t num_bins() const; - /** Returns the offset of the distribution. - * - * @return Offset of the distribution. - */ - int32_t offset() const; - /** Returns the range of the distribution. - * - * @return Range of the distribution. - */ - uint32_t range() const; - /** Returns the window of the distribution, which is the range divided by the number of bins. - * - * @note If range is not divided by the number of bins then it is invalid. - * - * @return Window of the distribution. - */ - uint32_t window() const; - /** Sets the range of the distribution. - * - * @param[in] range New range of the distribution to be set. - */ - void set_range(uint32_t range); - - // Inherited methods overridden: - size_t size() const override; - size_t dimensions() const override; - -private: - size_t _num_bins; /**< Number of bins. */ - int32_t _offset; /**< Offset, which indicate the start of the usable values. */ - uint32_t _range; /**< The total number of consecutive values of the distribution interval */ -}; -} -#endif /* ARM_COMPUTE_IDISTRIBUTION1D_H */ diff --git a/arm_compute/core/IHOG.h b/arm_compute/core/IHOG.h deleted file mode 100644 index bf8bd73087..0000000000 --- a/arm_compute/core/IHOG.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IHOG_H -#define ARM_COMPUTE_IHOG_H - -#include "arm_compute/core/Types.h" - -#include <cstddef> - -namespace arm_compute -{ -class HOGInfo; -/** Interface for HOG data-object */ -class IHOG -{ -public: - /** Interface to be implemented by the child class to return the HOG's metadata - * - * @return A pointer to the HOG's metadata. - */ - virtual const HOGInfo *info() const = 0; - /** Default virtual destructor */ - virtual ~IHOG() = default; - /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - * - * @note Other elements of the array can be accessed using descriptor()[idx] for idx=[0, descriptor_size() - 1] - * - * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - */ - virtual float *descriptor() const = 0; -}; -} -#endif /* ARM_COMPUTE_IHOG_H */ diff --git a/arm_compute/core/IKernel.h b/arm_compute/core/IKernel.h index cb1ddb1d7f..403a2c724e 100644 --- a/arm_compute/core/IKernel.h +++ b/arm_compute/core/IKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,11 @@ public: * @return The maximum window the kernel can be executed on. 
*/ const Window &window() const; + /** Function to check if the embedded window of this kernel has been configured + * + * @return True if the windows has been configured + */ + bool is_window_configured() const; protected: /** Configure the kernel's window @@ -68,5 +73,5 @@ protected: private: Window _window; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_IKERNEL_H */ diff --git a/arm_compute/core/ILut.h b/arm_compute/core/ILut.h deleted file mode 100644 index d1a03af969..0000000000 --- a/arm_compute/core/ILut.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ILUT_H -#define ARM_COMPUTE_ILUT_H - -#include "arm_compute/core/Types.h" - -#include <cstddef> - -namespace arm_compute -{ -/** Lookup Table object interface. 
*/ -class ILut -{ -public: - /** Default virtual destructor */ - virtual ~ILut() = default; - /** Returns the total number of elements in the LUT. - * - * @return Total number of elements. - */ - virtual size_t num_elements() const = 0; - /** Indicates the offset that needs to be applied to the raw index before performing a lookup in the LUT. - * - * @return The normalization offset. - */ - virtual uint32_t index_offset() const = 0; - /** Returns the total size in bytes of the LUT. - * - * @return Total size of the LUT in bytes. - */ - virtual size_t size_in_bytes() const = 0; - /** Returns the type of the LUT. - * - * @return The type of the LUT. - */ - virtual DataType type() const = 0; - /** Returns a pointer to the start of the LUT. - * Other elements of the LUT can be accessed using buffer()[idx] for 0 <= idx < num_elements(). - * - * @return Pointer to the start of the lut. - */ - virtual uint8_t *buffer() const = 0; - /** Clears the LUT by setting every element to zero. */ - virtual void clear() = 0; -}; -} -#endif /* ARM_COMPUTE_ILUT_H */ diff --git a/arm_compute/core/IMultiHOG.h b/arm_compute/core/IMultiHOG.h deleted file mode 100644 index ab79fac154..0000000000 --- a/arm_compute/core/IMultiHOG.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IMULTIHOG_H -#define ARM_COMPUTE_IMULTIHOG_H - -#include "arm_compute/core/IHOG.h" - -#include <cstddef> - -namespace arm_compute -{ -/** Interface for storing multiple HOG data-objects */ -class IMultiHOG -{ -public: - /** Default destructor */ - virtual ~IMultiHOG() = default; - /** The number of HOG models stored - * - * @return The number of HOG models stored - */ - virtual size_t num_models() const = 0; - /** Return a pointer to the requested HOG model - * - * @param[in] index The index of the wanted HOG model. - * - * @return A pointer pointed to the HOG model - */ - virtual IHOG *model(size_t index) = 0; - /** Return a const pointer to the requested HOG model - * - * @param[in] index The index of the wanted HOG model. - * - * @return A const pointer pointed to the HOG model - */ - virtual const IHOG *model(size_t index) const = 0; -}; -} - -#endif /* ARM_COMPUTE_IMULTIHOG_H */ diff --git a/arm_compute/core/IMultiImage.h b/arm_compute/core/IMultiImage.h deleted file mode 100644 index 3abdfed8a8..0000000000 --- a/arm_compute/core/IMultiImage.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_IMULTIIMAGE_H -#define ARM_COMPUTE_IMULTIIMAGE_H - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; -class MultiImageInfo; - -/** Interface for multi-planar images */ -class IMultiImage -{ -public: - /** Destructor */ - virtual ~IMultiImage() = default; - /** Interface to be implemented by the child class to return the multi-planar image's metadata - * - * @return A pointer to the image's metadata. - */ - virtual const MultiImageInfo *info() const = 0; - /** Return a pointer to the requested plane of the image. - * - * @param[in] index The index of the wanted planed. - * - * @return A pointer pointed to the plane - */ - virtual IImage *plane(unsigned int index) = 0; - /** Return a constant pointer to the requested plane of the image. - * - * @param[in] index The index of the wanted planed. 
- * - * @return A constant pointer pointed to the plane - */ - virtual const IImage *plane(unsigned int index) const = 0; -}; -} -#endif /*ARM_COMPUTE_IMULTIIMAGE_H */ diff --git a/arm_compute/core/IPyramid.h b/arm_compute/core/IPyramid.h deleted file mode 100644 index b2a74656b6..0000000000 --- a/arm_compute/core/IPyramid.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_IPYRAMID_H -#define ARM_COMPUTE_IPYRAMID_H - -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PyramidInfo.h" -#include "arm_compute/core/Types.h" - -#include <cstddef> - -namespace arm_compute -{ -/** Interface for pyramid data-object */ -class IPyramid -{ -public: - /** Default virtual destructor */ - virtual ~IPyramid() = default; - /** Interface to be implemented by the child class to return the Pyramid's metadata - * - * @return A pointer to the Pyramid's metadata. - */ - virtual const PyramidInfo *info() const = 0; - /** Retrieves a level of the pyramid as a ITensor pointer - * - * @param[in] index The index of the level, such that index is less than levels. - * - * @return An ITensor pointer - */ - virtual ITensor *get_pyramid_level(size_t index) const = 0; -}; -} - -#endif /* ARM_COMPUTE_IPYRAMID_H */ diff --git a/arm_compute/core/ITensor.h b/arm_compute/core/ITensor.h index 501279eb25..aad8313261 100644 --- a/arm_compute/core/ITensor.h +++ b/arm_compute/core/ITensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ namespace arm_compute { class Coordinates; -/** Interface for NEON tensor */ +/** Interface for CPU tensor */ class ITensor { public: @@ -90,11 +90,13 @@ public: bool is_used() const; /** Marks a tensor as unused */ void mark_as_unused() const; + /** Marks a tensor as used */ + void mark_as_used() const; private: - mutable bool _is_used = { true }; /**< Flag that marks if the tensor is used or not */ + mutable bool _is_used = {true}; /**< Flag that marks if the tensor is used or not */ }; using IImage = ITensor; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ITENSOR_H */ diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h index f2b4c155aa..c42f4b57a1 100644 --- a/arm_compute/core/ITensorInfo.h +++ b/arm_compute/core/ITensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,18 +28,46 @@ #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ICloneable.h" #include "arm_compute/core/utils/misc/Utility.h" +#include "support/ICloneable.h" + #include <cstddef> namespace arm_compute { +class QuantizationInfo; +// Note: Any changes to the fields of the class below that have setters should be mirrored +// (if possible) in the auto_init_if_empty function in AutoConfiguration.h + /** Store the tensor's metadata */ class ITensorInfo : public misc::ICloneable<ITensorInfo> { public: + using TensorDimsState = std::vector<int>; + /** An id that uniquely identifies an ITensorInfo within some domain (e.g. 
a workload) + */ + using Id = int32_t; + /** An invalid tensor id within a domain */ + static constexpr Id invalid_tensor_id = 0; + /** Get the value representing dynamic dimension state + * + * @return Value representing dynamic dimension state + * + */ + static constexpr int32_t get_dynamic_state_value() + { + return _dynamic_dimension; + } + /** Get the value representing static dimension state + * + * @return Value representing static dimension state + * + */ + static constexpr int32_t get_static_state_value() + { + return _static_dimension; + } /** Default virtual destructor */ virtual ~ITensorInfo() = default; /** Set the data type to the specified value. @@ -81,6 +109,17 @@ public: * @return Reference to this ITensorInfo object */ virtual ITensorInfo &set_tensor_shape(const TensorShape &shape) = 0; + /** Set the state for each dimension of the tensor + * + * This sets the state of each dimension of the shape in terms of dynamic behavior using -1 where appropriate. + * The index in the state is a 1 to 1 mapping with the shape dimension index. + * For example if you want to express [?, 3, 3] as a dynamic input then [-1, 3, 3] has to be set as a state + * + * @param[in] state Tensor dimensions state + * + * @return Reference to this ITensorInfo object + */ + virtual ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) = 0; /** Set the quantization settings (scale and offset) of the tensor. * * @param[in] quantization_info QuantizationInfo containing the scale and offset @@ -107,6 +146,17 @@ public: * @return True if the strides or the offset to the first element have changed. */ virtual bool auto_padding() = 0; + /** Set the lock paddings flag of the tensor. + * It should be set to True, when the tensor could be mapped to camera or frame buffer. 
+ * + * @return Reference to this ITensorInfo object + */ + virtual ITensorInfo &set_lock_paddings(bool flag) = 0; + /** Get the lock paddings flag value + * + * @return lock paddings flag value + */ + virtual bool lock_paddings() const = 0; /** Update the offset to the first element, the strides and the total size. * * @note This function can only increase the offset, strides and total size. @@ -170,6 +220,11 @@ public: * @return A vector with the size for each dimension of the tensor */ virtual const TensorShape &tensor_shape() const = 0; + /** State of each dimension of the tensor shape + * + * @return A vector with the state for each dimension of the tensor, where -1 specifies dynamic dimension + */ + virtual const TensorDimsState &tensor_dims_state() const = 0; /** Data type used for each element of the tensor * * @return Tensor data type @@ -205,6 +260,11 @@ public: * @return True if its dynamic else false */ virtual bool is_dynamic() const = 0; + /** Flag indicating whether the values of the tensor are constant, meaning that they can change on kernel/function execution. + * + * @return True if values are constant else false + */ + virtual bool are_values_constant() const = 0; /** Set the flag whether the tensor size can be changed. * * @param[in] is_resizable Flag that marks the tensor if it can be changed or not. @@ -212,13 +272,13 @@ public: * @return Reference to this ITensorInfo object */ virtual ITensorInfo &set_is_resizable(bool is_resizable) = 0; - /** Set the flag whether the tensor size is dynamic. + /** Set the flag whether the tensor values can change during kernel/function execution. * - * @param[in] is_dynamic Flag that marks the tensor if it's dynamic. + * @param[in] are_values_constant Flag that marks the tensor values if they can be changed or not. 
* * @return Reference to this ITensorInfo object */ - virtual ITensorInfo &set_is_dynamic(bool is_dynamic) = 0; + virtual ITensorInfo &set_are_values_constant(bool are_values_constant) = 0; /** Valid region of the tensor. All elements in the valid region have defined values, i.e. are not undefined. * * @return The valid region. @@ -240,7 +300,20 @@ public: * @return A DataLayout containing the layout data information. */ virtual DataLayout data_layout() const = 0; - + /** Get the workload tensor id of the tensor. + * + * @return Workload tensor id of the tensor + */ + virtual Id id() const = 0; + /** Set the tensor id + */ + virtual ITensorInfo &set_id(ITensorInfo::Id id) = 0; + /** Check if the tensor id is valid + */ + bool has_valid_id() const + { + return id() != invalid_tensor_id; + } /** If infos are broadcast compatible tensor info's, return the broadcasted shape and the intersection of * the broadcasted valid regions of the tensors. * @@ -256,23 +329,23 @@ public: * not broadcast compatible. */ template <typename... Infos> - static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &... 
infos) + static std::pair<TensorShape, ValidRegion> broadcast_shape_and_valid_region(const Infos &...infos) { TensorShape bc_shape = TensorShape::broadcast_shape(infos.tensor_shape()...); - ValidRegion bc_valid_region{ Coordinates(), bc_shape }; + ValidRegion bc_valid_region{Coordinates(), bc_shape}; - auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo & info) + auto broadcast_valid_region = [&bc_valid_region](const ITensorInfo &info) { - if(info.num_dimensions() != 0) + if (info.num_dimensions() != 0) { - for(size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d) + for (size_t d = 0; d < bc_valid_region.shape.num_dimensions(); ++d) { const bool is_broadcast = (info.tensor_shape()[d] == 1); const int anchor_max = std::max(bc_valid_region.anchor[d], info.valid_region().anchor[d]); const size_t valid_min = std::min(bc_valid_region.shape[d], info.valid_region().shape[d]); - if(!is_broadcast || (valid_min == 0)) + if (!is_broadcast || (valid_min == 0)) { bc_valid_region.anchor.set(d, anchor_max); bc_valid_region.shape.set(d, valid_min); @@ -285,6 +358,10 @@ public: return std::pair<TensorShape, ValidRegion>(bc_shape, bc_valid_region); } + +private: + static constexpr int32_t _dynamic_dimension = -1; + static constexpr int32_t _static_dimension = 0; }; } // namespace arm_compute #endif /*ARM_COMPUTE_TENSORINFO_H */ diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h new file mode 100644 index 0000000000..f456c50769 --- /dev/null +++ b/arm_compute/core/ITensorPack.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2020-2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_ITENSORPACK_H +#define ARM_COMPUTE_ITENSORPACK_H + +#include "arm_compute/core/experimental/Types.h" + +#include <cstddef> +#include <cstdint> +#include <unordered_map> + +namespace arm_compute +{ +// Forward declaration +class ITensor; + +/** Tensor packing service */ +class ITensorPack +{ +public: + struct PackElement + { + PackElement() = default; + PackElement(int id, ITensor *tensor) : id(id), tensor(tensor), ctensor(nullptr) + { + } + PackElement(int id, const ITensor *ctensor) : id(id), tensor(nullptr), ctensor(ctensor) + { + } + + int id{-1}; + ITensor *tensor{nullptr}; + const ITensor *ctensor{nullptr}; + }; + +public: + /** Default Constructor */ + ITensorPack() = default; + /** Initializer list Constructor */ + ITensorPack(std::initializer_list<PackElement> l); + /** Add tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_tensor(int id, ITensor *tensor); + + /** Add const tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_tensor(int id, const ITensor *tensor); + + /** Add const tensor to the pack + * + * @param[in] id ID/type of the tensor to add + * @param[in] tensor Tensor to add + */ + void add_const_tensor(int id, const ITensor *tensor); + /** Get tensor of a given id from the pac + * + * @param[in] id ID of tensor to extract + * + * @return The pointer to the tensor if exist and is non-const else nullptr + */ + ITensor *get_tensor(int id); + /** Get constant tensor of a given id + * + * @param[in] id ID of tensor to extract + * + * @return The pointer to the tensor if exist and is const else nullptr + */ + const ITensor *get_const_tensor(int id) const; + /** Remove the tensor stored with the given id + * + * @param[in] id ID of tensor to remove + */ + void remove_tensor(int id); + /** Pack size accessor + * + * @return Number of tensors registered to the pack + */ + size_t size() 
const; + /** Checks if pack is empty + * + * @return True if empty else false + */ + bool empty() const; + +private: + std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */ +}; +} // namespace arm_compute +#endif /*ARM_COMPUTE_ITENSORPACK_H */ diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index de08288dec..168a06a55c 100644 --- a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,113 +21,139 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H -#define ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H +#ifndef ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H +#define ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" namespace arm_compute { /** Descriptor for FFT scale kernels */ struct FFTScaleKernelInfo { - float scale{ 0.f }; /**< Axis to perform the kernel on. */ - bool conjugate{ true }; /**< Flag to conjugate the output/ */ + float scale{0.f}; /**< Axis to perform the kernel on. */ + bool conjugate{true}; /**< Flag to conjugate the output/ */ }; /** Descriptor for FFT digit reverse kernels */ struct FFTDigitReverseKernelInfo { - unsigned int axis{ 0 }; /**< Axis to perform the kernel on. */ - bool conjugate{ false }; /**< Flag to conjugate the output/ */ + unsigned int axis{0}; /**< Axis to perform the kernel on. */ + bool conjugate{false}; /**< Flag to conjugate the output/ */ }; /** Descriptor used by the FFT core kernels */ struct FFTRadixStageKernelInfo { - unsigned int axis{ 0 }; /**< Axis to run the kernel on. */ - unsigned int radix{ 0 }; /**< Radix to use. */ - unsigned int Nx{ 0 }; /**< Nx coefficient. 
*/ - bool is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */ + unsigned int axis{0}; /**< Axis to run the kernel on. */ + unsigned int radix{0}; /**< Radix to use. */ + unsigned int Nx{0}; /**< Nx coefficient. */ + bool is_first_stage{false}; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */ }; +class ITensorInfo; /** Descriptor used by the GEMM kernels */ struct GEMMKernelInfo { GEMMKernelInfo() = default; - GEMMKernelInfo( - unsigned int im, - unsigned int in, - unsigned int ik, - unsigned int idepth_output_gemm3d, - bool ireinterpret_input_as_3d, - bool ibroadcast_bias, - bool ifp_mixed_precision, - ActivationLayerInfo iactivation_info, - int inmult_transpose1xW_width, - int imult_interleave4x4_height, - GEMMLHSMatrixInfo ilhs_info, - GEMMRHSMatrixInfo irhs_info, - int32_t ina_offset, - int32_t inb_offset) - : m(im), n(in), k(ik), depth_output_gemm3d(idepth_output_gemm3d), reinterpret_input_as_3d(ireinterpret_input_as_3d), broadcast_bias(ibroadcast_bias), fp_mixed_precision(ifp_mixed_precision), - activation_info(iactivation_info), mult_transpose1xW_width(inmult_transpose1xW_width), mult_interleave4x4_height(imult_interleave4x4_height), lhs_info(ilhs_info), rhs_info(irhs_info), - a_offset(ina_offset), b_offset(inb_offset) + GEMMKernelInfo(unsigned int im, + unsigned int in, + unsigned int ik, + unsigned int idepth_output_gemm3d, + bool ireinterpret_input_as_3d, + bool ibroadcast_bias, + bool ifp_mixed_precision, + bool ihas_pad_y, + ActivationLayerInfo iactivation_info, + int inmult_transpose1xW_width, + int imult_interleave4x4_height, + GEMMLHSMatrixInfo ilhs_info, + GEMMRHSMatrixInfo irhs_info, + int32_t ina_offset, + int32_t inb_offset) + : m(im), + n(in), + k(ik), + depth_output_gemm3d(idepth_output_gemm3d), + reinterpret_input_as_3d(ireinterpret_input_as_3d), + broadcast_bias(ibroadcast_bias), + fp_mixed_precision(ifp_mixed_precision), + has_pad_y(ihas_pad_y), + 
activation_info(iactivation_info), + mult_transpose1xW_width(inmult_transpose1xW_width), + mult_interleave4x4_height(imult_interleave4x4_height), + lhs_info(ilhs_info), + rhs_info(irhs_info), + a_offset(ina_offset), + b_offset(inb_offset) { } - unsigned int m{ 0 }; /**< Number of LHS rows*/ - unsigned int n{ 0 }; /**< Number of RHS columns*/ - unsigned int k{ 0 }; /**< Number of LHS columns or RHS rows */ - unsigned int depth_output_gemm3d{ 0 }; /**< Depth of the output tensor in case is reinterpreted as 3D */ - bool reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */ - bool broadcast_bias{ false }; /**< Flag used to broadcast the bias addition */ - bool fp_mixed_precision{ false }; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */ - ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */ - int mult_transpose1xW_width{ 1 }; /**< Multiplication factor for the width of the 1xW transposed block */ - int mult_interleave4x4_height{ 1 }; /**< Multiplication factor for the height of the 4x4 interleaved block */ - GEMMLHSMatrixInfo lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */ - GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */ - int32_t a_offset{ 0 }; /**< Offset to be added to each element of the matrix A */ - int32_t b_offset{ 0 }; /**< Offset to be added to each element of the matrix B */ - GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */ + unsigned int m{0}; /**< Number of LHS rows*/ + unsigned int n{0}; /**< Number of RHS columns*/ + unsigned int k{0}; /**< Number of LHS columns or RHS rows */ + unsigned int depth_output_gemm3d{0}; /**< Depth of the output tensor in case is reinterpreted as 3D */ + bool reinterpret_input_as_3d{false}; /**< Flag used to reinterpret the input as 3D */ + bool broadcast_bias{false}; /**< 
Flag used to broadcast the bias addition */ + bool fp_mixed_precision{false}; /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */ + bool has_pad_y{ + false}; /**< Flag used to indicate if the input/output tensors have internal pad on the y direction */ + ActivationLayerInfo activation_info{}; /**< Activation function to perform after the matrix multiplication */ + int mult_transpose1xW_width{1}; /**< Multiplication factor for the width of the 1xW transposed block */ + int mult_interleave4x4_height{1}; /**< Multiplication factor for the height of the 4x4 interleaved block */ + GEMMLHSMatrixInfo + lhs_info{}; /**< LHS matrix information used to retrieve the number of rows processed by each thread */ + GEMMRHSMatrixInfo rhs_info{}; /**< RHS matrix information used for reshaping the RHS matrix */ + int32_t a_offset{0}; /**< Offset to be added to each element of the matrix A */ + int32_t b_offset{0}; /**< Offset to be added to each element of the matrix B */ + GEMMLowpOutputStageInfo output_stage{}; /**< GEMMLowp output stage information */ }; -/** Descriptor used by the depthwise convolution kernels */ -struct DWCKernelInfo +/** Compute descriptor used by the depthwise convolution native kernel */ +struct DWCComputeKernelInfo { - ActivationLayerInfo activation_info{}; /**< Activation function to perform after the depthwise convolution */ + unsigned int n0{1}; /**< Number of columns processed by each thread */ + unsigned int m0{1}; /**< Number of rows processed by each thread */ + bool export_input_to_cl_image{false}; /**< Export input to cl_image */ + bool export_weights_to_cl_image{false}; /**< Export the weights to cl_image */ }; -/** Descriptor used by the depthwise convolution kernels to retrieve the number of output elements processed by each thread */ -struct DWCWeightsKernelInfo +/** Compute descriptor used by the direct convolution kernel */ +struct DirectConvComputeKernelInfo { - unsigned int n0{ 0 }; /**< Number of columns 
processed by each thread */ + int32_t m0{1}; /**< Number of rows to be processed by the kernel */ + int32_t n0{1}; /**< Number of columns to be processed by the kernel */ + int32_t k0{1}; /**< Number of partial accumulations to be processed in a single iteration by the kernel */ + bool export_weights_to_cl_image{false}; /**< Flag to export the weights to cl_image */ + bool export_output_to_cl_image{false}; /**< Flag to export the output to cl_image */ + bool export_input_to_cl_image{false}; /**< Flag to export the input to cl_image */ }; /** Descriptor used by the softmax kernels */ struct SoftmaxKernelInfo { - float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */ - bool is_log{ false }; /**< Flag used to perform Log Softmax operation */ - DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */ + float beta{1.f}; /**< A scaling factor for the exponent with default value 1.0 */ + bool is_log{false}; /**< Flag used to perform Log Softmax operation */ + DataType input_data_type{DataType::UNKNOWN}; /**< Input tensor data type */ + int32_t axis{0}; /**< The dimension in which to apply softmax. 
*/ }; /** Descriptor used by the direct convolution layer output stage kernels */ struct DirectConvolutionLayerOutputStageKernelInfo { - int32_t result_fixedpoint_multiplier{ 0 }; /**< Result output stage multiplier used for quantizing */ - int32_t result_shift{ 0 }; /**< Result output stage shift used for quantizing */ - int32_t result_offset_after_shift{ 0 }; /**< Result offset used for quantizing */ - DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */ + int32_t result_fixedpoint_multiplier{0}; /**< Result output stage multiplier used for quantizing */ + int32_t result_shift{0}; /**< Result output stage shift used for quantizing */ + int32_t result_offset_after_shift{0}; /**< Result offset used for quantizing */ + DataType output_data_type{ + DataType::UNKNOWN}; /**< Output tensor data type to use if the output is not initialized */ }; struct InstanceNormalizationLayerKernelInfo { /** Default constructor */ - InstanceNormalizationLayerKernelInfo() - : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true) + InstanceNormalizationLayerKernelInfo() : InstanceNormalizationLayerKernelInfo(1.f, 0.f, 1e-12, true) { } /** Constructor @@ -164,10 +190,10 @@ struct GEMMLowpReductionKernelInfo { } - int32_t k{ 0 }; /**< Number of matrix columns/rows */ - bool is_reshaped{ false }; /**< True if the input tensor has been reshaped */ - int32_t scalar{ 0 }; /**< Scalar value to multiply each reduced column/row by */ - bool mul_by_scalar{ false }; /**< True if each column/row reduction has to be multiplied by a scalar value */ + int32_t k{0}; /**< Number of matrix columns/rows */ + bool is_reshaped{false}; /**< True if the input tensor has been reshaped */ + int32_t scalar{0}; /**< Scalar value to multiply each reduced column/row by */ + bool mul_by_scalar{false}; /**< True if each column/row reduction has to be multiplied by a scalar value */ }; struct ScaleKernelInfo @@ -180,19 +206,22 @@ struct 
ScaleKernelInfo * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true. * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false. + * @param[in] data_layout (Optional) Data layout used by the layer. Defaults to @ref DataLayout::UNKNOWN */ ScaleKernelInfo(InterpolationPolicy interpolation_policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, - bool align_corners = false) - : interpolation_policy{ interpolation_policy }, - border_mode{ border_mode }, - constant_border_value{ constant_border_value }, - sampling_policy{ sampling_policy }, - use_padding{ use_padding }, - align_corners{ align_corners } + bool align_corners = false, + DataLayout data_layout = DataLayout::UNKNOWN) noexcept + : interpolation_policy{interpolation_policy}, + border_mode{border_mode}, + constant_border_value{constant_border_value}, + sampling_policy{sampling_policy}, + use_padding{use_padding}, + align_corners{align_corners}, + data_layout{data_layout} { } @@ -202,6 +231,23 @@ struct ScaleKernelInfo SamplingPolicy sampling_policy; /**< Sampling policy used by the interpolation. 
*/ bool use_padding; /**< Indication of using padding */ bool align_corners; /**< Align corners of input and output */ + DataLayout data_layout; /**< Data layout to use */ +}; + +struct MatMulKernelInfo +{ + MatMulKernelInfo() = default; + MatMulKernelInfo( + bool adj_lhs, bool adj_rhs, int m0 = 1, int n0 = 1, int k0 = 1, bool export_rhs_to_cl_image = false) + : adj_lhs{adj_lhs}, adj_rhs{adj_rhs}, m0{m0}, n0{n0}, k0{k0}, export_rhs_to_cl_image{export_rhs_to_cl_image} + { + } + bool adj_lhs{false}; /**< Get Adjoint LHS flag value */ + bool adj_rhs{false}; /**< Get Adjoint RHS flag value */ + int m0{1}; /**< Number of output rows processed by each work-item*/ + int n0{1}; /**< Number of output columns processed by each work-item*/ + int k0{1}; /**< Number of inner accumulations */ + bool export_rhs_to_cl_image{false}; /**< Flag to know whether the RHS tensor should be exported to cl_image*/ }; } // namespace arm_compute -#endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */ +#endif // ACL_ARM_COMPUTE_CORE_KERNELDESCRIPTORS_H diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h index 1515557f4c..03b861f765 100644 --- a/arm_compute/core/Log.h +++ b/arm_compute/core/Log.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,11 +34,11 @@ #define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() \ do \ { \ - if(arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \ + if (arm_compute::logging::LoggerRegistry::get().logger("CORE") == nullptr) \ { \ arm_compute::logging::LoggerRegistry::get().create_reserved_loggers(); \ } \ - } while(false) + } while (false) #else /* ARM_COMPUTE_LOGGING_ENABLED */ #define ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER() #endif /* ARM_COMPUTE_LOGGING_ENABLED */ @@ -53,7 +53,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \ - } while(false) + } while (false) /** Log a message with format to the core system logger * @@ -66,7 +66,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \ - } while(false) + } while (false) /** Log a stream to the core system logger * @@ -78,7 +78,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \ - } while(false) + } while (false) /** Log information level message to the core system logger * @@ -89,7 +89,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \ - } while(false) + } while (false) /** Log information level formatted message to the core system logger * @@ -101,7 +101,7 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, #fmt, __VA_ARGS__); \ - } while(false) + } while (false) /** Log information level stream to the core system logger * @@ -112,6 +112,6 @@ { \ ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \ - } while(false) + } while (false) #endif /* ARM_COMPUTE_LOGGING_MACROS_H */ diff --git a/arm_compute/core/MultiImageInfo.h b/arm_compute/core/MultiImageInfo.h deleted file mode 100644 index 
fcd7ba744d..0000000000 --- a/arm_compute/core/MultiImageInfo.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MULTIIMAGEINFO_H -#define ARM_COMPUTE_MULTIIMAGEINFO_H - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -/** Store the multi-planar image's metadata */ -class MultiImageInfo -{ -public: - /** Constructor */ - MultiImageInfo(); - /** Initialize the metadata structure with the given parameters - * - * @param[in] width Width of the image (in number of pixels) - * @param[in] height Height of the image (in number of pixels) - * @param[in] format Colour format of the image. 
- */ - void init(unsigned int width, unsigned int height, Format format); - /** Colour format of the image - * - * @return Colour format of the image - */ - Format format() const; - /** Width in pixels - * - * @return The width in pixels - */ - unsigned int width() const; - /** Height in pixels - * - * @return The height in pixels - */ - unsigned int height() const; - -protected: - unsigned int _width; - unsigned int _height; - Format _format; -}; -} -#endif /*ARM_COMPUTE_MULTIIMAGEINFO_H */ diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h deleted file mode 100644 index 5d9c1ec1e2..0000000000 --- a/arm_compute/core/NEON/INESimpleKernel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H -#define ARM_COMPUTE_INESIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPSimpleKernel.h" - -namespace arm_compute -{ -/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ -using INESimpleKernel = ICPPSimpleKernel; -} // namespace arm_compute -#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */ diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h deleted file mode 100644 index e4f4250d16..0000000000 --- a/arm_compute/core/NEON/NEAsymm.h +++ /dev/null @@ -1,760 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEASYMM_H -#define ARM_COMPUTE_NEASYMM_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -using qasymm8x8_t = uint8x8_t; /**< 8 bit quantized asymmetric vector with 8 elements */ -using qasymm8x8x2_t = uint8x8x2_t; /**< 8 bit quantized asymmetric vector with 16 elements */ -using qasymm8x8x3_t = uint8x8x3_t; /**< 8 bit quantized asymmetric vector with 24 elements */ -using qasymm8x8x4_t = uint8x8x4_t; /**< 8 bit quantized asymmetric vector with 32 elements */ -using qasymm8x16_t = uint8x16_t; /**< 8 bit quantized asymmetric vector with 16 elements */ - -using qasymm8x8_signed_t = int8x8_t; /**< 8 bit quantized signed asymmetric vector with 8 elements */ -using qasymm8x8x2_signed_t = int8x8x2_t; /**< 8 bit quantized signed asymmetric vector with 16 elements */ -using qasymm8x8x3_signed_t = int8x8x3_t; /**< 8 bit quantized signed asymmetric vector with 24 elements */ -using qasymm8x8x4_signed_t = int8x8x4_t; /**< 8 bit quantized signed asymmetric vector with 32 elements */ -using qasymm8x16_signed_t = int8x16_t; /**< 8 bit quantized signed asymmetric vector with 16 elements */ - -/** Perform a multiply-accumulate on all 16 components of a QASYMM8 vector - * - * vd*vs + vo - * - * @param[in] vd Input vector value in QASYMM8 format - * @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all four lanes. - * @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all four lanes. - * - * @return A 16-component vector in QASYMM8 format, saturated to fit - */ -uint8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo); - -/** Perform a multiply-accumulate on all 16 components of a QASYMM8_SIGNED vector - * - * vd*vs + vo - * - * @param[in] vd Input vector value in QASYMM8_SIGNED format - * @param[in] vs Vector multiplier in F32 format. The multiplier value must be duplicated across all four lanes. 
- * @param[in] vo Vector addend in F32 format. The addend value must be duplicated across all four lanes. - * - * @return A 16-component vector in QASYMM8_SIGNED format, saturated to fit - */ -int8x16_t vmlaq_qasymm8_signed(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo); - -/** Performs final quantization step on 16 elements - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param in_s32 Input to be quantized. - * @param result_fixedpoint_multiplier Result multiplier parameter - * @param result_shift Result shift parameter - * @param result_offset_after_shift_s32 Result offset parameter - * @param min_u8 Relu lower bound - * @param max_u8 Relu upper bound - * - * @return Quantized values - */ -template <bool is_bounded_relu> -uint8x16_t finalize_quantization(int32x4x4_t &in_s32, - int result_fixedpoint_multiplier, - int32_t result_shift, - int32x4_t result_offset_after_shift_s32, - uint8x16_t min_u8, - uint8x16_t max_u8) -{ - const static int32x4_t zero_s32 = vdupq_n_s32(0); - - if(result_shift < 0) - { - in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << (-result_shift))); - in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << (-result_shift))); - in_s32.val[2] = vmulq_n_s32(in_s32.val[2], (1 << (-result_shift))); - in_s32.val[3] = vmulq_n_s32(in_s32.val[3], (1 << (-result_shift))); - - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier); - in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier); - } - else - { - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], 
result_fixedpoint_multiplier); - in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier); - - // Round to the nearest division by a power-of-two using result_shift_s32 - in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift); - in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift); - in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift); - in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift); - } - - // Add the offset terms - in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32); - in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32); - in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32); - in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32); - - // Saturate negative values - in_s32.val[0] = vmaxq_s32(in_s32.val[0], zero_s32); - in_s32.val[1] = vmaxq_s32(in_s32.val[1], zero_s32); - in_s32.val[2] = vmaxq_s32(in_s32.val[2], zero_s32); - in_s32.val[3] = vmaxq_s32(in_s32.val[3], zero_s32); - - // Convert S32 to S16 - const int16x8x2_t in_s16 = - { - { - vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])), - vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3])) - } - }; - - // Convert S16 to U8 - uint8x16_t out_u8 = vcombine_u8(vqmovun_s16(in_s16.val[0]), vqmovun_s16(in_s16.val[1])); - - if(is_bounded_relu) - { - out_u8 = vmaxq_u8(out_u8, min_u8); - out_u8 = vminq_u8(out_u8, max_u8); - } - - return out_u8; -} - -/** Performs final quantization step on 16 elements - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param in_s32 Input to be quantized. 
- * @param result_fixedpoint_multiplier Result multiplier parameter - * @param result_shift Result shift parameter - * @param result_offset_after_shift_s32 Result offset parameter - * @param min_s8 Relu lower bound - * @param max_s8 Relu upper bound - * - * @return Quantized values - */ -template <bool is_bounded_relu> -int8x16_t finalize_quantization(int32x4x4_t &in_s32, - int result_fixedpoint_multiplier, - int32_t result_shift, - int32x4_t result_offset_after_shift_s32, - int8x16_t min_s8, - int8x16_t max_s8) -{ - if(result_shift < 0) - { - in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << (-result_shift))); - in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << (-result_shift))); - in_s32.val[2] = vmulq_n_s32(in_s32.val[2], (1 << (-result_shift))); - in_s32.val[3] = vmulq_n_s32(in_s32.val[3], (1 << (-result_shift))); - - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier); - in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier); - } - else - { - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - in_s32.val[2] = vqrdmulhq_n_s32(in_s32.val[2], result_fixedpoint_multiplier); - in_s32.val[3] = vqrdmulhq_n_s32(in_s32.val[3], result_fixedpoint_multiplier); - - // Round to the nearest division by a power-of-two using result_shift_s32 - in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift); - in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift); - in_s32.val[2] = rounding_divide_by_pow2(in_s32.val[2], result_shift); - in_s32.val[3] = rounding_divide_by_pow2(in_s32.val[3], result_shift); - } - - // Add the offset terms - in_s32.val[0] = 
vaddq_s32(in_s32.val[0], result_offset_after_shift_s32); - in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32); - in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32); - in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32); - - // Convert S32 to S16 - const int16x8x2_t in_s16 = - { - { - vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])), - vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3])) - } - }; - - // Convert S16 to S8 - int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1])); - - if(is_bounded_relu) - { - out_s8 = vmaxq_s8(out_s8, min_s8); - out_s8 = vminq_s8(out_s8, max_s8); - } - - return out_s8; -} - -/** Performs final quantization step on 16 elements for symmetric quantization - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param in_s32 Input to be quantized. - * @param result_fixedpoint_multiplier Result multiplier parameter - * @param result_shift Result shift parameter - * @param result_offset_after_shift_s32 Result offset parameter - * @param min_s8 Relu lower bound - * @param max_s8 Relu upper bound - * - * @return Quantized values - */ -template <bool is_bounded_relu> -inline int8x16_t finalize_quantization_symm(int32x4x4_t &in_s32, - const int32x4x4_t &result_fixedpoint_multiplier, - const int32x4x4_t &result_shift, - const int32x4_t &result_offset_after_shift_s32, - const int8x16_t &min_s8, - const int8x16_t &max_s8) -{ - const static int32x4_t one_s32 = vdupq_n_s32(1); - - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - int32x4x4_t res_shift_gt0 = - { - vqrdmulhq_s32(in_s32.val[0], result_fixedpoint_multiplier.val[0]), - vqrdmulhq_s32(in_s32.val[1], result_fixedpoint_multiplier.val[1]), - vqrdmulhq_s32(in_s32.val[2], result_fixedpoint_multiplier.val[2]), - vqrdmulhq_s32(in_s32.val[3], result_fixedpoint_multiplier.val[3]), - }; - // 
Round to the nearest division by a power-of-two using result_shift_s32 - res_shift_gt0.val[0] = rounding_divide_by_pow2(res_shift_gt0.val[0], result_shift.val[0]); - res_shift_gt0.val[1] = rounding_divide_by_pow2(res_shift_gt0.val[1], result_shift.val[1]); - res_shift_gt0.val[2] = rounding_divide_by_pow2(res_shift_gt0.val[2], result_shift.val[2]); - res_shift_gt0.val[3] = rounding_divide_by_pow2(res_shift_gt0.val[3], result_shift.val[3]); - - int32x4x4_t res_shift_lt0 = - { - vmulq_s32(in_s32.val[0], vshlq_s32(one_s32, vnegq_s32(result_shift.val[0]))), - vmulq_s32(in_s32.val[1], vshlq_s32(one_s32, vnegq_s32(result_shift.val[1]))), - vmulq_s32(in_s32.val[2], vshlq_s32(one_s32, vnegq_s32(result_shift.val[2]))), - vmulq_s32(in_s32.val[3], vshlq_s32(one_s32, vnegq_s32(result_shift.val[3]))), - }; - res_shift_lt0.val[0] = vqrdmulhq_s32(res_shift_lt0.val[0], result_fixedpoint_multiplier.val[0]); - res_shift_lt0.val[1] = vqrdmulhq_s32(res_shift_lt0.val[1], result_fixedpoint_multiplier.val[1]); - res_shift_lt0.val[2] = vqrdmulhq_s32(res_shift_lt0.val[2], result_fixedpoint_multiplier.val[2]); - res_shift_lt0.val[3] = vqrdmulhq_s32(res_shift_lt0.val[3], result_fixedpoint_multiplier.val[3]); - - // Select result depending on shift value - const uint32x4x4_t mask_lt0 = - { -#ifdef __aarch64__ - vcltzq_s32(result_shift.val[0]), - vcltzq_s32(result_shift.val[1]), - vcltzq_s32(result_shift.val[2]), - vcltzq_s32(result_shift.val[3]), -#else //__aarch64__ - vcltq_s32(result_shift.val[0], vdupq_n_s32(0)), - vcltq_s32(result_shift.val[1], vdupq_n_s32(0)), - vcltq_s32(result_shift.val[2], vdupq_n_s32(0)), - vcltq_s32(result_shift.val[3], vdupq_n_s32(0)), -#endif //__aarch64__ - }; - - in_s32.val[0] = vbslq_s32(mask_lt0.val[0], res_shift_lt0.val[0], res_shift_gt0.val[0]); - in_s32.val[1] = vbslq_s32(mask_lt0.val[1], res_shift_lt0.val[1], res_shift_gt0.val[1]); - in_s32.val[2] = vbslq_s32(mask_lt0.val[2], res_shift_lt0.val[2], res_shift_gt0.val[2]); - in_s32.val[3] = 
vbslq_s32(mask_lt0.val[3], res_shift_lt0.val[3], res_shift_gt0.val[3]); - - // Add the offset terms - in_s32.val[0] = vaddq_s32(in_s32.val[0], result_offset_after_shift_s32); - in_s32.val[1] = vaddq_s32(in_s32.val[1], result_offset_after_shift_s32); - in_s32.val[2] = vaddq_s32(in_s32.val[2], result_offset_after_shift_s32); - in_s32.val[3] = vaddq_s32(in_s32.val[3], result_offset_after_shift_s32); - - // Convert S32 to S16 - const int16x8x2_t in_s16 = - { - { - vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])), - vcombine_s16(vqmovn_s32(in_s32.val[2]), vqmovn_s32(in_s32.val[3])) - } - }; - - // Convert S16 to S8 - int8x16_t out_s8 = vcombine_s8(vqmovn_s16(in_s16.val[0]), vqmovn_s16(in_s16.val[1])); - - if(is_bounded_relu) - { - out_s8 = vmaxq_s8(out_s8, min_s8); - out_s8 = vminq_s8(out_s8, max_s8); - } - - return out_s8; -} - -/** Performs final quantization step on single element - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param[in] in_value Input to be quantized. 
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter - * @param[in] result_shift Result shift parameter - * @param[in] result_offset_after_shift_s32 Result offset parameter - * @param[in] min_u8 Relu lower bound - * @param[in] max_u8 Relu upper bound - * - * @return Quantized value - */ -template <bool is_bounded_relu> -inline uint8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier, - int32_t result_shift, int32_t result_offset_after_shift_s32, - uint8_t min_u8, uint8_t max_u8) -{ - int32x4_t in_s32 = vdupq_n_s32(in_value); - - if(result_shift < 0) - { - in_value = vgetq_lane_s32(vqrdmulhq_n_s32(vmulq_n_s32(in_s32, (1 << (-result_shift))), result_fixedpoint_multiplier), 0); - } - else - { - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0); - // Shift value by result_shift_s32 - in_value = rounding_divide_by_pow2(in_value, result_shift); - } - - // Add the offset term - in_value += result_offset_after_shift_s32; - - // Bound the result - uint8_t out_u8 = static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, in_value))); - if(is_bounded_relu) - { - out_u8 = static_cast<uint8_t>(std::max(min_u8, std::min(max_u8, out_u8))); - } - - return out_u8; -} - -/** Performs final quantization step on single element - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param[in] in_value Input to be quantized. 
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter - * @param[in] result_shift Result shift parameter - * @param[in] result_offset_after_shift_s32 Result offset parameter - * @param[in] min_s8 Relu lower bound - * @param[in] max_s8 Relu upper bound - * - * @return Quantized value - */ -template <bool is_bounded_relu> -inline int8_t finalize_quantization(int32_t in_value, int result_fixedpoint_multiplier, - int32_t result_shift, int32_t result_offset_after_shift_s32, - int8_t min_s8, int8_t max_s8) -{ - int32x4_t in_s32 = vdupq_n_s32(in_value); - - if(result_shift < 0) - { - in_value = vgetq_lane_s32(vqrdmulhq_n_s32(vmulq_n_s32(in_s32, (1 << (-result_shift))), result_fixedpoint_multiplier), 0); - } - else - { - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_value = vgetq_lane_s32(vqrdmulhq_n_s32(in_s32, result_fixedpoint_multiplier), 0); - - // Shift value by result_shift_s32 - in_value = rounding_divide_by_pow2(in_value, result_shift); - } - - // Add the offset term - in_value += result_offset_after_shift_s32; - - // Bound the result - int8_t out_s8 = static_cast<int8_t>(std::max<int32_t>(-128, std::min<int32_t>(127, in_value))); - if(is_bounded_relu) - { - out_s8 = static_cast<int8_t>(std::max(min_s8, std::min(max_s8, out_s8))); - } - - return out_s8; -} - -/** Dequantize a neon vector holding 8 quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return Dequantized values in a neon vector - */ -inline float32x4x2_t vdequantize(const uint8x8_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const int32x4_t voffset = vdupq_n_s32(offset); - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x2_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(qv)))), voffset)), vscale), - } - }; - return vdequantized_input; -} - -/** Dequantize a neon vector holding 8 singed quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] qi Quantization information to be used in the computation. - * - * @return Dequantized values in a neon vector - */ -inline float32x4x2_t vdequantize(const int8x8_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const int32x4_t voffset = vdupq_n_s32(offset); - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x2_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(qv))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(qv))), voffset)), vscale), - } - }; - return vdequantized_input; -} - -/** Dequantize a neon vector holding 16 quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const uint8x16_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const int32x4_t voffset = vdupq_n_s32(offset); - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), - } - }; - return vdequantized_input; -} - -/** Dequantize a neon vector holding 16 signed quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const int8x16_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const int32x4_t voffset = vdupq_n_s32(offset); - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale), - } - }; - return vdequantized_input; -} - -/** Dequantize following an asymmetric quantization scheme a neon vector holding 16 quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] scale Quantization scaling factor. - * @param[in] offset Zero quantization offset. 
- * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offset) -{ - const int32x4_t voffset = vdupq_n_s32(offset); - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), - } - }; - return vdequantized_input; -} - -/** Dequantize a vector of 16 values stored as signed asymmetric. - * - * @param[in] qv Input values to be dequantized. - * @param[in] scale Quantization scaling factor. - * @param[in] offset Zero quantization offset. - * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale, int32_t offset) -{ - const int32x4_t voffset = vdupq_n_s32(offset); - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale), - vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv)))), voffset)), vscale), - } - }; - return vdequantized_input; -} - -/** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values. 
- * - * @param[in] qv Input values to be dequantized. - * @param[in] vscale Vector containing quantization scaling factors. - * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const int8x16_t &qv, const float32x4x4_t vscale) -{ - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[0]), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[1]), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[2]), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[3]), - } - }; - return vdequantized_input; -} - -/** Dequantize following a symmetric quantization scheme a neon vector holding 16 quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] scale Quantization scaling factor. - * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const int8x16_t &qv, float scale) -{ - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale), - } - }; - return vdequantized_input; -} - -/** Quantize a neon vector holding 8 floating point values. - * - * @param[in] qv Input values to be quantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return A neon vector holding the quantized values - */ -inline uint8x8_t vquantize(const float32x4x2_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const float32x4_t voffset = vdupq_n_f32(offset); - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - const int32x4x4_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), -#else //__aarch64__ - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), -#endif //__aarch64__ - } - }; - return vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); -} - -/** Quantize a neon vector holding 8 floating point values. - * - * @param[in] qv Input values to be quantized. - * @param[in] qi Quantization information to be used in the computation. - * - * @return A neon vector holding the singed quantized values - */ -inline int8x8_t vquantize_signed(const float32x4x2_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const float32x4_t voffset = vdupq_n_f32(offset); - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - const int32x4x4_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), -#else //__aarch64__ - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), -#endif //__aarch64__ - } - }; - return vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); -} - -/** Quantize a neon vector holding 16 floating point values. - * - * @param[in] qv Input values to be quantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return A neon vector holding the quantized values - */ -inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const float32x4_t voffset = vdupq_n_f32(offset); - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - const int32x4x4_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), -#else //__aarch64__ - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), -#endif //__aarch64__ - } - }; - const uint8x8_t pa = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); - const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3]))); - return vcombine_u8(pa, pb); -} - -/** Signed quantize a neon vector holding 16 floating point values. - * - * @param[in] qv Input values to be quantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return A neon vector holding the quantized values - */ -inline int8x16_t vquantize_signed(const float32x4x4_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const float32x4_t voffset = vdupq_n_f32(offset); - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - const int32x4x4_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), -#else //__aarch64__ - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), -#endif //__aarch64__ - } - }; - const int8x8_t pa = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1]))); - const int8x8_t pb = vqmovn_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3]))); - return vcombine_s8(pa, pb); -} - -/** Quantize to QASYMM16 a neon vector holding 16 floating point values. - * - * @param[in] qv Input values to be quantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return A neon vector holding the quantized values - */ -inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const int offset = qi.offset; - const float32x4_t voffset = vdupq_n_f32(offset); - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - const int32x4x4_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), - vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), -#else //__aarch64__ - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)), - vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)), -#endif //__aarch64__ - } - }; - const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1])); - const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3])); - return { pa, pb }; -} -} // namespace arm_compute -#include "arm_compute/core/NEON/NEAsymm.inl" -#endif // ARM_COMPUTE_NEASYMM_H diff --git a/arm_compute/core/NEON/NEAsymm.inl b/arm_compute/core/NEON/NEAsymm.inl deleted file mode 100644 index 71205e0403..0000000000 --- a/arm_compute/core/NEON/NEAsymm.inl +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -namespace arm_compute -{ -inline qasymm8x16_t vmlaq_qasymm8(qasymm8x16_t vd, float32x4_t vs, float32x4_t vo) -{ - // Convert uint8 vectors to uint16 vectors - const uint8x8_t vd_low = vget_low_u8(vd); - const uint8x8_t vd_high = vget_high_u8(vd); - uint16x8_t vd_low_u16x8 = vmovl_u8(vd_low); - uint16x8_t vd_high_u16x8 = vmovl_u8(vd_high); - // Convert uint16 vectors to uint32 vectors - uint32x4_t A_u32x4 = vmovl_u16(vget_low_u16(vd_low_u16x8)); - uint32x4_t B_u32x4 = vmovl_u16(vget_high_u16(vd_low_u16x8)); - uint32x4_t C_u32x4 = vmovl_u16(vget_low_u16(vd_high_u16x8)); - uint32x4_t D_u32x4 = vmovl_u16(vget_high_u16(vd_high_u16x8)); - // Convert uint32 vectors to float32 vectors - float32x4_t A_f32x4 = vcvtq_f32_u32(A_u32x4); - float32x4_t B_f32x4 = vcvtq_f32_u32(B_u32x4); - float32x4_t C_f32x4 = vcvtq_f32_u32(C_u32x4); - float32x4_t D_f32x4 = vcvtq_f32_u32(D_u32x4); - // vd = vd*vs + vo - A_f32x4 = vmlaq_f32(vo, A_f32x4, vs); - B_f32x4 = vmlaq_f32(vo, B_f32x4, vs); - C_f32x4 = vmlaq_f32(vo, C_f32x4, vs); - D_f32x4 = vmlaq_f32(vo, D_f32x4, vs); - // Convert float32 vectors to uint32 vectors - A_u32x4 = vcvtq_u32_f32(A_f32x4); - B_u32x4 = vcvtq_u32_f32(B_f32x4); - C_u32x4 = vcvtq_u32_f32(C_f32x4); - D_u32x4 = vcvtq_u32_f32(D_f32x4); - // Convert uint32 vectors to uint16 vectors (with saturation) - vd_low_u16x8 = vcombine_u16(vqmovn_u32(A_u32x4), vqmovn_u32(B_u32x4)); - vd_high_u16x8 = vcombine_u16(vqmovn_u32(C_u32x4), vqmovn_u32(D_u32x4)); - // convert uint16 vectors to uint8 vectors (with saturation) - return vcombine_u8(vqmovn_u16(vd_low_u16x8), vqmovn_u16(vd_high_u16x8)); -} -inline qasymm8x16_signed_t vmlaq_qasymm8_signed(qasymm8x16_signed_t vd, float32x4_t vs, float32x4_t vo) -{ - // Convert uint8 vectors to int16 vectors - const int8x8_t vd_low = vget_low_s8(vd); - const int8x8_t vd_high = vget_high_s8(vd); - int16x8_t vd_low_s16x8 = vmovl_s8(vd_low); - int16x8_t vd_high_s16x8 = vmovl_s8(vd_high); - // Convert int16 vectors to int32 vectors - int32x4_t 
A_s32x4 = vmovl_s16(vget_low_s16(vd_low_s16x8)); - int32x4_t B_s32x4 = vmovl_s16(vget_high_s16(vd_low_s16x8)); - int32x4_t C_s32x4 = vmovl_s16(vget_low_s16(vd_high_s16x8)); - int32x4_t D_s32x4 = vmovl_s16(vget_high_s16(vd_high_s16x8)); - // Convert int32 vectors to float32 vectors - float32x4_t A_f32x4 = vcvtq_f32_s32(A_s32x4); - float32x4_t B_f32x4 = vcvtq_f32_s32(B_s32x4); - float32x4_t C_f32x4 = vcvtq_f32_s32(C_s32x4); - float32x4_t D_f32x4 = vcvtq_f32_s32(D_s32x4); - // vd = vd*vs + vo - A_f32x4 = vmlaq_f32(vo, A_f32x4, vs); - B_f32x4 = vmlaq_f32(vo, B_f32x4, vs); - C_f32x4 = vmlaq_f32(vo, C_f32x4, vs); - D_f32x4 = vmlaq_f32(vo, D_f32x4, vs); - // Convert float32 vectors to int32 vectors - A_s32x4 = vcvtq_s32_f32(A_f32x4); - B_s32x4 = vcvtq_s32_f32(B_f32x4); - C_s32x4 = vcvtq_s32_f32(C_f32x4); - D_s32x4 = vcvtq_s32_f32(D_f32x4); - // Convert int32 vectors to int16 vectors (with saturation) - vd_low_s16x8 = vcombine_s16(vqmovn_s32(A_s32x4), vqmovn_s32(B_s32x4)); - vd_high_s16x8 = vcombine_s16(vqmovn_s32(C_s32x4), vqmovn_s32(D_s32x4)); - // convert int16 vectors to int8 vectors (with saturation) - return vcombine_s8(vqmovn_s16(vd_low_s16x8), vqmovn_s16(vd_high_s16x8)); -} -} // namespace arm_compute diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl deleted file mode 100644 index 2cf52e58d2..0000000000 --- a/arm_compute/core/NEON/NEColorConvertHelper.inl +++ /dev/null @@ -1,1045 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/Utils.h" - -#include <arm_neon.h> - -namespace -{ -#ifndef DOXYGEN_SKIP_THIS -constexpr float red_coef_bt709 = 1.5748F; -constexpr float green_coef_bt709 = -0.1873f; -constexpr float green_coef2_bt709 = -0.4681f; -constexpr float blue_coef_bt709 = 1.8556f; - -constexpr float rgb2yuv_bt709_kr = 0.2126f; -constexpr float rgb2yuv_bt709_kb = 0.0722f; -// K_g = 1 - K_r - K_b -constexpr float rgb2yuv_bt709_kg = 0.7152f; -// C_u = 1 / (2 * (1 - K_b)) -constexpr float rgb2yuv_bt709_cu = 0.5389f; -// C_v = 1 / (2 * (1 - K_r)) -constexpr float rgb2yuv_bt709_cv = 0.6350f; - -constexpr float rgb2u8_red_coef = 0.2126f; -constexpr float rgb2u8_green_coef = 0.7152f; -constexpr float rgb2u8_blue_coef = 0.0722f; - -inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor, - const float rcoef, const float gcoef, const float bcoef) -{ - float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef); - greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef); - greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef); - return greyscale; -} - -inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out) -{ - float32x4x4_t out_float32; - - //Conversion from 3(RGB) 4 uint8s to 3(RGB) 4 floats - const float32x4x4_t r_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[0]); - const float32x4x4_t g_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[1]); - const float32x4x4_t b_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[2]); - - //New grayscale image = ( (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B) ) - //Computation of 1(Greyscale) 4 uint8 using 3(RGB) 4 uint8s float - out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0], - rgb2u8_red_coef, 
rgb2u8_green_coef, rgb2u8_blue_coef); - - out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - //Conversion from 1(Greyscale) 4 floats to 1(Greyscale) 4 uint8s - arm_compute::convert_float32x4x4_to_uint8x16(out_float32, out); -} - -inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec, - float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec) -{ - /* - Y'= 0.2126*R' + 0.7152*G' + 0.0722*B' - U'=-0.1146*R' - 0.3854*G' + 0.5000*B' - V'= 0.5000*R' - 0.4542*G' - 0.0458*B' - */ - const auto c128 = vdupq_n_f32(128.f); - - // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b - yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr); - yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg); - yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb); - - // U = (B - Y) / (2 * (1 - K_b)) - uvec = vsubq_f32(bvec, yvec); - uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu); - - // V = (R - Y) / (2 * (1 - K_r)) - vvec = vsubq_f32(rvec, yvec); - vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv); -} - -inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val, - float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha) -{ - float32x4x3_t rgb1, rgb2; - - // Compute: cb - 128 and cr - 128; - const auto c128 = vdupq_n_f32(128.f); - uvec_val = vsubq_f32(uvec_val, c128); - vvec_val = vsubq_f32(vvec_val, c128); - - // Compute: - // r = 0.0000f*f_u + 1.5748f*f_v; - // g = 0.1873f*f_u - 0.4681f*f_v; - // b = 1.8556f*f_u + 0.0000f*f_v; - const auto red = 
vmulq_n_f32(vvec_val, red_coef_bt709); - const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709); - const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709), - vmulq_n_f32(vvec_val, green_coef2_bt709)); - - // Compute the final r,g,b values using y1 for the first texel and y2 for the second one. - // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t - // and written back to memory using vst3 instruction - - rgb1.val[0] = vaddq_f32(yvec_val, red); - rgb1.val[1] = vaddq_f32(yvec_val, green); - rgb1.val[2] = vaddq_f32(yvec_val, blue); - - rgb2.val[0] = vaddq_f32(yyvec_val, red); - rgb2.val[1] = vaddq_f32(yyvec_val, green); - rgb2.val[2] = vaddq_f32(yyvec_val, blue); - - uint8x8x3_t u8_rgb; - arm_compute::convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb); - - if(!alpha) - { - vst3_lane_u8(&output_ptr[0], u8_rgb, 0); - vst3_lane_u8(&output_ptr[3], u8_rgb, 4); - vst3_lane_u8(&output_ptr[6], u8_rgb, 1); - vst3_lane_u8(&output_ptr[9], u8_rgb, 5); - vst3_lane_u8(&output_ptr[12], u8_rgb, 2); - vst3_lane_u8(&output_ptr[15], u8_rgb, 6); - vst3_lane_u8(&output_ptr[18], u8_rgb, 3); - vst3_lane_u8(&output_ptr[21], u8_rgb, 7); - } - else - { - uint8x8x4_t u8_rgba; - u8_rgba.val[0] = u8_rgb.val[0]; - u8_rgba.val[1] = u8_rgb.val[1]; - u8_rgba.val[2] = u8_rgb.val[2]; - u8_rgba.val[3] = vdup_n_u8(255); - vst4_lane_u8(&output_ptr[0], u8_rgba, 0); - vst4_lane_u8(&output_ptr[4], u8_rgba, 4); - vst4_lane_u8(&output_ptr[8], u8_rgba, 1); - vst4_lane_u8(&output_ptr[12], u8_rgba, 5); - vst4_lane_u8(&output_ptr[16], u8_rgba, 2); - vst4_lane_u8(&output_ptr[20], u8_rgba, 6); - vst4_lane_u8(&output_ptr[24], u8_rgba, 3); - vst4_lane_u8(&output_ptr[28], u8_rgba, 7); - } -} - -inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha) -{ - uint8x16x3_t rgb; - - if(alpha) - { - const auto tmp = vld4q_u8(ptr); - rgb.val[0] = tmp.val[0]; - rgb.val[1] = tmp.val[1]; - rgb.val[2] = tmp.val[2]; - } - else - { - rgb = vld3q_u8(ptr); - } 
- - return rgb; -} - -inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom) -{ - // Convert the uint8x16_t to float32x4x4_t - const float32x4x4_t frvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[0]); - const float32x4x4_t fgvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[1]); - const float32x4x4_t fbvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[2]); - - const float32x4x4_t frvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[0]); - const float32x4x4_t fgvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[1]); - const float32x4x4_t fbvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[2]); - - float32x4x4_t fyvec_top, fuvec_top, fvvec_top; - float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom; - - for(auto i = 0; i < 4; ++i) - { - rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i], - fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]); - rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i], - fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]); - } - - arm_compute::convert_float32x4x4_to_uint8x16(fyvec_top, vec_top.val[0]); - arm_compute::convert_float32x4x4_to_uint8x16(fuvec_top, vec_top.val[1]); - arm_compute::convert_float32x4x4_to_uint8x16(fvvec_top, vec_top.val[2]); - arm_compute::convert_float32x4x4_to_uint8x16(fyvec_bottom, vec_bottom.val[0]); - arm_compute::convert_float32x4x4_to_uint8x16(fuvec_bottom, vec_bottom.val[1]); - arm_compute::convert_float32x4x4_to_uint8x16(fvvec_bottom, vec_bottom.val[2]); -} - -inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, - const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, - unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, - unsigned char *const __restrict 
out_uv) -{ - uint8x16x3_t vec_top, vec_bottom; - vec_top.val[0] = rvec_top; - vec_top.val[1] = gvec_top; - vec_top.val[2] = bvec_top; - vec_bottom.val[0] = rvec_bottom; - vec_bottom.val[1] = gvec_bottom; - vec_bottom.val[2] = bvec_bottom; - - rgb_to_yuv_conversion(vec_top, vec_bottom); - - vst1q_u8(out_y_top, vec_top.val[0]); - vst1q_u8(out_y_bottom, vec_bottom.val[0]); - - const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]); - const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]); - const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]); - const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]); - - uint8x8x2_t uvvec; - uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp)); - uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp)); - - vst2_u8(out_uv, uvvec); -} - -inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, - const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, - unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, - unsigned char *const __restrict out_u, - unsigned char *const __restrict out_v) -{ - uint8x16x3_t vec_top, vec_bottom; - vec_top.val[0] = rvec_top; - vec_top.val[1] = gvec_top; - vec_top.val[2] = bvec_top; - vec_bottom.val[0] = rvec_bottom; - vec_bottom.val[1] = gvec_bottom; - vec_bottom.val[2] = bvec_bottom; - - rgb_to_yuv_conversion(vec_top, vec_bottom); - - vst1q_u8(out_y_top, vec_top.val[0]); - vst1q_u8(out_y_bottom, vec_bottom.val[0]); - - const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]); - const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]); - const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]), - vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1])); - - vst1_u8(out_u, vget_low_u8(uvvec)); - vst1_u8(out_v, vget_high_u8(uvvec)); -} - -inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t 
&gvec, const uint8x16_t &bvec, - unsigned char *const __restrict out_y, - unsigned char *const __restrict out_u, - unsigned char *const __restrict out_v) -{ - // Convert the uint8x16_t to float32x4x4_t - const float32x4x4_t frvec = arm_compute::convert_uint8x16_to_float32x4x4(rvec); - const float32x4x4_t fgvec = arm_compute::convert_uint8x16_to_float32x4x4(gvec); - const float32x4x4_t fbvec = arm_compute::convert_uint8x16_to_float32x4x4(bvec); - - float32x4x4_t fyvec, fuvec, fvvec; - for(auto i = 0; i < 4; ++i) - { - rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i], - fyvec.val[i], fuvec.val[i], fvvec.val[i]); - } - - uint8x16_t yvec, uvec, vvec; - arm_compute::convert_float32x4x4_to_uint8x16(fyvec, yvec); - arm_compute::convert_float32x4x4_to_uint8x16(fuvec, uvec); - arm_compute::convert_float32x4x4_to_uint8x16(fvvec, vvec); - - vst1q_u8(out_y, yvec); - vst1q_u8(out_u, uvec); - vst1q_u8(out_v, vvec); -} -#endif /* DOXYGEN_SKIP_THIS */ -} - -namespace arm_compute -{ -/** Convert RGB to RGBX. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output RGBX buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IImage *__restrict>(output); - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta1 = vld3q_u8(in.ptr()); - uint8x16x4_t ta2; - ta2.val[0] = ta1.val[0]; - ta2.val[1] = ta1.val[1]; - ta2.val[2] = ta1.val[2]; - ta2.val[3] = vdupq_n_u8(255); - vst4q_u8(out.ptr(), ta2); - }, - in, out); -} - -/** Convert RGB to U8. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output U8 buffer. 
- * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IImage *__restrict>(output); - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta1 = vld3q_u8(in.ptr()); - uint8x16_t ta2; - rgb_to_u8_conversion(ta1, ta2); - vst1q_u8(out.ptr(), ta2); - }, - in, out); -} - -/** Convert RGBX to RGB. - * - * @param[in] input Input RGBX data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IImage *__restrict>(output); - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta1 = vld4q_u8(in.ptr()); - uint8x16x3_t ta2; - ta2.val[0] = ta1.val[0]; - ta2.val[1] = ta1.val[1]; - ta2.val[2] = ta1.val[2]; - vst3q_u8(out.ptr(), ta2); - }, - in, out); -} - -/** Convert YUYV to RGB. - * - * @param[in] input Input YUYV data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. 
- * - */ -template <bool yuyv, bool alpha> -void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IImage *__restrict>(output); - - constexpr auto element_size = alpha ? 32 : 24; - constexpr auto shift = yuyv ? 0 : 1; - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta = vld4q_u8(in.ptr()); - //ta.val[0] = Y0 Y2 Y4 Y6 ... - //ta.val[1] = U0 U2 U4 U6 ... - //ta.val[2] = Y1 Y3 Y5 Y7 ... - //ta.val[3] = V0 V2 V4 V7 ... - - // Convert the uint8x16x4_t to float32x4x4_t - const float32x4x4_t yvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[0 + shift]); - const float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[1 - shift]); - const float32x4x4_t yyvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[2 + shift]); - const float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[3 - shift]); - - yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); - }, - in, out); -} - -/** Convert NV12 to RGB. - * - * @param[in] input Input NV12 data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. 
- * - */ -template <bool uv, bool alpha> -void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); - const auto output_ptr = static_cast<IImage *__restrict>(output); - - constexpr auto element_size = alpha ? 32 : 24; - const auto out_stride = output_ptr->info()->strides_in_bytes().y(); - constexpr auto shift = uv ? 0 : 1; - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_uv(input_ptr->plane(1), win_uv); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_uv = vld2q_u8(in_uv.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... 
- - // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]); - float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]); - float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]); - float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]); - float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]); - float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]); - - yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); - - yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); - }, - in_y, in_uv, out); -} - -/** Convert IYUV to RGB. - * - * @param[in] input Input IYUV data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. 
- * - */ -template <bool alpha> -void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); - const auto output_ptr = static_cast<IImage *__restrict>(output); - - constexpr auto element_size = alpha ? 32 : 24; - const auto out_stride = output_ptr->info()->strides_in_bytes().y(); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_u(input_ptr->plane(1), win_uv); - Iterator in_v(input_ptr->plane(2), win_uv); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto *y_top_ptr = in_y.ptr(); - const auto *y_bottom_ptr = in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y(); - const auto *u_ptr = in_u.ptr(); - const auto *v_ptr = in_v.ptr(); - - // Work-around issue in gcc 9(>=) where vld2q might cause issues with register allocation -#if defined(__arch64__) - const auto ta0_y_top = vld1q_u8(y_top_ptr); - const auto ta1_y_top = vld1q_u8(y_top_ptr + 16); - const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr); - const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16); - const auto ta_u = vld1q_u8(u_ptr); - const auto ta_v = vld1q_u8(v_ptr); - - // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_top, ta1_y_top)); - float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_top, ta1_y_top)); - float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_bottom, 
ta1_y_bottom)); - float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_bottom, ta1_y_bottom)); - float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u); - float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v); -#else /* defined(__arch64__) */ - const auto ta_y_top = vld2q_u8(y_top_ptr); - const auto ta_y_bottom = vld2q_u8(y_bottom_ptr); - const auto ta_u = vld1q_u8(u_ptr); - const auto ta_v = vld1q_u8(v_ptr); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_u.val[0] = U0 U2 U4 U6 ... - //ta_v.val[0] = V0 V2 V4 V6 ... - - // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]); - float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]); - float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]); - float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]); - float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u); - float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v); -#endif /* defined(__arch64__) */ - - yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); - - yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * 
element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); - }, - in_y, in_u, in_v, out); -} - -/** Convert YUYV to NV12. - * - * @param[in] input Input YUYV data buffer. - * @param[out] output Output NV12 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template <bool yuyv> -void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - constexpr auto shift = yuyv ? 0 : 1; - - // NV12's UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_uv(output_ptr->plane(1), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_top = vld4q_u8(in.ptr()); - const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); - //ta.val[0] = Y0 Y2 Y4 Y6 ... - //ta.val[1] = U0 U2 U4 U6 ... - //ta.val[2] = Y1 Y3 Y5 Y7 ... - //ta.val[3] = V0 V2 V4 V7 ... 
- - uint8x16x2_t yvec; - yvec.val[0] = ta_top.val[0 + shift]; - yvec.val[1] = ta_top.val[2 + shift]; - vst2q_u8(out_y.ptr(), yvec); - - uint8x16x2_t yyvec; - yyvec.val[0] = ta_bottom.val[0 + shift]; - yyvec.val[1] = ta_bottom.val[2 + shift]; - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); - - uint8x16x2_t uvvec; - uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); - uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); - vst2q_u8(out_uv.ptr(), uvvec); - }, - in, out_y, out_uv); -} - -/** Convert IYUV to NV12. - * - * @param[in] input Input IYUV data buffer. - * @param[out] output Output NV12 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_u(input_ptr->plane(1), win_uv); - Iterator in_v(input_ptr->plane(2), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_uv(output_ptr->plane(1), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - uint8x16x2_t ta_uv; - ta_uv.val[0] = vld1q_u8(in_u.ptr()); - ta_uv.val[1] = vld1q_u8(in_v.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... 
- //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - vst2q_u8(out_uv.ptr(), ta_uv); - }, - in_y, in_u, in_v, out_y, out_uv); -} - -/** Convert NV12 to IYUV. - * - * @param[in] input Input NV12 data buffer. - * @param[out] output Output IYUV buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template <bool uv> -void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - constexpr auto shift = uv ? 0 : 1; - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_uv(input_ptr->plane(1), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win_uv); - Iterator out_v(output_ptr->plane(2), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_uv = vld2q_u8(in_uv.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... 
- - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]); - vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]); - }, - in_y, in_uv, out_y, out_u, out_v); -} - -/** Convert YUYV to IYUV. - * - * @param[in] input Input YUYV data buffer. - * @param[out] output Output IYUV buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template <bool yuyv> -void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - constexpr auto shift = yuyv ? 0 : 1; - - // Destination's UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win_uv); - Iterator out_v(output_ptr->plane(2), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_top = vld4q_u8(in.ptr()); - const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); - //ta.val[0] = Y0 Y2 Y4 Y6 ... - //ta.val[1] = U0 U2 U4 U6 ... - //ta.val[2] = Y1 Y3 Y5 Y7 ... - //ta.val[3] = V0 V2 V4 V7 ... 
- - uint8x16x2_t yvec; - yvec.val[0] = ta_top.val[0 + shift]; - yvec.val[1] = ta_top.val[2 + shift]; - vst2q_u8(out_y.ptr(), yvec); - - uint8x16x2_t yyvec; - yyvec.val[0] = ta_bottom.val[0 + shift]; - yyvec.val[1] = ta_bottom.val[2 + shift]; - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); - - uint8x16_t uvec; - uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); - vst1q_u8(out_u.ptr(), uvec); - - uint8x16_t vvec; - vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); - vst1q_u8(out_v.ptr(), vvec); - }, - in, out_y, out_u, out_v); -} - -/** Convert NV12 to YUV4. - * - * @param[in] input Input NV12 data buffer. - * @param[out] output Output YUV4 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template <bool uv> -void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - constexpr auto shift = uv ? 
0 : 1; - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_uv(input_ptr->plane(1), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win); - Iterator out_v(output_ptr->plane(2), win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_uv = vld2q_u8(in_uv.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - - uint8x16x2_t uvec; - uvec.val[0] = ta_uv.val[0 + shift]; - uvec.val[1] = ta_uv.val[0 + shift]; - vst2q_u8(out_u.ptr(), uvec); - vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); - - uint8x16x2_t vvec; - vvec.val[0] = ta_uv.val[1 - shift]; - vvec.val[1] = ta_uv.val[1 - shift]; - vst2q_u8(out_v.ptr(), vvec); - vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); - }, - in_y, in_uv, out_y, out_u, out_v); -} - -/** Convert IYUV to YUV4. - * - * @param[in] input Input IYUV data buffer. - * @param[out] output Output YUV4 buffer. - * @param[in] win Window for iterating the buffers. 
- * - */ -void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IMultiImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_u(input_ptr->plane(1), win_uv); - Iterator in_v(input_ptr->plane(2), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win); - Iterator out_v(output_ptr->plane(2), win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_u = vld1q_u8(in_u.ptr()); - const auto ta_v = vld1q_u8(in_v.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_u = U0 U2 U4 U6 ... - //ta_v = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - - uint8x16x2_t uvec; - uvec.val[0] = ta_u; - uvec.val[1] = ta_u; - vst2q_u8(out_u.ptr(), uvec); - vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); - - uint8x16x2_t vvec; - vvec.val[0] = ta_v; - vvec.val[1] = ta_v; - vst2q_u8(out_v.ptr(), vvec); - vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); - }, - in_y, in_u, in_v, out_y, out_u, out_v); -} - -/** Convert RGB to NV12. - * - * @param[in] input Input RGB data buffer. 
- * @param[out] output Output NV12 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template <bool alpha> -void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_uv(output_ptr->plane(1), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_rgb_top = load_rgb(in.ptr(), alpha); - const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); - //ta_rgb.val[0] = R0 R1 R2 R3 ... - //ta_rgb.val[1] = G0 G1 G2 G3 ... - //ta_rgb.val[2] = B0 B1 B2 B3 ... - - store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], - ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], - out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), - out_uv.ptr()); - }, - in, out_y, out_uv); -} - -/** Convert RGB to IYUV. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output IYUV buffer. - * @param[in] win Window for iterating the buffers. 
- * - */ -template <bool alpha> -void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win_uv); - Iterator out_v(output_ptr->plane(2), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_rgb_top = load_rgb(in.ptr(), alpha); - const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); - //ta_rgb.val[0] = R0 R1 R2 R3 ... - //ta_rgb.val[1] = G0 G1 G2 G3 ... - //ta_rgb.val[2] = B0 B1 B2 B3 ... - - store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], - ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], - out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), - out_u.ptr(), out_v.ptr()); - }, - in, out_y, out_u, out_v); -} - -/** Convert RGB to YUV4. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output YUV4 buffer. - * @param[in] win Window for iterating the buffers. 
- * - */ -template <bool alpha> -void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast<const IImage *__restrict>(input); - const auto output_ptr = static_cast<IMultiImage *__restrict>(output); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win); - Iterator out_v(output_ptr->plane(2), win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_rgb = load_rgb(in.ptr(), alpha); - //ta_rgb.val[0] = R0 R1 R2 R3 ... - //ta_rgb.val[1] = G0 G1 G2 G3 ... - //ta_rgb.val[2] = B0 B1 B2 B3 ... - - store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2], - out_y.ptr(), out_u.ptr(), out_v.ptr()); - }, - in, out_y, out_u, out_v); -} -} // namespace arm_compute diff --git a/arm_compute/core/NEON/NEFixedPoint.inl b/arm_compute/core/NEON/NEFixedPoint.inl deleted file mode 100644 index 14e51d825c..0000000000 --- a/arm_compute/core/NEON/NEFixedPoint.inl +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <array> -#include <limits> - -namespace arm_compute -{ -#ifndef DOXYGEN_SKIP_THIS - -inline float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b) -{ - float32x4x2_t res = - { - { - vmaxq_f32(a.val[0], b.val[0]), - vmaxq_f32(a.val[1], b.val[1]) - } - }; - return res; -} -#endif /* DOXYGEN_SKIP_THIS */ -} // namespace arm_compute diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h deleted file mode 100644 index 38701f434a..0000000000 --- a/arm_compute/core/NEON/NEKernels.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEKERNELS_H -#define ARM_COMPUTE_NEKERNELS_H - -/* Header regrouping all the NEON kernels */ -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include 
"arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NECropKernel.h" -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include 
"arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include 
"arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" -#include 
"arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" -#include "arm_compute/core/NEON/kernels/NETileKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" - -#endif /* ARM_COMPUTE_NEKERNELS_H */ diff --git a/arm_compute/core/NEON/NEMath.h b/arm_compute/core/NEON/NEMath.h deleted file mode 100644 index 8827bbf459..0000000000 --- a/arm_compute/core/NEON/NEMath.h +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMATH_H -#define ARM_COMPUTE_NEMATH_H - -#include <arm_neon.h> -#include <array> - -namespace arm_compute -{ -/** Calculate floor of a vector. - * - * @param[in] val Input vector value in F32 format. - * - * @return The calculated floor vector. - */ -float32x4_t vfloorq_f32(float32x4_t val); - -/** Calculate round value of a vector to nearest with ties to even. - * - * @param[in] val Input vector value in F32 format. - * - * @return The calculated round vector. - */ -float32x4_t vroundq_rte_f32(float32x4_t val); - -/** Calculate inverse square root. - * - * @param[in] x Input value. - * - * @return The calculated inverse square root. - */ -float32x2_t vinvsqrt_f32(float32x2_t x); - -/** Calculate inverse square root. - * - * @param[in] x Input value. - * - * @return The calculated inverse square root. 
- */ -float32x4_t vinvsqrtq_f32(float32x4_t x); - -/** Calculate reciprocal. - * - * @param[in] x Input value. - * - * @return The calculated reciprocal. - */ -float32x2_t vinv_f32(float32x2_t x); - -/** Calculate reciprocal. - * - * @param[in] x Input value. - * - * @return The calculated reciprocal. - */ -float32x4_t vinvq_f32(float32x4_t x); - -/** Perform a 7th degree polynomial approximation using Estrin's method. - * - * @param[in] x Input vector value in F32 format. - * @param[in] coeffs Polynomial coefficients table. - * - * @return The calculated approximation. - */ -float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs); - -/** Calculate exponential - * - * @param[in] x Input vector value in F32 format. - * - * @return The calculated exponent. - */ -float32x4_t vexpq_f32(float32x4_t x); - -/** Calculate logarithm - * - * @param[in] x Input vector value in F32 format. - * - * @return The calculated logarithm. - */ -float32x4_t vlogq_f32(float32x4_t x); - -/** Calculate hyperbolic tangent. - * - * tanh(x) = (e^2x - 1)/(e^2x + 1) - * - * @note We clamp x to [-5,5] to avoid overflowing issues. - * - * @param[in] val Input vector value in F32 format. - * - * @return The calculated Hyperbolic Tangent. - */ -float32x4_t vtanhq_f32(float32x4_t val); - -/** Calculate n power of a number. - * - * pow(x,n) = e^(n*log(x)) - * - * @param[in] val Input vector value in F32 format. - * @param[in] n Powers to raise the input to. - * - * @return The calculated power. 
- */ -float32x4_t vpowq_f32(float32x4_t val, float32x4_t n); - -/** Round to the nearest division by a power-of-two using exponent - * - * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent - * - * @param[in] x Vector of 4 elements - * @param[in] exponent Vector of 4 elements with integer value used to round to nearest division by a power-of-two - * - * @return the nearest division by a power-of-two using exponent - */ -int32x4_t rounding_divide_by_pow2(int32x4_t x, int32x4_t exponent); - -/** Round to the nearest division by a power-of-two using exponent - * - * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent - * - * @param[in] x Vector of 4 elements - * @param[in] exponent Integer value used to round to nearest division by a power-of-two - * - * @return the nearest division by a power-of-two using exponent - */ -int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent); - -/** Round to the nearest division by a power-of-two using exponent - * - * @note This function calculates the following expression: (x + 2^n -1 ) / 2^n where n = exponent - * - * @param[in] x Element to divide. 
- * @param[in] exponent Integer value used to round to nearest division by a power-of-two - * - * @return the nearest division by a power-of-two using exponent - */ -int32_t rounding_divide_by_pow2(int32_t x, int exponent); - -/** Converts from uint8x16 to float32x4x4_t - * - * @param[in] in Vector of uint8 to be converted - * - * @return Converted vector of float - */ -float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in); - -/** Converts from int8x16 to float32x4x4_t - * - * @param[in] in Vector of int8 to be converted - * - * @return Converted vector of float - */ -float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in); - -/** Converts to float32x4x4_t from the specified templated 16 elements vectors - * - * @param[in] in Vector of float to be converted - * - * @return Converted vector of float - */ -template <typename T> -float32x4x4_t convert_to_float32x4x4(const T &in); - -/** Converts from two float32x4x3_t to just one uint8x8x3_t - * - * @param[in] in1 First input vector of float to be converted - * @param[in] in2 Second input vector of float to be converted - * @param[out] out Converted output vector uint8 to store the result - */ -void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out); - -/** Converts from two float32x4x4_t to just one uint8x16_t - * - * @param[in] in Vector of float to be converted - * @param[out] out Converted vector of uint8 to store the result - */ -void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out); - -/** Converts from float32x4x4_t to just one int8x16_t - * - * @param[in] in Vector of float to be converted - * @param[out] out Converted vector of uint8 to store the result - */ -void convert_float32x4x4_to_int8x16(const float32x4x4_t &in, int8x16_t &out); - -/** Calculate sine. - * - * @param[in] val Input vector value in radians, F32 format. - * - * @return The calculated sine. 
- */ -float32x4_t vsinq_f32(float32x4_t val); - -/** Calculate sine. - * - * @param[in] val Input vector value in radians, F32 format. - * - * @return The calculated sine. - */ -float32x2_t vsin_f32(float32x2_t val); - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Calculate hyperbolic tangent. - * - * tanh(x) = (e^2x - 1)/(e^2x + 1) - * - * @note We clamp x to [-5,5] to avoid overflowing issues. - * - * @param[in] val Input vector value in F16 format. - * - * @return The calculated Hyperbolic Tangent. - */ -float16x8_t vtanhq_f16(float16x8_t val); - -/** Calculate round value of a vector to nearest with ties to even. - * - * @param[in] val Input vector value in F16 format. - * - * @return The calculated round vector. - */ -float16x8_t vroundq_rte_f16(float16x8_t val); - -/** Calculate reciprocal. - * - * @param[in] x Input value. - * - * @return The calculated reciprocal. - */ -float16x4_t vinv_f16(float16x4_t x); - -/** Calculate reciprocal. - * - * @param[in] x Input value. - * - * @return The calculated reciprocal. - */ -float16x8_t vinvq_f16(float16x8_t x); - -/** Calculate inverse square root. - * - * @param[in] x Input value. - * - * @return The calculated inverse square root. - */ -float16x4_t vinvsqrt_f16(float16x4_t x); - -/** Calculate inverse square root. - * - * @param[in] x Input value. - * - * @return The calculated inverse square root. - */ -float16x8_t vinvsqrtq_f16(float16x8_t x); - -/** Calculate exponential - * - * @param[in] x Input vector value in F16 format. - * - * @return The calculated exponent. - */ -float16x8_t vexpq_f16(float16x8_t x); - -/** Calculate n power of a number. - * - * pow(x,n) = e^(n*log(x)) - * - * @param[in] val Input vector value in F16 format. - * @param[in] n Powers to raise the input to. - * - * @return The calculated power. - */ -float16x8_t vpowq_f16(float16x8_t val, float16x8_t n); - -/** Calculate sine. - * - * @param[in] val Input vector value in radians, F16 format. - * - * @return The calculated sine. 
- */ -float16x8_t vsinq_f16(float16x8_t val); - -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#include "arm_compute/core/NEON/NEMath.inl" -#endif /* ARM_COMPUTE_NEMATH_H */ diff --git a/arm_compute/core/NEON/NEMath.inl b/arm_compute/core/NEON/NEMath.inl deleted file mode 100644 index 032bfde238..0000000000 --- a/arm_compute/core/NEON/NEMath.inl +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <cmath> -#include <limits> - -#ifndef M_PI -#define M_PI (3.14159265358979323846) -#endif // M_PI - -namespace arm_compute -{ -/** Exponent polynomial coefficients */ -const std::array<float32x4_t, 8> exp_tab = -{ - { - vdupq_n_f32(1.f), - vdupq_n_f32(0.0416598916054f), - vdupq_n_f32(0.500000596046f), - vdupq_n_f32(0.0014122662833f), - vdupq_n_f32(1.00000011921f), - vdupq_n_f32(0.00833693705499f), - vdupq_n_f32(0.166665703058f), - vdupq_n_f32(0.000195780929062f), - } -}; - -/** Logarithm polynomial coefficients */ -const std::array<float32x4_t, 8> log_tab = -{ - { - vdupq_n_f32(-2.29561495781f), - vdupq_n_f32(-2.47071170807f), - vdupq_n_f32(-5.68692588806f), - vdupq_n_f32(-0.165253549814f), - vdupq_n_f32(5.17591238022f), - vdupq_n_f32(0.844007015228f), - vdupq_n_f32(4.58445882797f), - vdupq_n_f32(0.0141278216615f), - } -}; - -/** Sin polynomial coefficients */ -constexpr float te_sin_coeff2 = 0.166666666666f; // 1/(2*3) -constexpr float te_sin_coeff3 = 0.05f; // 1/(4*5) -constexpr float te_sin_coeff4 = 0.023809523810f; // 1/(6*7) -constexpr float te_sin_coeff5 = 0.013888888889f; // 1/(8*9) - -#ifndef DOXYGEN_SKIP_THIS -inline float32x4_t vfloorq_f32(float32x4_t val) -{ - static const float32x4_t CONST_1 = vdupq_n_f32(1.f); - - const int32x4_t z = vcvtq_s32_f32(val); - const float32x4_t r = vcvtq_f32_s32(z); - - return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r); -} - -inline float32x4_t vroundq_rte_f32(float32x4_t val) -{ -#ifdef __aarch64__ - return vrndnq_f32(val); -#else // __aarch64__ - static const float32x4_t CONST_HALF_FLOAT = vdupq_n_f32(0.5f); - static const float32x4_t CONST_1_FLOAT = vdupq_n_f32(1.f); - static const int32x4_t CONST_1_INT = vdupq_n_s32(1); - const float32x4_t floor_val = vfloorq_f32(val); - const float32x4_t diff = vsubq_f32(val, floor_val); - - /* - * Select the floor value when (diff<0.5 || (diff==0.5 && floor_val%2==0). 
- * This condition is checked by vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT) ,vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT) , vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT),CONST_1_INT)))) - */ - - return vbslq_f32(vorrq_u32(vcltq_f32(diff, CONST_HALF_FLOAT), vandq_u32(vceqq_f32(diff, CONST_HALF_FLOAT), vmvnq_u32(vtstq_s32(vandq_s32(vcvtq_s32_f32(floor_val), CONST_1_INT), CONST_1_INT)))), - floor_val, vaddq_f32(floor_val, CONST_1_FLOAT)); -#endif // __aarch64__ -} - -inline float32x2_t vinvsqrt_f32(float32x2_t x) -{ - float32x2_t sqrt_reciprocal = vrsqrte_f32(x); - sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - sqrt_reciprocal = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - - return sqrt_reciprocal; -} - -inline float32x4_t vinvsqrtq_f32(float32x4_t x) -{ - float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - - return sqrt_reciprocal; -} - -inline float32x2_t vinv_f32(float32x2_t x) -{ - float32x2_t recip = vrecpe_f32(x); - recip = vmul_f32(vrecps_f32(x, recip), recip); - recip = vmul_f32(vrecps_f32(x, recip), recip); - return recip; -} - -inline float32x4_t vinvq_f32(float32x4_t x) -{ - float32x4_t recip = vrecpeq_f32(x); - recip = vmulq_f32(vrecpsq_f32(x, recip), recip); - recip = vmulq_f32(vrecpsq_f32(x, recip), recip); - return recip; -} - -inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const std::array<float32x4_t, 8> &coeffs) -{ - float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x); - float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x); - float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x); - float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x); - float32x4_t x2 = vmulq_f32(x, x); - float32x4_t x4 = vmulq_f32(x2, x2); - 
float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); - return res; -} - -inline float32x4_t vexpq_f32(float32x4_t x) -{ - static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) - static const float32x4_t CONST_INV_LN2 = vdupq_n_f32(1.4426950408f); // 1/ln(2) - static const float32x4_t CONST_INF = vdupq_n_f32(std::numeric_limits<float>::infinity()); - static const float32x4_t CONST_MAX_INPUT = vdupq_n_f32(88.7f); - static const float32x4_t CONST_0 = vdupq_n_f32(0.f); - static const int32x4_t CONST_NEGATIVE_126 = vdupq_n_s32(-126); - - // Perform range reduction [-log(2),log(2)] - int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2)); - float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2); - - // Polynomial Approximation - float32x4_t poly = vtaylor_polyq_f32(val, exp_tab); - - // Reconstruct - poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23))); - poly = vbslq_f32(vcltq_s32(m, CONST_NEGATIVE_126), CONST_0, poly); // Handle underflow - poly = vbslq_f32(vcgtq_f32(x, CONST_MAX_INPUT), CONST_INF, poly); // Handle overflow - - return poly; -} - -inline float32x4_t vlogq_f32(float32x4_t x) -{ - static const int32x4_t CONST_127 = vdupq_n_s32(127); // 127 - static const float32x4_t CONST_LN2 = vdupq_n_f32(0.6931471805f); // ln(2) - - // Extract exponent - int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127); - float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); - - // Polynomial Approximation - float32x4_t poly = vtaylor_polyq_f32(val, log_tab); - - // Reconstruct - poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); - - return poly; -} - -inline float32x4_t vtanhq_f32(float32x4_t val) -{ - static const float32x4_t CONST_1 = vdupq_n_f32(1.f); - static const float32x4_t CONST_2 = vdupq_n_f32(2.f); - static const float32x4_t CONST_MIN_TANH = vdupq_n_f32(-10.f); - static const float32x4_t 
CONST_MAX_TANH = vdupq_n_f32(10.f); - - float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH); - float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x)); - float32x4_t num = vsubq_f32(exp2x, CONST_1); - float32x4_t den = vaddq_f32(exp2x, CONST_1); - float32x4_t tanh = vmulq_f32(num, vinvq_f32(den)); - return tanh; -} - -inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n) -{ - return vexpq_f32(vmulq_f32(n, vlogq_f32(val))); -} - -inline float32x4_t vsinq_f32(float32x4_t val) -{ - const float32x4_t pi_v = vdupq_n_f32(M_PI); - const float32x4_t pio2_v = vdupq_n_f32(M_PI / 2); - const float32x4_t ipi_v = vdupq_n_f32(1 / M_PI); - - //Find positive or negative - const int32x4_t c_v = vabsq_s32(vcvtq_s32_f32(vmulq_f32(val, ipi_v))); - const uint32x4_t sign_v = vcleq_f32(val, vdupq_n_f32(0)); - const uint32x4_t odd_v = vandq_u32(vreinterpretq_u32_s32(c_v), vdupq_n_u32(1)); - - uint32x4_t neg_v = veorq_u32(odd_v, sign_v); - - //Modulus a - (n * int(a*(1/n))) - float32x4_t ma = vsubq_f32(vabsq_f32(val), vmulq_f32(pi_v, vcvtq_f32_s32(c_v))); - const uint32x4_t reb_v = vcgeq_f32(ma, pio2_v); - - //Rebase a between 0 and pi/2 - ma = vbslq_f32(reb_v, vsubq_f32(pi_v, ma), ma); - - //Taylor series - const float32x4_t ma2 = vmulq_f32(ma, ma); - - //2nd elem: x^3 / 3! - float32x4_t elem = vmulq_f32(vmulq_f32(ma, ma2), vdupq_n_f32(te_sin_coeff2)); - float32x4_t res = vsubq_f32(ma, elem); - - //3rd elem: x^5 / 5! - elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff3)); - res = vaddq_f32(res, elem); - - //4th elem: x^7 / 7!float32x2_t vsin_f32(float32x2_t val) - elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff4)); - res = vsubq_f32(res, elem); - - //5th elem: x^9 / 9! 
- elem = vmulq_f32(vmulq_f32(elem, ma2), vdupq_n_f32(te_sin_coeff5)); - res = vaddq_f32(res, elem); - - //Change of sign - neg_v = vshlq_n_u32(neg_v, 31); - res = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(res), neg_v)); - return res; -} - -inline float32x2_t vsin_f32(float32x2_t val) -{ - const float32x2_t pi_v = vdup_n_f32(M_PI); - const float32x2_t pio2_v = vdup_n_f32(M_PI / 2); - const float32x2_t ipi_v = vdup_n_f32(1 / M_PI); - - //Find positive or negative - const int32x2_t c_v = vabs_s32(vcvt_s32_f32(vmul_f32(val, ipi_v))); - const uint32x2_t sign_v = vcle_f32(val, vdup_n_f32(0)); - const uint32x2_t odd_v = vand_u32(vreinterpret_u32_s32(c_v), vdup_n_u32(1)); - - uint32x2_t neg_v = veor_u32(odd_v, sign_v); - - //Modulus a - (n * int(a*(1/n))) - float32x2_t ma = vsub_f32(vabs_f32(val), vmul_f32(pi_v, vcvt_f32_s32(c_v))); - const uint32x2_t reb_v = vcge_f32(ma, pio2_v); - - //Rebase a between 0 and pi/2 - ma = vbsl_f32(reb_v, vsub_f32(pi_v, ma), ma); - - //Taylor series - const float32x2_t ma2 = vmul_f32(ma, ma); - - //2nd elem: x^3 / 3! - float32x2_t elem = vmul_f32(vmul_f32(ma, ma2), vdup_n_f32(te_sin_coeff2)); - float32x2_t res = vsub_f32(ma, elem); - - //3rd elem: x^5 / 5! - elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff3)); - res = vadd_f32(res, elem); - - //4th elem: x^7 / 7!float32x2_t vsin_f32(float32x2_t val) - elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff4)); - res = vsub_f32(res, elem); - - //5th elem: x^9 / 9! 
- elem = vmul_f32(vmul_f32(elem, ma2), vdup_n_f32(te_sin_coeff5)); - res = vadd_f32(res, elem); - - //Change of sign - neg_v = vshl_n_u32(neg_v, 31); - res = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(res), neg_v)); - return res; -} - -#endif /* DOXYGEN_SKIP_THIS */ - -inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int32x4_t exponent) -{ - const int32x4_t shift_vec = vnegq_s32(exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); - const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed_up_x, shift_vec); -} - -inline int32x4_t rounding_divide_by_pow2(int32x4_t x, int exponent) -{ - const int32x4_t shift_vec = vdupq_n_s32(-exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); - const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed_up_x, shift_vec); -} - -inline int32_t rounding_divide_by_pow2(int32_t x, int exponent) -{ - const int32_t mask = (1 << exponent) - 1; - const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0); - return (x >> exponent) + ((x & mask) > threshold ? 
1 : 0); -} - -inline float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in) -{ - float32x4x4_t out; - - const auto tmp1 = vmovl_u8(vget_low_u8(in)); - out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); - out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); - - const auto tmp2 = vmovl_u8(vget_high_u8(in)); - out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); - out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); - return out; -} - -inline float32x4x4_t convert_int8x16_to_float32x4x4(const int8x16_t &in) -{ - float32x4x4_t out; - - const auto tmp1 = vmovl_s8(vget_low_s8(in)); - out.val[0] = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp1))); - out.val[1] = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp1))); - - const auto tmp2 = vmovl_s8(vget_high_s8(in)); - out.val[2] = vcvtq_f32_s32(vmovl_s16(vget_low_s16(tmp2))); - out.val[3] = vcvtq_f32_s32(vmovl_s16(vget_high_s16(tmp2))); - return out; -} - -template <> -inline float32x4x4_t convert_to_float32x4x4(const uint8x16_t &in) -{ - return convert_uint8x16_to_float32x4x4(in); -} - -template <> -inline float32x4x4_t convert_to_float32x4x4(const int8x16_t &in) -{ - return convert_int8x16_to_float32x4x4(in); -} - -inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out) -{ - out.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[0])), - vqmovn_u32(vcvtq_u32_f32(in2.val[0])))); - out.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[1])), - vqmovn_u32(vcvtq_u32_f32(in2.val[1])))); - out.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in1.val[2])), - vqmovn_u32(vcvtq_u32_f32(in2.val[2])))); -} - -inline void convert_float32x4x4_to_uint8x16(const float32x4x4_t &in, uint8x16_t &out) -{ - const auto low = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[0])), - vqmovn_u32(vcvtq_u32_f32(in.val[1]))); - const auto high = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(in.val[2])), - 
vqmovn_u32(vcvtq_u32_f32(in.val[3]))); - out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high)); -} - -inline void convert_float32x4x4_to_int8x16(const float32x4x4_t &in, int8x16_t &out) -{ - const auto low = vcombine_s16(vqmovn_s32(vcvtq_s32_f32(in.val[0])), - vqmovn_s32(vcvtq_s32_f32(in.val[1]))); - const auto high = vcombine_s16(vqmovn_s32(vcvtq_s32_f32(in.val[2])), - vqmovn_s32(vcvtq_s32_f32(in.val[3]))); - out = vcombine_s8(vqmovn_s16(low), vqmovn_s16(high)); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Exponent polynomial coefficients */ -/** Logarithm polynomial coefficients */ -#ifndef DOXYGEN_SKIP_THIS -inline float16x8_t vfloorq_f16(float16x8_t val) -{ - static const float16x8_t CONST_1 = vdupq_n_f16(1.f); - - const int16x8_t z = vcvtq_s16_f16(val); - const float16x8_t r = vcvtq_f16_s16(z); - - return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, CONST_1), r); -} - -inline float16x8_t vroundq_rte_f16(float16x8_t val) -{ - return vrndnq_f16(val); -} - -inline float16x4_t vinvsqrt_f16(float16x4_t x) -{ - float16x4_t sqrt_reciprocal = vrsqrte_f16(x); - sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - sqrt_reciprocal = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - return sqrt_reciprocal; -} - -inline float16x8_t vinvsqrtq_f16(float16x8_t x) -{ - float16x8_t sqrt_reciprocal = vrsqrteq_f16(x); - sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - sqrt_reciprocal = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); - return sqrt_reciprocal; -} - -inline float16x4_t vinv_f16(float16x4_t x) -{ - float16x4_t recip = vrecpe_f16(x); - recip = vmul_f16(vrecps_f16(x, recip), recip); - recip = vmul_f16(vrecps_f16(x, recip), recip); - return recip; -} - -inline float16x8_t vinvq_f16(float16x8_t x) -{ - float16x8_t recip = vrecpeq_f16(x); - recip = vmulq_f16(vrecpsq_f16(x, 
recip), recip); - recip = vmulq_f16(vrecpsq_f16(x, recip), recip); - return recip; -} - -inline float16x8_t vtanhq_f16(float16x8_t val) -{ - const float16x8_t CONST_1 = vdupq_n_f16(1.f); - const float16x8_t CONST_2 = vdupq_n_f16(2.f); - const float16x8_t CONST_MIN_TANH = vdupq_n_f16(-10.f); - const float16x8_t CONST_MAX_TANH = vdupq_n_f16(10.f); - - const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH); - const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x)); - const float16x8_t num = vsubq_f16(exp2x, CONST_1); - const float16x8_t den = vaddq_f16(exp2x, CONST_1); - const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den)); - return tanh; -} - -inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const std::array<float16x8_t, 8> &coeffs) -{ - const float16x8_t A = vaddq_f16(coeffs[0], vmulq_f16(coeffs[4], x)); - const float16x8_t B = vaddq_f16(coeffs[2], vmulq_f16(coeffs[6], x)); - const float16x8_t C = vaddq_f16(coeffs[1], vmulq_f16(coeffs[5], x)); - const float16x8_t D = vaddq_f16(coeffs[3], vmulq_f16(coeffs[7], x)); - const float16x8_t x2 = vmulq_f16(x, x); - const float16x8_t x4 = vmulq_f16(x2, x2); - const float16x8_t res = vaddq_f16(vaddq_f16(A, vmulq_f16(B, x2)), vmulq_f16(vaddq_f16(C, vmulq_f16(D, x2)), x4)); - return res; -} - -inline float16x8_t vexpq_f16(float16x8_t x) -{ - // TODO (COMPMID-1535) : Revisit FP16 approximations - const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x)); - const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x)); - - const float16x8_t res = vcombine_f16(vcvt_f16_f32(vexpq_f32(x_low)), vcvt_f16_f32(vexpq_f32(x_high))); - return res; -} - -inline float16x8_t vlogq_f16(float16x8_t x) -{ - // TODO (COMPMID-1535) : Revisit FP16 approximations - const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x)); - const float32x4_t x_low = vcvt_f32_f16(vget_low_f16(x)); - - const float16x8_t res = vcombine_f16(vcvt_f16_f32(vlogq_f32(x_low)), vcvt_f16_f32(vlogq_f32(x_high))); - return res; -} - -inline 
float16x8_t vpowq_f16(float16x8_t val, float16x8_t n) -{ - // TODO (giaiod01) - COMPMID-1535 - float32x4_t n0_f32 = vcvt_f32_f16(vget_low_f16(n)); - float32x4_t n1_f32 = vcvt_f32_f16(vget_high_f16(n)); - float32x4_t val0_f32 = vcvt_f32_f16(vget_low_f16(val)); - float32x4_t val1_f32 = vcvt_f32_f16(vget_high_f16(val)); - - float32x4_t res0_f32 = vexpq_f32(vmulq_f32(n0_f32, vlogq_f32(val0_f32))); - float32x4_t res1_f32 = vexpq_f32(vmulq_f32(n1_f32, vlogq_f32(val1_f32))); - - return vcombine_f16(vcvt_f16_f32(res0_f32), vcvt_f16_f32(res1_f32)); -} - -inline float16x8_t vsinq_f16(float16x8_t val) -{ - const float32x4_t val_high = vcvt_f32_f16(vget_high_f16(val)); - const float32x4_t val_low = vcvt_f32_f16(vget_low_f16(val)); - - const float32x4_t res_high = vsinq_f32(val_high); - const float32x4_t res_low = vsinq_f32(val_low); - - return vcombine_f16(vcvt_f16_f32(res_low), vcvt_f16_f32(res_high)); -} - -inline float16x4_t vsin_f16(float16x4_t val) -{ - const float32x4_t val_f32 = vcvt_f32_f16(val); - const float32x2_t val_high = vget_high_f32(val_f32); - const float32x2_t val_low = vget_low_f32(val_f32); - - const float32x2_t res_high = vsin_f32(val_high); - const float32x2_t res_low = vsin_f32(val_low); - - return vcvt_f16_f32(vcombine_f32(res_low, res_high)); -} - -#endif /* DOXYGEN_SKIP_THIS */ -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute diff --git a/arm_compute/core/NEON/NESymm.h b/arm_compute/core/NEON/NESymm.h deleted file mode 100644 index d6c5a7073a..0000000000 --- a/arm_compute/core/NEON/NESymm.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESYMM_H -#define ARM_COMPUTE_NESYMM_H - -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include <arm_neon.h> - -namespace arm_compute -{ -using qsymm8_t = int8_t; /**< 8 bit quantized symmetric scalar value */ -using qsymm16_t = int16_t; /**< 16 bit quantized symmetric scalar value */ - -using qsymm16x8_t = int16x8_t; /**< 16 bit quantized symmetric vector with 8 elements */ -using qsymm16x8x2_t = int16x8x2_t; /**< 16 bit quantized symmetric vector with 16 elements */ - -/** Performs final quantization step on 8 signed 16-bit elements - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param[in] in_s32 Input to be quantized. 
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter - * @param[in] result_shift Result shift parameter - * @param[in] min_s16 Relu lower bound - * @param[in] max_s16 Relu upper bound - * - * @return Quantized values - */ -template <bool is_bounded_relu> -int16x8_t finalize_quantization_int16(int32x4x2_t &in_s32, - int result_fixedpoint_multiplier, - int32_t result_shift, - int16x8_t min_s16, - int16x8_t max_s16) -{ - if(result_shift < 0) - { - in_s32.val[0] = vmulq_n_s32(in_s32.val[0], (1 << -result_shift)); - in_s32.val[1] = vmulq_n_s32(in_s32.val[1], (1 << -result_shift)); - - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - } - else - { - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - in_s32.val[0] = vqrdmulhq_n_s32(in_s32.val[0], result_fixedpoint_multiplier); - in_s32.val[1] = vqrdmulhq_n_s32(in_s32.val[1], result_fixedpoint_multiplier); - // Round to the nearest division by a power-of-two using result_shift_s32 - in_s32.val[0] = rounding_divide_by_pow2(in_s32.val[0], result_shift); - in_s32.val[1] = rounding_divide_by_pow2(in_s32.val[1], result_shift); - } - - // Convert S32 to S16 - int16x8_t out_s16 = vcombine_s16(vqmovn_s32(in_s32.val[0]), vqmovn_s32(in_s32.val[1])); - - if(is_bounded_relu) - { - out_s16 = vmaxq_s16(out_s16, min_s16); - out_s16 = vminq_s16(out_s16, max_s16); - } - - return out_s16; -} - -/** Performs final quantization step on single signed 16-bit element - * - * @tparam is_bounded_relu Specified if a fused bounded relu should be applied - * - * @param[in] in_value Input to be quantized. 
- * @param[in] result_fixedpoint_multiplier Result multiplier parameter - * @param[in] result_shift Result shift parameter - * @param[in] min_s16 Relu lower bound - * @param[in] max_s16 Relu upper bound - * - * @return Quantized values - */ -template <bool is_bounded_relu> -inline int16_t finalize_quantization_int16(int32_t in_value, int result_fixedpoint_multiplier, - int32_t result_shift, int16_t min_s16, int16_t max_s16) -{ - if(result_shift < 0) - { - const int64_t in_64 = static_cast<int64_t>(in_value) * (1 << (-result_shift)) * static_cast<int64_t>(result_fixedpoint_multiplier); - in_value = static_cast<int32_t>((in_64 + (1 << 30)) >> 31); - } - else - { - // Fixed point multiplication with vector saturating rounding doubling multiply high with scalar - const int64_t in_64 = static_cast<int64_t>(in_value) * static_cast<int64_t>(result_fixedpoint_multiplier); - // Shift value by result_shift_s32 - in_value = rounding_divide_by_pow2(static_cast<int32_t>((in_64 + (1 << 30)) >> 31), result_shift); - } - - // Bound the result - int16_t out_s16 = static_cast<int16_t>(std::max<int32_t>(-32768, std::min<int32_t>(32767, in_value))); - - if(is_bounded_relu) - { - out_s16 = static_cast<int16_t>(std::max(min_s16, std::min(max_s16, out_s16))); - } - - return out_s16; -} - -/** Dequantize a neon vector holding 8 16-bit quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] scale Quantization scale - * - * @return Dequantized values in a neon vector - */ -inline float32x4x2_t vdequantize_int16(const int16x8_t &qv, float scale) -{ - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x2_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv))), vscale) - } - }; - return vdequantized_input; -} - -/** Quantize a neon vector holding 8 floating point values. - * - * @param[in] qv Input values to be quantized. 
- * @param[in] scale Quantization scale - * - * @return A neon vector holding the quantized values - */ -inline int16x8_t vquantize_int16(const float32x4x2_t &qv, float scale) -{ - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - - const int32x4x2_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)) -#else //__aarch64__ - vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), - vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)) -#endif //__aarch64__ - } - }; - return vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])); -} - -/** Dequantize a neon vector holding 16 16-bit quantized values. - * - * @param[in] qv Input values to be dequantized. - * @param[in] qi Quantization information to be used in the computation. - * - * @return Dequantized values in a neon vector - */ -inline float32x4x4_t vdequantize(const int16x8x2_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - const float32x4_t vscale = vdupq_n_f32(scale); - const float32x4x4_t vdequantized_input = - { - { - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[0]))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[0]))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(qv.val[1]))), vscale), - vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(qv.val[1]))), vscale), - } - }; - return vdequantized_input; -} - -/** Quantize a neon vector holding 16 floating point values. - * - * @param[in] qv Input values to be quantized. - * @param[in] qi Quantization information to be used in the computation. 
- * - * @return A neon vector holding the quantized values - */ -inline qsymm16x8x2_t vquantize_qsymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi) -{ - const float scale = qi.scale; - ARM_COMPUTE_ERROR_ON(scale == 0.f); - const float32x4_t vinvscale = vdupq_n_f32(1.f / scale); - const int32x4x4_t rf = - { - { -#ifdef __aarch64__ - vcvtnq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), - vcvtnq_s32_f32(vmulq_f32(qv.val[1], vinvscale)), - vcvtnq_s32_f32(vmulq_f32(qv.val[2], vinvscale)), - vcvtnq_s32_f32(vmulq_f32(qv.val[3], vinvscale)), -#else //__aarch64__ - vcvtq_s32_f32(vmulq_f32(qv.val[0], vinvscale)), - vcvtq_s32_f32(vmulq_f32(qv.val[1], vinvscale)), - vcvtq_s32_f32(vmulq_f32(qv.val[2], vinvscale)), - vcvtq_s32_f32(vmulq_f32(qv.val[3], vinvscale)), -#endif //__aarch64__ - } - }; - const qsymm16x8x2_t res = - { - vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])), - vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])), - }; - - return res; -} - -/** Multiply a neon vector using quantized multiplier and shift - * - * @param[in] input Input vector to mutiply values to be quantized. - * @param[in] qmul Quantized multipler - * @param[in] shift Left bit shift - * - * @return A neon vector holding the multiplied value - */ -inline int32x4x2_t multiply_by_quantized_multiplier_2row(int32x4x2_t input, int32_t qmul, int32_t shift) -{ - const auto left_shift = shift > 0 ? shift : 0; - const auto right_shift = shift > 0 ? 
0 : -shift; - const auto one_shifted = 1 << left_shift; - - int32x4x2_t result; - result.val[0] = rounding_divide_by_pow2(vqrdmulhq_n_s32(vmulq_n_s32(input.val[0], one_shifted), qmul), right_shift); - result.val[1] = rounding_divide_by_pow2(vqrdmulhq_n_s32(vmulq_n_s32(input.val[1], one_shifted), qmul), right_shift); - - return result; -} - -} // namespace arm_compute -#endif // ARM_COMPUTE_NESYMM_H diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h deleted file mode 100644 index 7d35e40284..0000000000 --- a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the absolute difference kernel - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class NEAbsoluteDifferenceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEAbsoluteDifferenceKernel"; - } - /** Default constructor */ - NEAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~NEAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output tensors - * - * @param[in] input1 Source tensor. Data types supported: U8/S16 - * @param[in] input2 Source tensor. Data types supported: U8/S16 - * @param[out] output Destination tensor, Data types supported: U8/S16 - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised absolute difference functions - * - * @param[in] input1 An input tensor. Data types supported: U8/S16. - * @param[in] input2 An input tensor. Data types supported: U8/S16. 
- * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. - * @param[in] window Region on which to execute the kernel. - */ - using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); - - /** Absolute difference function to use for the particular tensor formats passed to configure() */ - AbsDiffFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h deleted file mode 100644 index 367385dd7a..0000000000 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H -#define ARM_COMPUTE_NEACCUMULATEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Interface for the accumulate kernel - * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class NEAccumulateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateKernel"; - } - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] accum Destination tensor. Data type supported: S16. - */ - void configure(const ITensor *input, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; - -/** Interface for the accumulate weighted kernel - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class NEAccumulateWeightedKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedKernel"; - } - /** Default constructor */ - NEAccumulateWeightedKernel(); - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha Scalar value in the range [0.0f, 1.0f] - * @param[in,out] accum Accumulated tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input, float alpha, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - float _alpha; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Interface for the accumulate weighted kernel using F16 */ -class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedFP16Kernel"; - } - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** Interface for the accumulate weighted kernel using F16 */ -using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -/** Interface for the accumulate squared kernel - * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class NEAccumulateSquaredKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateSquaredKernel"; - } - /** Default constructor */ - NEAccumulateSquaredKernel(); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift Shift value in the range of [0, 15] - * @param[in,out] accum Accumulated tensor. Data type supported: S16. 
- */ - void configure(const ITensor *input, uint32_t shift, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h deleted file mode 100644 index 82103b988b..0000000000 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/utils/misc/Traits.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include <arm_fp16.h> -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -class ITensor; - -/** Interface for the activation layer kernel. */ -class NEActivationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEActivationLayerKernel"; - } - /** Constructor */ - NEActivationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; - /** Default move constructor */ - NEActivationLayerKernel(NEActivationLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; - /** Default move assignment operator */ - NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] activation_info Activation layer information. - */ - void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel - * - * @param[in] input Source tensor info. 
In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using ActivationFunction = ActivationLayerInfo::ActivationFunction; - /** Common signature for all the specialised @ref NEActivationLayerKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window); - /** Function to apply an activation function on a tensor. - * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type - activation(const Window &window); - /** Function to apply an activation function on a tensor. - * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type activation(const Window &window); - /** Function to apply an activation function on a tensor. - * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<std::is_same<T, qasymm8_signed_t>::value, void>::type activation(const Window &window); - /** Function to apply an activation function on a tensor. 
- * - * @param[in] window Region on which to execute the kernel - */ - template <ActivationLayerInfo::ActivationFunction F, typename T> - typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type activation(const Window &window); - -private: - ITensor *_input; - ITensor *_output; - ActivationFunctionExecutorPtr _func; - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h deleted file mode 100644 index 36d257b886..0000000000 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H -#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform addition between two tensors */ -class NEArithmeticAdditionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEArithmeticAdditionKernel"; - } - /** Default constructor */ - NEArithmeticAdditionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; - /** Default destructor */ - ~NEArithmeticAdditionKernel() = default; - - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] policy Overflow policy. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] policy Overflow policy. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised add functions - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32. - * @param[in] policy Overflow policy. - * @param[in] window Region on which to execute the kernel. 
- */ - using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window); - /** Add function to use for the particular tensor types passed to configure() */ - AddFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - ConvertPolicy _policy; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h deleted file mode 100644 index f75c6bfb98..0000000000 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H -#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform subtraction between two tensors */ -class NEArithmeticSubtractionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEArithmeticSubtractionKernel"; - } - /** Default constructor */ - NEArithmeticSubtractionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; - /** Default destructor */ - ~NEArithmeticSubtractionKernel() = default; - - /** Initialise the kernel's input and output. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. - * @param[in] policy Overflow policy. 
Convert policy cannot be WRAP if datatype is quantized. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel - * - * @note Convert policy cannot be WRAP if datatype is QASYMM8 - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised sub functions - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32. - * @param[in] window Region on which to execute the kernel. - * @param[in] is_sat Flag to indicate if the policy is SATURATE. 
- */ - using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat); - /** Sub function to use for the particular tensor types passed to configure() */ - SubFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - ConvertPolicy _policy; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h deleted file mode 100644 index f943744ba0..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEBatchConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchConcatenateLayerKernel"; - } - /** Default constructor */ - NEBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ITensor *input, unsigned int batch_offset, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. 
- * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); - -private: - BatchConcatFunction *_func; - const ITensor *_input; - ITensor *_output; - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h deleted file mode 100644 index d59ed7baf0..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the batch normalization layer kernel. - */ -class NEBatchNormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchNormalizationLayerKernel"; - } - /** Default constructor */ - NEBatchNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[out] output Destination tensor. 
Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - */ - void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, - ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 
1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Configure execution function in case of non-fused activation **/ - void configure_non_fused(); - /** Configure execution function in case of fused activation **/ - void configure_fused(); - - /** Template function to run batch normalization on fp16 - * - * @tparam fused_activation Boolean that flags if its a fused activation or not - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <bool fused_activation, typename F> - void batch_normalization_fp16_nchw(const Window &window); - /** Template function to run batch normalization on fp16 on tensors with NHWC format - * - * @tparam fused_activation Boolean that flags if its a fused activation or not - * - * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). - */ - template <bool fused_activation, typename F> - void batch_normalization_fp16_nhwc(const Window &window); - /** Template function to run batch normalization on fp32 - * - * @tparam fused_activation Boolean that flags if its a fused activation or not - * @tparam F Activation function functor to run - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <bool fused_activation, typename F> - void batch_normalization_fp32_nchw(const Window &window); - /** Template function to run batch normalization on fp32 on tensors with NHWC format - * - * @tparam fused_activation Boolean that flags if its a fused activation or not - * @tparam F Activation function functor to run - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <bool fused_activation, typename F> - void batch_normalization_fp32_nhwc(const Window &window); - /** Common signature for all the batch normalization functions - * - * @param[in] window Region on which to execute the kernel. - */ - using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window); - -private: - BatchNormFunctionPtr _func; - ITensor *_input; - ITensor *_output; - const ITensor *_mean; - const ITensor *_var; - const ITensor *_gamma; - const ITensor *_beta; - float _epsilon; - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h deleted file mode 100644 index 61e47b0ea4..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the batch to space kernel */ -class NEBatchToSpaceLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchToSpaceLayerKernel"; - } - /** Default constructor */ - NEBatchToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. 
Data types supported: same as @p input - */ - void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_block_shape; /**< Block shape tensor */ - ITensor *_output; /**< Destination tensor */ - DataLayout _data_layout; /**< Data layout to be used at run-time */ - - int32_t _block_shape_x; - int32_t _block_shape_y; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h deleted file mode 100644 index 7a777678dc..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H -#define ARM_COMPUTE_NEBITWISEANDKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] - */ -class NEBitwiseAndKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseAndKernel"; - } - /** Default constructor */ - NEBitwiseAndKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output Output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h deleted file mode 100644 index 3fb8c083a8..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H -#define ARM_COMPUTE_NEBITWISENOTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise NOT operation - * - * Result is computed by: - * @f[ output(x,y) = \lnot input(x,y) @f] - */ -class NEBitwiseNotKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseNotKernel"; - } - /** Default constructor */ - NEBitwiseNotKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; - /** Initialise the kernel's input and output - * - * @param[in] input An input tensor. Data type supported: U8. - * @param[out] output The output tensor. Data type supported: U8. - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h deleted file mode 100644 index 5b532510ad..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H -#define ARM_COMPUTE_NEBITWISEORKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise inclusive OR between two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] - */ -class NEBitwiseOrKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseOrKernel"; - } - /** Default constructor */ - NEBitwiseOrKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output Output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h deleted file mode 100644 index 0d9120501b..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H -#define ARM_COMPUTE_NEBITWISEXORKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] - */ -class NEBitwiseXorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseXorKernel"; - } - /** Default constructor */ - NEBitwiseXorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h deleted file mode 100644 index e94f228f2a..0000000000 --- a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H -#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the bounding box kernel */ -class NEBoundingBoxTransformKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBoundingBoxTransformKernel"; - } - - /** Default constructor */ - NEBoundingBoxTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default; - /** Default destructor */ - ~NEBoundingBoxTransformKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. 
Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - */ - void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform - * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. 
- * - * @return a Status - */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template <typename T> - void internal_run(const Window &window); - - const ITensor *_boxes; - ITensor *_pred_boxes; - const ITensor *_deltas; - BoundingBoxTransformInfo _bbinfo; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h deleted file mode 100644 index 448e33be3c..0000000000 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H -#define ARM_COMPUTE_NEBOX3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Box 3x3 filter */ -class NEBox3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEBox3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform a Box 3x3 filter for FP16 datatype - */ -class NEBox3x3FP16Kernel : public NEBox3x3Kernel -{ -public: - const char *name() const override - { - return "NEBox3x3FP16Kernel"; - } - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ -using NEBox3x3FP16Kernel = NEBox3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h deleted file mode 100644 index 1979c5bd2b..0000000000 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H -#define ARM_COMPUTE_NECANNYEDGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Computes magnitude and quantised phase from inputs gradients. 
*/ -class NEGradientKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGradientKernel"; - } - /** Default constructor */ - NEGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel(const NEGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel &operator=(const NEGradientKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGradientKernel(NEGradientKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGradientKernel &operator=(NEGradientKernel &&) = default; - /** Default destructor */ - virtual ~NEGradientKernel() = default; - - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and magnitude must all be the same size (either 16 or 32) - * - * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. - * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). - * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - */ - virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Common signature for all the specialised gradient functions - * - * @param[in] gx_ptr Pointer to the first input tensor. - * @param[in] gy_ptr Pointer to the second input tensor. 
- * @param[out] magnitude_ptr Pointer to the first output tensor - * @param[out] phase_ptr Pointer to the second output tensor - */ - using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); - - GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ - const ITensor *_gx; /**< Source tensor - Gx component */ - const ITensor *_gy; /**< Source tensor - Gy component */ - ITensor *_magnitude; /**< Destination tensor - Magnitude */ - ITensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** NEON kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. - * - * @note Hysteresis is computed in @ref NEEdgeTraceKernel - */ -class NEEdgeNonMaxSuppressionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeNonMaxSuppressionKernel"; - } - /** Default constructor */ - NEEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Default destructor */ - ~NEEdgeNonMaxSuppressionKernel() = default; - - /** Initialise the kernel's sources, 
destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Common signature for all the specialised non-maxima suppression functions - * - * @param[in] magnitude_ptr Pointer to the first input tensor. - * @param[in] phase_ptr Pointer to the second input tensor. 
- * @param[out] output_ptr Pointer to the output tensor - * @param[in] stride_mag Stride of the magnitude tensor - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - */ - using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, - const int32_t lower_thr); - - EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_magnitude; /**< Source tensor - Magnitude */ - const ITensor *_phase; /**< Source tensor - Quantized phase */ - ITensor *_output; /**< Destination tensor */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ -}; - -/** NEON kernel to perform Edge tracing */ -class NEEdgeTraceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeTraceKernel"; - } - /** Default constructor */ - NEEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; - /** Default constructor */ - ~NEEdgeTraceKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in,out] output Destination tensor. 
Data type supported: U8. Must be initialized to 0 (No edge). - */ - void configure(ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; - -private: - ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h deleted file mode 100644 index 8f019384d9..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include <array> -#include <cstdint> - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel combine kernel */ -class NEChannelCombineKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelCombineKernel"; - } - /** Default constructor */ - NEChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel(NEChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; - /** Default destructor */ - ~NEChannelCombineKernel() = default; - - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. 
Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Combine 3 planes to form a three channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_3C(const Window &win); - /** Combine 4 planes to form a four channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_4C(const Window &win); - /** Combine 3 planes to form a single plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - template <bool is_yuyv> - void combine_YUV_1p(const Window &win); - /** Combine 3 planes to form a two plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_2p(const Window &win); - /** Combine 3 planes to form a three plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_3p(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win, uint32_t plane_id); - /** Common signature for all the specialised ChannelCombine functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); - /** ChannelCombine function to use for the particular tensor types passed to configure() */ - ChannelCombineFunction _func; - std::array<const ITensor *, 4> _planes; - ITensor *_output; - IMultiImage *_output_multi; - std::array<uint32_t, 3> _x_subsampling; - std::array<uint32_t, 3> _y_subsampling; - unsigned int _num_elems_processed_per_iteration; - bool _is_parallelizable; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h deleted file mode 100644 index 8d62016fe5..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel extract kernel */ -class NEChannelExtractKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEChannelExtractKernel"; - } - /** Default constructor */ - NEChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel(NEChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; - /** Default destructor */ - ~NEChannelExtractKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Format supported: u8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. 
- * @param[out] output Single-planar destination image. Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Extract one channel from a two channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_2C_img(const Window &win); - /** Extract one channel from a three channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_3C_img(const Window &win); - /** Extract one channel from a four channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_4C_img(const Window &win); - /** Extract U/V channel from a single planar YUVY/UYVY tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_YUYV_uv(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win); - /** Common signature for all the specialised ChannelExtract functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); - /** ChannelExtract function to use for the particular tensor types passed to configure() */ - ChannelExtractFunction _func; - unsigned int _lut_index; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h deleted file mode 100644 index 71659c4fcb..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H -#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the channel shuffle kernel */ -class NEChannelShuffleLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelShuffleLayerKernel"; - } - /** Default constructor */ - NEChannelShuffleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default; - /** Default destructor */ - ~NEChannelShuffleLayerKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_groups); - /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. 
Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _num_groups; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h deleted file mode 100644 index 9aa1062622..0000000000 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H -#define ARM_COMPUTE_NECOL2IMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform col2im reshaping. - * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. - * - * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: - * - * @f[ - * \left( \begin{array}{ccccccccc} - * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccc} - * a0 & a1 & a2 \\ - * a3 & a4 & a5 \\ - * a6 & a7 & a8 \\ - * \end{array} \right) - * @f] - */ -class NECol2ImKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECol2ImKernel"; - } - /** Default constructor */ - NECol2ImKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECol2ImKernel(const NECol2ImKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; - /** Allow instances of this class to be moved */ - NECol2ImKernel(NECol2ImKernel &&) = default; - /** Allow instances of this class to be moved */ - NECol2ImKernel &operator=(NECol2ImKernel &&) = default; - /** Default destructor */ - ~NECol2ImKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. 
- */ - void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); - /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the col2im - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T> - void run_col2im(const Window &window); - - /** Common signature for all the specialised col2im functions - * - * @param[in] window Region on which to execute the kernel. - */ - using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); - - Col2ImFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - Size2D _convolved_dims; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h deleted file mode 100644 index 3059288ab4..0000000000 --- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H -#define ARM_COMPUTE_COLORCONVERTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the color convert kernel */ -class NEColorConvertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEColorConvertKernel"; - } - /** Default constructor */ - NEColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel(const NEColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEColorConvertKernel(NEColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; - /** Default destructor */ - ~NEColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. 
Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); - const void *_input; - void *_output; - ColorConvertFunction *_func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h deleted file mode 100644 index d45191949a..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H -#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. - * - * @note This function can be applied to the 2D weights used by a Fully Connected layer if: - * - It follows a Convolution layer - * - The data layout used by the network does not match the one the model has been trained in. 
- * - * @note This function assumes the weights are already reshaped (transposed) - */ -class NEConvertFullyConnectedWeightsKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvertFullyConnectedWeightsKernel"; - } - /** Default constructor */ - NEConvertFullyConnectedWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default; - /** Default destructor */ - ~NEConvertFullyConnectedWeightsKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel - * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. - * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. 
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the permute - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T> - void run_convert_fc_weights(const Window &window); - - const ITensor *_input; - ITensor *_output; - unsigned int _factor1; /* equals to the number of elements per original input plane if @p data_layout == NCHW; its number of channels otherwise */ - unsigned int _factor2; /* equals to the number of elements per original input plane if @p data_layout == NHWC; its number of channels otherwise */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h deleted file mode 100644 index 6ec2793484..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H -#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel to convert asymmetric signed to asymmetric signed and vice-versa */ -class NEConvertQuantizedSignednessKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvertQuantizedSignednessKernel"; - } - /** Default constructor */ - NEConvertQuantizedSignednessKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data types supported: opposite of @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data types supported: opposite of @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h deleted file mode 100644 index 2b271de56b..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H -#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -#include <array> -#include <cstdint> -#include <vector> - -namespace arm_compute -{ -class ITensor; - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). - * The client can supply a convolution matrix \f$ C_{m,n} \f$. 
- * @f{eqnarray}{ - * k_0 &=& \frac{m}{2} \\ - * l_0 &=& \frac{n}{2} \\ - * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} - * @f} - * - * @note The above equation for this function is similar to the default OpenCV Filter2D function, - * which actually computes a correlation and not a convolution. - * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. - */ -template <unsigned int matrix_size> -class NEConvolutionKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEConvolutionKernel"; - } - /** Default constructor */ - NEConvolutionKernel(); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - template <typename OutputType> - void convolution(const Window &win); - -protected: - uint32_t _scale; /**< scale of the convolution */ - std::array<int16_t, matrix_size *matrix_size> _convolution; /**< convolution matrix */ -}; - -/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ -using NEConvolution3x3Kernel = NEConvolutionKernel<3>; -/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ -using NEConvolution5x5Kernel = NEConvolutionKernel<5>; -/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ -using NEConvolution7x7Kernel = NEConvolutionKernel<7>; -///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ -using NEConvolution9x9Kernel = NEConvolutionKernel<9>; - -/****************************************************************************************\ - * Separable Square Convolution * -\****************************************************************************************/ - -/** Kernel for the Horizontal pass of a Separable Convolution */ -template <unsigned int matrix_size> -class NESeparableConvolutionHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NESeparableConvolutionHorKernel"; - } - /** Default constructor */ - NESeparableConvolutionHorKernel(); - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data types supported: U16, S16, S32. - * @param[in] conv_row Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Apply the object's convolution to the given window of the input tensor.. - * - * @param[in] window Window to apply the convolution on. - */ - template <typename OutputType> - void convolve(const Window &window); - - std::array<int16_t, matrix_size> _conv_row; /**< Convolution coefficients */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; -/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; -/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; - -/** Kernel for the Vertical pass of a Separable Convolution */ -template <unsigned int matrix_size> -class NESeparableConvolutionVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NESeparableConvolutionVertKernel"; - } - /** Default constructor */ - NESeparableConvolutionVertKernel(); - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U16, S16, S32. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv_col Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as U16. - * - * @param[in] win Window to apply the convolution on. - */ - template <typename OutputType> - void convolution_u16(const Window &win); - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as S16. - * - * @param[in] win Window to apply the convolution on. - */ - template <typename OutputType> - void convolution_s16(const Window &win); - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as S32. - * - * @param[in] win Window to apply the convolution on. 
- */ - template <typename OutputType> - void convolution_s32(const Window &win); - - std::array<int16_t, matrix_size> _conv_col; /**< Convolution coefficients */ - uint32_t _scale; /**< Convolution's scale */ -}; - -/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ -using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; -/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ -using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; -/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ -using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -/** Kernel for the running convolution on a rectangle matrix. - * - * @note Supports combinations of 3,5,7 and 9. - */ -class NEConvolutionRectangleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvolutionRectangleKernel"; - } - /** Default constructor */ - NEConvolutionRectangleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. 
Data type supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - unsigned int get_index(uint32_t val); - /** Apply the object's convolution to the given window of the input tensor. - * - * @param[in] win Window to apply the convolution on. 
- */ - template <typename OutputType, unsigned int rows, unsigned int cols> - void convolution(const Window &win); - -protected: - const ITensor *_input; /**< Input tensor */ - ITensor *_output; /**< Output tensor */ - uint32_t _scale; /**< Scale of the convolution */ - std::vector<int16_t> _convolution; /**< Convolution matrix */ - BorderSize _border_size; /**< Calculated border width */ - uint32_t _func_idx; /**< Index used to specify convolution function to be used */ - const static unsigned int _nr_supported_sizes - { - 4 - }; /**< Number of supported permutations */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h deleted file mode 100644 index d2dbbaef98..0000000000 --- a/arm_compute/core/NEON/kernels/NECopyKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOPYKERNEL_H -#define ARM_COMPUTE_NECOPYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a copy between two tensors */ -class NECopyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECopyKernel"; - } - /** Default constructor */ - NECopyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NECopyKernel(const NECopyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NECopyKernel &operator=(const NECopyKernel &) = delete; - /** Allow instances of this class to be moved */ - NECopyKernel(NECopyKernel &&) = default; - /** Allow instances of this class to be moved */ - NECopyKernel &operator=(NECopyKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: All - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - */ - void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); - /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel - * - * @param[in] input Source tensor. Data types supported: All - * @param[in] output Destination tensor. Data types supported: same as @p input. 
- * @param[in] padding (Optional) Padding to be applied to the input tensor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - PaddingList _padding; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOPYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h deleted file mode 100644 index ba58ab1e58..0000000000 --- a/arm_compute/core/NEON/kernels/NECropKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H -#define ARM_COMPUTE_NEON_CROP_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor cropping */ -class NECropKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECropKernel"; - } - /** Default constructor */ - NECropKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECropKernel(const NECropKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECropKernel &operator=(const NECropKernel &) = delete; - /** Allow instances of this class to be moved */ - NECropKernel(NECropKernel &&) = default; - /** Allow instances of this class to be moved */ - NECropKernel &operator=(NECropKernel &&) = default; - /** Default destructor */ - ~NECropKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * @note Padding not supported. - * - * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. - * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values. - * Data type supported: F32 - * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input. - * Data type supported: F32 - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. 
- */ - void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * @note Padding not supported. - * - * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. - * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32 - * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image - * in @p input. Data type supported: F32 - * @param[in] output Destination tensor. Data type supported: F32 - * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); - - /** Configure output tensor's shape as this can only be determined at runtime. */ - void configure_output_shape(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Function to use for in bounds crop for the particular tensor types passed to configure() */ - using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool); - -private: - const ITensor *_input; - const ITensor *_crop_boxes; - const ITensor *_box_ind; - ITensor *_output; - - Coordinates _start; - Coordinates _end; - uint32_t _crop_box_ind; - float _extrapolation_value; - /** The number of rows out of bounds at the start and end of output. 
*/ - std::array<uint32_t, 2> _rows_out_of_bounds; - /** The number of columns out of bounds at the start and end of output. */ - std::array<uint32_t, 2> _cols_out_of_bounds; - - NECropKernel::InBoundsCropFunction *_in_bounds_crop_function; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h deleted file mode 100644 index 52442c3920..0000000000 --- a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H -#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class IDistribution1D; -class ILut; -class ITensor; -using IImage = ITensor; - -/** Interface for the cumulative distribution (cummulative summmation) calculation kernel. - * - * This kernel calculates the cumulative sum of a given distribution (meaning that each output element - * is the sum of all its previous elements including itself) and creates a lookup table with the normalized - * pixel intensities which is used for improve the constrast of the image. - */ -class NECumulativeDistributionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECumulativeDistributionKernel"; - } - /** Default constructor */ - NECumulativeDistributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete; - /** Allow instances of this class to be moved */ - NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default; - /** Allow instances of this class to be moved */ - NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default; - /** Set the input and output distribution. - * - * @param[in] input Input image. Data type supported: U8 - * @param[in] distribution Unnormalized 256-bin distribution of the input image. - * @param[out] cumulative_sum Cummulative distribution (Summed histogram). Should be same size as @p distribution. - * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. 
- */ - void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const IImage *_input; /**< Input image. */ - const IDistribution1D *_distribution; /**< Input histogram of the input image. */ - IDistribution1D *_cumulative_sum; /**< The cummulative distribution. */ - ILut *_output; /**< Output with the equalization lookup table. */ -private: - static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h deleted file mode 100644 index 6690ac2236..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEDepthConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthConcatenateLayerKernel"; - } - /** Default constructor */ - NEDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. 
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); - -private: - DepthConcatFunction *_func; - const ITensor *_input; - ITensor *_output; - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h deleted file mode 100644 index 5cda3203ed..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H -#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Depth conversion kernel - * This function ignores the scale and zeroPoint of quanized tensors, i.e. QASYMM8 input is treated as uint8 values. 
- */ -class NEDepthConvertLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthConvertLayerKernel"; - } - /** Default constructor*/ - NEDepthConvertLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete; - /** Default move constructor */ - NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete; - /** Default move assignment operator */ - NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default; - /** Set the input and output of the kernel - * - * Valid conversions Input -> Output : - * - * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - BFLOAT16 -> F32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. - * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. - * @param[in] policy Conversion policy. - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - */ - void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. - * @param[in] output Destination tensor info. 
Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. - * @param[in] policy Conversion policy - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - ConvertPolicy _policy; - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h deleted file mode 100644 index 0b645887ee..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the depth to space kernel */ -class NEDepthToSpaceLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthToSpaceLayerKernel"; - } - /** Default constructor */ - NEDepthToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~NEDepthToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape x value. - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel. - * - * @param[in] input Tensor input info. Supported tensor rank: 4. 
Data types supported: All - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ - DataLayout _data_layout; /**< Data layout of the operation */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h deleted file mode 100644 index 227ddb4743..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H -#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. */ -class NEDepthwiseConvolutionLayer3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthwiseConvolutionLayer3x3Kernel"; - } - /** Default constructor */ - NEDepthwiseConvolutionLayer3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayer3x3Kernel(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete; - /** Default Move Constructor. */ - NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Default move assignment operator */ - NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; - /** Initialize the function's source, destination, conv and border_size. - * - * @note Supported data layouts: NCHW and NHWC - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] if NHWC data layout. Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. 
- * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - */ - void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U)); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3Kernel - * - * @note Supported data layouts: NCHW and NHWC - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [3, 3, IFM] for NCHW or [IFM, 3, 3] if NHWC data layout. Data type supported: Same as @p input. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - const Size2D &dilation = Size2D(1U, 1U)); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; - const ITensor *_input; - ITensor *_output; - const ITensor *_weights; - PadStrideInfo _conv_info; - unsigned int _num_elems_written_per_iteration; - unsigned int _depth_multiplier; - Size2D _dilation; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h deleted file mode 100644 index 9737c9932e..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H -#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/utils/misc/Requires.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include <arm_neon.h> -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to run a depthwise convolution native on a tensor. */ -class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthwiseConvolutionLayerNativeKernel"; - } - /** Default constructor */ - NEDepthwiseConvolutionLayerNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Default Move Constructor. */ - NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Default move assignment operator */ - NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Initialize the function's source, destination and parameters. - * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. 
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - const Size2D &dilation = Size2D(1U, 1U)); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel - * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. 
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - const Size2D &dilation = Size2D(1U, 1U)); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - template < typename T, typename TW, int S, typename std::enable_if < std::is_same<T, float>::value -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - || std::is_same<T, float16_t>::value -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - , - int >::type = 0 > - void run_depthwise(const Window &window, bool has_biases); - - template < typename T, typename TW, int S, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > - void run_depthwise(const Window &window, bool has_biases); - - /** Common signature for all the specialised depthwise convolution native functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases); - - DepthwiseFunctionPtr _func; - BorderSize _border_size; - const ITensor *_input; - const ITensor *_weights; - const ITensor *_biases; - ITensor *_output; - PadStrideInfo _conv_info; - unsigned int _depth_multiplier; - Size2D _dilation; - std::vector<int> _output_multiplier; - std::vector<int> _output_shift; - bool _has_biases; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h deleted file mode 100644 index 3792fb3bd7..0000000000 --- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the dequantization layer kernel. */ -class NEDequantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDequantizationLayerKernel"; - } - /** Default constructor */ - NEDequantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEDequantizationLayerKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[in] output Output tensor info. 
Data types supported: F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h deleted file mode 100644 index 20aee9b5ce..0000000000 --- a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H -#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. - * - */ -class NEDerivativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDerivativeKernel"; - } - /** Default constructor */ - NEDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel(const NEDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDerivativeKernel(NEDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform derivative along the X direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_x(const Window &window); - /** Function to perform derivative along the Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_y(const Window &window); - /** Function to perform derivative along the X and Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_xy(const Window &window); - /** Common signature for all the specialised derivative functions - * - * @param[in] window Region on which to execute the kernel. - */ - using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); - /** Derivative function to use for the particular tensor types passed to configure() */ - DerivativeFunction _func; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor - Derivative along the X direction */ - ITensor *_output_y; /**< Output tensor - Derivative along the Y direction */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h deleted file mode 100644 index 00a954d958..0000000000 --- a/arm_compute/core/NEON/kernels/NEDilateKernel.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATEKERNEL_H -#define ARM_COMPUTE_NEDILATEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image dilatation */ -class NEDilateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEDilateKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h deleted file mode 100644 index 4ae283d69d..0000000000 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON interface for Direct Convolution Layer kernel */ -class NEDirectConvolutionLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDirectConvolutionLayerKernel"; - } - /** Default constructor */ - NEDirectConvolutionLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; - /** Default destructor */ - ~NEDirectConvolutionLayerKernel() = default; - /** Set the input, weights, and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[out] output Output tensor. 
- * The 3rd dimensions must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported: Same as @p input. - * @param[in] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p weights tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; - const ITensor *_weights; - ITensor *_output; - PadStrideInfo _conv_info; - BorderSize _border_size; - unsigned int _kernel_size; - unsigned int _num_weight_elems_read_per_row; - unsigned int _num_elems_read_per_iteration; - unsigned int _num_elems_written_per_iteration; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h deleted file mode 100644 index b7632d70c4..0000000000 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input. - * - * @note We assume bias to be shared - * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part - * of the @ref DirectConvolutionLayerOutputStageKernelInfo. - */ -class NEDirectConvolutionLayerOutputStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDirectConvolutionLayerOutputStageKernel"; - } - /** Default constructor */ - NEDirectConvolutionLayerOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default; - /** Default destructor */ - ~NEDirectConvolutionLayerOutputStageKernel() = default; - /** Set the accumulate buffer and the biases 
of the kernel. - * - * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input - * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. - * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata - */ - void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr, - const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel - * - * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input - * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr, - const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift); - -private: - OutputStageKernel *_func; - ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h deleted file mode 100644 index 61c25e1a2a..0000000000 --- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H -#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for an element-wise operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f] - * - */ -class NEElementwiseOperationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEElementwiseOperationKernel"; - } - /** Default constructor */ - NEElementwiseOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default; - /** Default destructor */ - ~NEElementwiseOperationKernel() = default; - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Common signature for all the specialised arithmetic functions - * - * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Dependent on subclass. - * @param[in] window Region on which to execute the kernel. 
- */ - using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); - -protected: - /** Validate the arguments passed to the kernel - * - * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Dependent on subclass. - */ - static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); - - /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff) - * - */ - void configure_common(const ITensor *input1, const ITensor *input2, ITensor *output); - - /** Function to use for the particular tensor types passed to configure() */ - std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function; - - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; - -class NEArithmeticOperationKernel : public NEElementwiseOperationKernel -{ -public: - /** Default constructor */ - NEArithmeticOperationKernel() = default; - - /** Configure the kernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(ArithmeticOperation op, const ITensor *input1, const ITensor *input2, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. 
Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEDivisionOperationKernel : public NEArithmeticOperationKernel -{ -public: - /** Default constructor */ - NEDivisionOperationKernel() = default; - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] input1 First tensor input. Data types supported: F16/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a Status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEPowerOperationKernel : public NEArithmeticOperationKernel -{ -public: - /** Default constructor */ - NEPowerOperationKernel() = default; - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] input1 First tensor input. Data types supported: F16/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEComparisonOperationKernel : public NEElementwiseOperationKernel -{ -public: - /** Default constructor */ - NEComparisonOperationKernel() = default; - - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input. 
Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. - * @param[in] output Output tensor. Data types supported: U16/U32. - */ - void configure(ComparisonOperation op, const ITensor *input1, const ITensor *input2, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U16/U32. - * - * @return a Status - */ - static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h deleted file mode 100644 index 9a41cecf19..0000000000 --- a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H -#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for an element-wise unary operation kernel - * - * Element-wise operation is computed by: - * @f[ output(x) = OP(input(x))@f] - * - */ -class NEElementwiseUnaryKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEElementwiseUnaryKernel"; - } - /** Default constructor */ - NEElementwiseUnaryKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete; - /** Allow instances of this class to be moved */ - NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default; - /** Allow instances of this class to be moved */ - NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default; - /** Default destructor */ - ~NEElementwiseUnaryKernel() = default; - - /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input. Data types supported: F16/F32. - * @param[in] output Output tensor. Data types supported: Same as @p input. - */ - void configure(ElementWiseUnary op, const ITensor *input, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a Status - */ - static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised arithmetic functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window); - - /** Template function to run elementwise unary operation - * - * @tparam ScalarType Scalar datatype - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename ScalarType> - void elementwise_op(const Window &window); - - ElementwiseUnaryPtr _func; - const ITensor *_input; - ITensor *_output; - ElementWiseUnary _op; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h deleted file mode 100644 index e3fcc2847e..0000000000 --- a/arm_compute/core/NEON/kernels/NEErodeKernel.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODEKERNEL_H -#define ARM_COMPUTE_NEERODEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image erosion */ -class NEErodeKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEErodeKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEERODEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h b/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h deleted file mode 100644 index ed17e3b8d5..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H -#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the digit reverse operation kernel. 
*/ -class NEFFTDigitReverseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTDigitReverseKernel"; - } - /** Constructor */ - NEFFTDigitReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete; - /** Default Move Constructor. */ - NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default; - /** Default move assignment operator */ - NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default; - /** Default destructor */ - ~NEFFTDigitReverseKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). - * @param[in] idx Digit reverse index tensor. Data type supported: U32 - * @param[in] config Kernel configuration. - */ - void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config); - - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor). - * @param[in] idx Digit reverse index tensor info. 
Data type supported: U32 - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window); - - template <bool is_input_complex, bool is_conj> - void digit_reverse_kernel_axis_0(const Window &window); - - template <bool is_input_complex, bool is_conj> - void digit_reverse_kernel_axis_1(const Window &window); - - NEFFTDigitReverseKernelFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - const ITensor *_idx; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h b/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h deleted file mode 100644 index 6e16fca0fb..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H -#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include <arm_neon.h> -#include <set> - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the FFT kernel. */ -class NEFFTRadixStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTRadixStageKernel"; - } - /** Constructor */ - NEFFTRadixStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete; - /** Default Move Constructor. */ - NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default; - /** Default move assignment operator */ - NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default; - /** Default destructor */ - ~NEFFTRadixStageKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input. 
- * @param[in] config FFT descriptor metadata. - */ - void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input. - * @param[in] config FFT descriptor metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); - /** Returns the radix that are support by the FFT kernel - * - * @return A set of supported radix - */ - static std::set<unsigned int> supported_radix(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_input; - ITensor *_output; - bool _run_in_place; - unsigned int _Nx; - unsigned int _axis; - unsigned int _radix; - - void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config); - void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config); - - using FFTFunctionPointerAxis0 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int)>; - using FFTFunctionPointerAxis1 = std::function<void(float *, float *, unsigned int, unsigned int, const float32x2_t &, unsigned int, unsigned int)>; - - FFTFunctionPointerAxis0 _func_0; - FFTFunctionPointerAxis1 _func_1; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h b/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h deleted file mode 100644 index 72963fa56d..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H -#define ARM_COMPUTE_NEFFTSCALEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the inverse fft scale kernel. */ -class NEFFTScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTScaleKernel"; - } - /** Constructor */ - NEFFTScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTScaleKernel(const NEFFTScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete; - /** Default Move Constructor. 
*/ - NEFFTScaleKernel(NEFFTScaleKernel &&) = default; - /** Default move assignment operator */ - NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default; - /** Default destructor */ - ~NEFFTScaleKernel() = default; - /** Set the input and output tensors. - * - * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] config Kernel configuration - */ - void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_input; - ITensor *_output; - float _scale; - bool _run_in_place; - bool _is_conj; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h deleted file mode 100644 index c0196c711a..0000000000 --- a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H -#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** NEON kernel to perform fast corners */ -class NEFastCornersKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFastCornersKernel"; - } - /** Constructor */ - NEFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel(const NEFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFastCornersKernel(NEFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Output image. Data type supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const IImage *_input; /**< source image */ - IImage *_output; /**< inermediate results */ - uint8_t _threshold; /**< threshold on difference between intensity */ - bool _non_max_suppression; /** true if non-maxima suppression is applied in the next stage */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h deleted file mode 100644 index e45caec34b..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H -#define ARM_COMPUTE_NEFILLARRAYKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */ -class NEFillArrayKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillArrayKernel"; - } - /** Default contructor */ - NEFillArrayKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillArrayKernel(const NEFillArrayKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillArrayKernel(NEFillArrayKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; - /** Default detructor */ - ~NEFillArrayKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[in] threshold Texels greater than the threshold will be added to the array. - * @param[out] output Arrays of keypoints to store the results. 
- */ - void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const IImage *_input; - IKeyPointArray *_output; - uint8_t _threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h deleted file mode 100644 index 0c852e8232..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H -#define ARM_COMPUTE_NEFILLBORDERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to fill borders */ -class NEFillBorderKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillBorderKernel"; - } - /** Default Constructor */ - NEFillBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillBorderKernel(const NEFillBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillBorderKernel(NEFillBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; - /** Default destructor */ - ~NEFillBorderKernel() = default; - - /** Initialise the function. - * - * @note This kernel fills the borders within the XY-planes. - * - * @param[in,out] tensor Tensor to process. Data types supported: All. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - void fill_replicate_single_channel(const Window &window); - void fill_constant_value_single_channel(const Window &window); - - ITensor *_tensor; - BorderSize _border_size; - BorderMode _mode; - PixelValue _constant_border_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h deleted file mode 100644 index 9c1059e606..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillInnerBorderKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H -#define ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to fill the interior borders */ -class NEFillInnerBorderKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillInnerBorderKernel"; - } - /** Default constructor */ - NEFillInnerBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillInnerBorderKernel(const NEFillInnerBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillInnerBorderKernel &operator=(const NEFillInnerBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillInnerBorderKernel(NEFillInnerBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillInnerBorderKernel &operator=(NEFillInnerBorderKernel &&) = default; - /** Default destructor */ - ~NEFillInnerBorderKernel() = default; - - /** Initialise the function. - * - * @note This kernel fills the borders within the XY-planes. - * - * @param[in,out] input Tensor to process. Data types supported: U8/S16/S32/F32. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
- * - */ - void configure(ITensor *input, BorderSize border_size, const PixelValue &constant_border_value = PixelValue()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template <typename T> - void fill_value_single_channel(const Window &window); - - ITensor *_tensor; - BorderSize _border_size; - PixelValue _constant_border_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFILLINNERBORDERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h deleted file mode 100644 index ba2f99857f..0000000000 --- a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the flatten layer kernel. */ -class NEFlattenLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFlattenLayerKernel"; - } - /** Default constructor */ - NEFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default; - /** Default destructor */ - ~NEFlattenLayerKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. 
Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFloorKernel.h b/arm_compute/core/NEON/kernels/NEFloorKernel.h deleted file mode 100644 index 4cdd9f2ac0..0000000000 --- a/arm_compute/core/NEON/kernels/NEFloorKernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFLOORKERNEL_H -#define ARM_COMPUTE_NEFLOORKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a floor operation */ -class NEFloorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEFloorKernel"; - } - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] output Destination tensor info. Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h deleted file mode 100644 index f598530d1e..0000000000 --- a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** OpenNE kernel to fuse the batch normalization node to a preceding convolution node */ -class NEFuseBatchNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFuseBatchNormalizationKernel"; - } - /** Default constructor */ - NEFuseBatchNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default; - /** Default destructor */ - ~NEFuseBatchNormalizationKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. 
Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - */ - void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel - * - * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights - * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. 
Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - * - * @return a status - */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input_weights; - const ITensor *_input_bias; - const ITensor *_bn_mean; - const ITensor *_bn_var; - const ITensor *_bn_gamma; - const ITensor *_bn_beta; - ITensor *_fused_weights; - ITensor *_fused_bias; - float _epsilon; - bool _run_in_place_weights; - bool _run_in_place_bias; - - using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window); - - FuseBatchNormFunction *_func; -}; -} 
// namespace arm_compute -#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h deleted file mode 100644 index 6aa8e250a4..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H -#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Base class for GEMM NEON kernels implemented in Assembly. 
*/ -class NEGEMMAssemblyBaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMAssemblyBaseKernel"; - } - /** Constructor */ - NEGEMMAssemblyBaseKernel() - : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false) - { - } - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default; - - virtual ~NEGEMMAssemblyBaseKernel() = default; - - /** Initialise the kernel's input and output. - * - * The computed function is C = a * AxB + b * C. - * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32 - * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0 - * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0. - * @param[out] workspace Space for intermediate results. - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the accumulation. - * @param[in] is_transposed_0 (Optional)True if @p input0 is transposed else false. (Defaults to false) - * @param[in] is_transposed_1 (Optional)True if @p input1 is transposed else false. 
(Defaults to false) - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false) - { - internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1); - } - -protected: - virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0; - - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - ITensor *_workspace; - float _alpha; - float _beta; - bool _is_transposed_0; - bool _is_transposed_1; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h deleted file mode 100644 index b6e6beab53..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H -#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to interleave the elements of a matrix - * - * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ - * \end{array} \right) - * @f] - * - * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] - */ -class NEGEMMInterleave4x4Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMInterleave4x4Kernel"; - } - /* Constructor */ - NEGEMMInterleave4x4Kernel(); - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the transpose functions - * - * @param[in] input An input tensor. Data types supported: All - * @param[out] output The output tensor. Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. - */ - using GEMMInterleaveFunction = void(const ITensor *input, ITensor *output, const Window &window); - - GEMMInterleaveFunction *_func; /**< GEMM interleave function to use for the particular tensor types passed to configure() */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h deleted file mode 100644 index 8f47c5089d..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply matrices - * - * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel - * This kernel performs the following computation: - * - * -# Convert a values from int8 to int32 - * -# Convert b values from int8 to int32 - * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 - * - */ -class NEGEMMLowpMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixMultiplyKernel"; - } - /** Constructor */ - NEGEMMLowpMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. 
- * - * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two - * kernels change the layout of the original matrices to be more cache-friendly. - * - * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel - * - * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] output Output tensor info to store the result of matrix multiplication. 
Data type supported: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - bool _slide_matrix_b; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h deleted file mode 100644 index b069e4cfac..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), - * and adds to it the offset contribution of matrix A and matrix B in-place. - * - * The final result is: - * - * mm_result[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - * - */ -class NEGEMMLowpOffsetContributionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpOffsetContributionKernel"; - } - /** Constructor */ - NEGEMMLowpOffsetContributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. 
Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - */ - void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_vector_sum_col; - const ITensor *_vector_sum_row; - ITensor *_mm_result; - int32_t _a_offset; - int32_t _b_offset; - int32_t _k_offset; - bool _slide_vector_sum_col; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h deleted file mode 100644 index 0dc64c9842..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel. - * - * The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), - * and adds to it the offset contribution of matrix A and matrix B in-place. - * - * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8. - * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8. 
- * - * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is: - * - * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift - * - * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is: - * - * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * and mm_result'[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - */ - -class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpOffsetContributionOutputStageKernel"; - } - /** Constructor */ - NEGEMMLowpOffsetContributionOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionOutputStageKernel(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. 
Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. - * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. - */ - void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel - * - * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. 
Data type supported: same as @p mm_result - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. - * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, - int32_t b_offset, - GEMMLowpOutputStageInfo output_stage); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - using NEGEMMLowpOffsetContributionOutputStageFunction = std::function<void(const Window, const ITensor *, const ITensor *, const ITensor *, const ITensor *, - ITensor *, int32_t, int32_t, int32_t, bool, GEMMLowpOutputStageInfo)>; - -private: - /** Function to use for the particular tensors passed to configure() */ - NEGEMMLowpOffsetContributionOutputStageFunction _function; - const ITensor *_vector_sum_col; - const ITensor *_vector_sum_row; - const ITensor *_bias; - const ITensor *_mm_result; - ITensor *_output; - int32_t _a_offset; - int32_t _b_offset; - int32_t _k_offset; - bool _slide_vector_sum_col; - GEMMLowpOutputStageInfo _output_stage; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h deleted file mode 
100644 index b4a1419c9b..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. 
- * The following computations will be performed by the kernel: - * - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Add bias to final result if bias tensor is not a nullptr - * -# Shift the int32 accumulator by result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values: - * -# -to the [0..255] range and cast to QASYMM8. - * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. - * - */ -class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ScaleKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output_stage GEMMLowp output stage metadata. 
- */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output_stage GEMMLowp output stage metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T> - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - const GEMMLowpOutputStageInfo *_output_stage; - bool _is_bounded_relu; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h deleted file mode 100644 index 0806bd1df5..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. - * - */ -class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - 
NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] input Input tensor info. Data type supported: S32 - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor info. 
Data type supported: Data type supported: QSYMM16 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <bool is_bounded_relu> - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h deleted file mode 100644 index 2b3657c728..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED. 
- * - */ -class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <bool is_bounded_relu> - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h deleted file mode 100644 index 2f099a3ebb..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. 
- * - */ -class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <bool is_bounded_relu> - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h deleted file mode 100644 index 1e472f5252..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; -struct GEMMLowpReductionKernelInfo; - -/** Common interface for all NEON reduction kernels */ -class INEGEMMLowpReductionKernel : public INEKernel -{ -public: - /** Constructor */ - INEGEMMLowpReductionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete; - /** Allow instances of this class to be moved */ - INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default; - /** Allow instances of this class to be moved */ - INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - -protected: - const ITensor *_input; - ITensor *_output; - int32_t _k; - bool _is_reshaped; - int32_t _scalar; - bool _mul_by_scalar; -}; - -/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. 
- * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixAReductionKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_a_cols) Number of matrix A columns - * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced column must be multiplied by a scalar value. - */ - void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_a_cols) Number of matrix A columns - * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced column must be multiplied by a scalar value. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Execution of the reduction kernel specialized on the input type - * - * @param[in] window Execution window - */ - template <typename T> - void run_internal(const Window &window); -}; - -/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixBReductionKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_b_rows) Number of matrix B rows. - * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced row must be multiplied by a scalar value. - */ - void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. 
Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_b_rows) Number of matrix B rows. - * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced row must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Execution of the reduction kernel specialized on the input type - * - * @param[in] window Execution window - * @param[in] info Thread-related information - */ - template <typename T> - void run_internal(const Window &window, const ThreadInfo &info); -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h deleted file mode 100644 index a3ba57e4ab..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -/** NEON kernel to add a bias to each row of the input tensor */ -class NEGEMMMatrixAccumulateBiasesKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixAccumulateBiasesKernel"; - } - /** Default constructor */ - NEGEMMMatrixAccumulateBiasesKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixAccumulateBiasesKernel(const NEGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixAccumulateBiasesKernel &operator=(const NEGEMMMatrixAccumulateBiasesKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAccumulateBiasesKernel(NEGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAccumulateBiasesKernel &operator=(NEGEMMMatrixAccumulateBiasesKernel &&) = default; - /** Default destructor */ - ~NEGEMMMatrixAccumulateBiasesKernel() = default; - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in, out] accum The accumulate tensor to convert. Data type supported: F32 - * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. 
Data type supported: Same as @p input - */ - void configure(ITensor *accum, const ITensor *biases); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAccumulateBiasesKernel - * - * @param[in] accum The accumulate tensor to convert. Data type supported: F32 - * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *accum, const ITensorInfo *biases); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_accum; - const ITensor *_biases; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMMATRIXACCUMULATEBIASESKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h deleted file mode 100644 index e528c59d8f..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: - * - * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size - * - * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. In case this kernel is used for finalizing GEMM result, we have: - * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel - * - MTX_1 = C - */ -class NEGEMMMatrixAdditionKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixAdditionKernel"; - } - /** Constructor */ - NEGEMMMatrixAdditionKernel(); - /** Prevent instances of this class from being copied */ - NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; - /** Prevent instances of this class from being copied */ - NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 - * @param[in, out] output Output tensor. 
If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. - * @param[in] beta Weight of matrix C - */ - void configure(const ITensor *input, ITensor *output, float beta); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 - * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. - * @param[in] beta Weight of matrix C - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the matrix addition functions - * - * @param[in] input An input tensor. Data types supported: F16/F32 - * @param[out] output The output tensor. Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. 
- * @param[in] beta Weight of matrix C - */ - using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); - /** Matrix addition function to use for the particular tensor types passed to configure() */ - MatrixAdditionFunction *_func; - float _beta; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h deleted file mode 100644 index 841e08d0ef..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication - * - * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel - * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped - * - */ -class NEGEMMMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixMultiplyKernel"; - } - /** Constructor */ - NEGEMMMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. 
- * - * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * These two kernels change the layout of the original matrices to be more cache-friendly. - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. 
- * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - float _alpha; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h deleted file mode 100644 index f5635dd58c..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXVECTORMULTIPLYKERNEL_H_ -#define ARM_COMPUTE_NEGEMMMATRIXVECTORMULTIPLYKERNEL_H_ - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the GEMM matrix vector multiply kernel. **/ -class NEGEMMMatrixVectorMultiplyKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixVectorMultiplyKernel"; - } - /** Default constructor */ - NEGEMMMatrixVectorMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixVectorMultiplyKernel(const NEGEMMMatrixVectorMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixVectorMultiplyKernel &operator=(const NEGEMMMatrixVectorMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixVectorMultiplyKernel(NEGEMMMatrixVectorMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixVectorMultiplyKernel &operator=(NEGEMMMatrixVectorMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input0 First Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 Second Input tensor. Data types supported: same as @p input. - * @param[out] output Output tensor which stores the interleaved matrix. 
Data type supported: same as @p input, S32 for QASYMM8/QASYMM8_SIGNED input. - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixVectorMultiplyKernel - * - * @param[in] input0 First Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 Second Input tensor. Data types supported: same as @p input. - * @param[in] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input, S32 for QASYMM8/QASYMM8_SIGNED input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Template function to run the matrix vector multiplication - * - * @tparam I0 Input 0 type - * @tparam I1 Input 1 type - * @tparam O Output type - * - * @param[in] window_in Input region. (Must be a valid region of the window returned by window()). - * @param[in] window_w Weights region. (Must be a valid region of the window returned by window()). - * @param[in] window_out Output region.(Must be a valid region of the window returned by window()). 
- */ - template <typename I0, typename I1, typename O> - void matrix_vector_multiply(const Window &window_in, const Window &window_w, const Window &window_out); - /** Common signature for all the specialised matrix vector multiplication functions */ - using GEMMMatrixVectorMultiplyFunctionPtr = void (NEGEMMMatrixVectorMultiplyKernel::*)(const Window &window_in, - const Window &window_w, - const Window &window_out); - -private: - GEMMMatrixVectorMultiplyFunctionPtr _func; - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMMATRIXVECTORMULTIPLYKERNEL_H_*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h deleted file mode 100644 index 967a1b73dc..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H -#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) - * - * Following an example of how the transposition1xW works when the input data is F32 - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - * - * Following an example of how the transposition1xW works when the input data type is F16 - * - * @f[ - * \left( \begin{array}{cccccccc} - * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ - * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ - * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ - * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} - * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ - * \end{array} \right) - * @f] - * - * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) - * - */ -class NEGEMMTranspose1xWKernel : 
public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMTranspose1xWKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGatherKernel.h b/arm_compute/core/NEON/kernels/NEGatherKernel.h deleted file mode 100644 index bfef40b53b..0000000000 --- a/arm_compute/core/NEON/kernels/NEGatherKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEGATHERKERNEL_H -#define ARM_COMPUTE_NEGATHERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Kernel to perform other operation on NEON */ -class NEGatherKernel : public INEKernel -{ -public: - /** Default constructor. */ - NEGatherKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernel(const NEGatherKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernel &operator=(const NEGatherKernel &) = delete; - /** Allow instances of this class to be moved. */ - NEGatherKernel(NEGatherKernel &&) = default; - /** Allow instances of this class to be moved. */ - NEGatherKernel &operator=(NEGatherKernel &&) = default; - /** Default detructor */ - ~NEGatherKernel() = default; - - /** Name of the kernel - * - * @return Kernel name - */ - const char *name() const override - { - return "NEGatherKernel"; - } - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. 
Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Implementation of the gather operation for 0 axis. - * - * For gather on the 0 axis an element by element copy is performed. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template <typename U> - void gather_0_axis(const Window &window, const ThreadInfo &info); - - /** Implementation of the gather operation. - * - * For 1<=axis a row-wise copy is taking place. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) - * @param[in] info Info about executing thread and CPU. 
- */ - template <typename U> - void gather_n_axis(const Window &window, const ThreadInfo &info); - - using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - const ITensor *_indices; - int _axis; - ITensor *_output; - kernel_ptr _func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h deleted file mode 100644 index fa92eef1b7..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Gaussian 3x3 filter */ -class NEGaussian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: S16 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h deleted file mode 100644 index 5e63e5136f..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ -class NEGaussian5x5HorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5HorKernel"; - } - /** Default constructor */ - NEGaussian5x5HorKernel(); - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; -}; - -/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ -class NEGaussian5x5VertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5VertKernel"; - } - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor, Data type supported: U8. 
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h deleted file mode 100644 index 4700325b5f..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ -class NEGaussianPyramidHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidHorKernel"; - } - /** Default constructor */ - NEGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16. 
- */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** NEON kernel to perform a GaussianPyramid (vertical pass) */ -class NEGaussianPyramidVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidVertKernel"; - } - /** Default constructor */ - NEGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8. 
- */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h deleted file mode 100644 index 382ce54518..0000000000 --- a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H -#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -namespace arm_compute -{ -class ITensor; - -/** Interface for Compute All Anchors kernel */ -class NEComputeAllAnchorsKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEComputeAllAnchorsKernel"; - } - - /** Default constructor */ - NEComputeAllAnchorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default; - /** Default destructor */ - ~NEComputeAllAnchorsKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. 
Data types supported: QSYMM16/F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template <typename T> - void internal_run(const Window &window); - - const ITensor *_anchors; - ITensor *_all_anchors; - ComputeAnchorsInfo _anchors_info; -}; -} // arm_compute -#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h deleted file mode 100644 index edb2da58e2..0000000000 --- a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HOG Orientation Binning */ -class NEHOGOrientationBinningKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGOrientationBinningKernel"; - } - /** Default constructor */ - NEHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~NEHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. 
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor - * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor - * @param[out] output_ptr Pointer to the output cell of hog space tensor - * @param[in] mag_stride Stride of the magnitude tensor - * @param[in] phase_stride Stride of the phase tensor - * @param[in] cell_width Width of the cell - * @param[in] cell_height Height of the cell - * @param[in] num_bins Number of bins for each cell - * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index - */ - using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, - size_t cell_height, size_t num_bins, float phase_scale); - /** Orientation binning function to use for the particular cell width passed to configure() */ - OrientBinFunc *_func; - const ITensor *_input_magnitude; - const ITensor *_input_phase; - ITensor *_output; - size_t _cell_width; - size_t _cell_height; - size_t _num_bins; - float _phase_scale; -}; - -/** NEON kernel to perform HOG block normalization */ -class NEHOGBlockNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - 
return "NEHOGBlockNormalizationKernel"; - } - /** Default constructor */ - NEHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~NEHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor - * @param[out] output_ptr Pointer to the output block of the hog normalized space - * @param[in] input_stride Stride of the input hog space tensor - * @param[in] num_cells_per_block_height Number of cells per block along the Y direction - * @param[in] num_bins_block_x Number of bins per block along the X direction - * @param[in] num_bins_block Number of total bins per block - * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization - */ - using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, - float l2_hyst_threshold); - /** Block normalization function to use for the particular normalization type passed to configure() */ - BlockNormFunc *_func; - const ITensor *_input; - ITensor *_output; - Size2D _num_cells_per_block; - Size2D _num_cells_per_block_stride; - size_t _num_bins; - float _l2_hyst_threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h deleted file mode 100644 index acb35923d4..0000000000 --- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H -#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HOG detector kernel using linear SVM */ -class NEHOGDetectorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGDetectorKernel"; - } - /** Default constructor */ - NEHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete; - /** Default destructor */ - ~NEHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. 
- * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - IDetectionWindowArray *_detection_windows; - const float *_hog_descriptor; - float _bias; - float _threshold; - uint16_t _idx_class; - size_t _num_bins_per_descriptor_x; - size_t _num_blocks_per_descriptor_y; - size_t _block_stride_width; - size_t _block_stride_height; - size_t _detection_window_width; - size_t _detection_window_height; - size_t _max_num_detection_windows; - arm_compute::Mutex _mutex; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h deleted file mode 100644 index a77fe16ac2..0000000000 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Common interface for all Harris Score kernels */ -class INEHarrisScoreKernel : public INEKernel -{ -public: - /** Default constructor */ - INEHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; - /** Default destructor */ - ~INEHarrisScoreKernel() = default; - -public: - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 - * @param[in] input2 Source image (gradient Y). Data types supported: same as @ input1 - * @param[out] output Destination image (harris score). Data types supported: F32 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; - -protected: - const IImage *_input1; /**< Source image - Gx component */ - const IImage *_input2; /**< Source image - Gy component */ - IImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; - -/** Template NEON kernel to perform Harris Score. - * The implementation supports 3, 5, and 7 for the block_size - */ -template <int32_t block_size> -class NEHarrisScoreKernel : public INEHarrisScoreKernel -{ -public: - const char *name() const override - { - return "NEHarrisScoreKernel"; - } - /** Default constructor */ - NEHarrisScoreKernel(); - // Inherited methods overridden: - void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; - BorderSize border_size() const override; - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised harris score functions */ - using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float norm_factor, float sensitivity, float strength_thresh); - /** Harris Score function to use for the particular image types passed to configure() */ - HarrisScoreFunction *_func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h deleted file mode 100644 index be81f2e963..0000000000 --- 
a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class NEHeightConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHeightConcatenateLayerKernel"; - } - /** Default constructor */ - NEHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ITensor *input, unsigned int height_offset, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h deleted file mode 100644 index b1dd105676..0000000000 --- a/arm_compute/core/NEON/kernels/NEHistogramKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H -#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include <cstddef> -#include <cstdint> - -namespace arm_compute -{ -class IDistribution1D; -class ITensor; -using IImage = ITensor; - -/** Interface for the histogram kernel */ -class NEHistogramKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHistogramKernel"; - } - /** Default constructor */ - NEHistogramKernel(); - /** Default destructor */ - ~NEHistogramKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel(NEHistogramKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel &operator=(NEHistogramKernel &&) = delete; - - /** Set the input image and the distribution output. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution. - * @param[in,out] local_hist Array that the threads use to save their local histograms. - * It's size should be equal to (number_of_threads * num_bins), - * and the Window::thread_id() is used to determine the part of the array - * used by each thread. - * @param[out] window_lut LUT with pre-calculated possible window values. - * The size of the LUT should be equal to max_range_size and it will be filled - * during the configure stage, while it re-used in every run, therefore can be - * safely shared among threads. 
- */ - void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); - /** Set the input image and the distribution output. - * - * @note Used for histogram of fixed size equal to 256 - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution which must be of 256 bins.. - */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to merge multiple partial histograms. - * - * @param[out] global_hist Pointer to the final histogram. - * @param[in] local_hist Pointer to the partial histograms. - * @param[in] bins Number of bins. - */ - void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); - /** Function to merge multiple minimum values of partial histograms. - * - * @param[out] global_min Pointer to the global min value. - * @param[in] local_min Local min value. - */ - void merge_min(uint8_t *global_min, const uint8_t &local_min); - /** Function to perform histogram on the given window - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_U8(Window win, const ThreadInfo &info); - /** Function to perform histogram on the given window where histogram is - * of fixed size 256 without ranges and offsets. - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_fixed_U8(Window win, const ThreadInfo &info); - /** Pre-calculate the pixel windowing for every possible pixel - * - * Calculate (V - offset) * numBins / range where V is every possible pixel value. 
- * - * @note We currently support U8 image thus possible pixel values are between 0 and 255 - */ - void calculate_window_lut() const; - /** Common signature for all the specialised Histogram functions - * - * @param[in] window Region on which to execute the kernel. - */ - using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); - - HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() - const IImage *_input; - IDistribution1D *_output; - uint32_t *_local_hist; - uint32_t *_window_lut; - arm_compute::Mutex _hist_mtx; - static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h deleted file mode 100644 index 1c358b379d..0000000000 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H -#define ARM_COMPUTE_NEIM2COLKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -class Size2D; - -/** Interface for the im2col reshape kernel. - * - * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. - * It is used to transform a convolution to a plain matrix multiplication. - * - * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ - * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ - * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ - * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - */ -class NEIm2ColKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEIm2ColKernel"; - } - /** Default constructor */ - NEIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel(const NEIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - NEIm2ColKernel(NEIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; 
- /** Default destructor */ - ~NEIm2ColKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported - */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. 
Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run im2col - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T, bool has_pads, bool is_nchw> - void run_im2col(const Window &window); - - /** Common signature for all the specialised im2col functions - * - * @param[in] window Region on which to execute the kernel. - */ - using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); - - Im2ColFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - std::pair<unsigned int, unsigned int> _convolved_dims; - PadStrideInfo _conv_info; - unsigned int _kernel_width; - unsigned int _kernel_height; - bool _has_bias; - Size2D _dilation; - DataLayout _data_layout; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h deleted file mode 100644 index 7c14e409c6..0000000000 --- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for performing an instance normalization */ -class NEInstanceNormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEInstanceNormalizationLayerKernel"; - } - /** Default constructor */ - NEInstanceNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NEInstanceNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon (Optional) Lower bound value for the normalization. 
Defaults to 1e-12 - */ - void configure(ITensor *input, ITensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f); - - /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialized instance normalization functions - * - * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output The output tensor. - * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon Lower bound value for the normalization. 
Defaults to 1e-12 - */ - using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window); - - NormalizationFunction *_func; - ITensor *_input; - ITensor *_output; - float _gamma; - float _beta; - float _epsilon; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h deleted file mode 100644 index 77ae7b9efa..0000000000 --- a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform an image integral on an image */ -class NEIntegralImageKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEIntegralImageKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h deleted file mode 100644 index 3937bf0163..0000000000 --- a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H -#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ -class NEL2NormalizeLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEL2NormalizeLayerKernel"; - } - /** Default constructor */ - NEL2NormalizeLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default; - /** Default destructor */ - ~NEL2NormalizeLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. 
- * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon); - - /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. - * @param[in] sum Sum values tensor info. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_sum; - ITensor *_output; - unsigned int _actual_axis; - float _epsilon; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h deleted file mode 100644 index cf99bbe691..0000000000 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H -#define ARM_COMPUTE_LKTRACKERKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstddef> -#include <cstdint> -#include <tuple> -#include <utility> - -namespace arm_compute -{ -class ITensor; - -/** Internal keypoint class for Lucas-Kanade Optical Flow */ -struct NELKInternalKeypoint -{ - float x{ 0.f }; /**< x coordinate of the keypoint */ - float y{ 0.f }; /**< y coordinate of the keypoint */ - bool tracking_status{ false }; /**< the tracking status of the keypoint */ -}; - -/** Interface for NEON Array of Internal Key Points. 
*/ -using INELKInternalKeypointArray = IArray<NELKInternalKeypoint>; - -/** Interface for the Lucas-Kanade tracker kernel */ -class NELKTrackerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NELKTrackerKernel"; - } - /** Default constructor */ - NELKTrackerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELKTrackerKernel(const NELKTrackerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; - /** Allow instances of this class to be moved */ - NELKTrackerKernel(NELKTrackerKernel &&) = default; - /** Allow instances of this class to be moved */ - NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; - /** Default destructor */ - ~NELKTrackerKernel() = default; - - /** Initialise the kernel input and output - * - * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 - * @param[in] input_new Pointer to the input new tensor. Data type supported. U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16 - * @param[in] old_points Pointer to the IKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points - * @param[out] new_points Pointer to the IKeyPointArray storing new key points - * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points - * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points - * @param[in] termination The criteria to terminate the search of each keypoint. 
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminate the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, - const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, - INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, - Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, - size_t level, size_t num_levels, float pyramid_scale); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Initialise the array of keypoints in the provide range - * - * @param[in] start Index of first element in the keypoints array to be initialised - * @param[in] end Index after last elelemnt in the keypoints array to be initialised - */ - void init_keypoints(int start, int end); - /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y - * - * @param[in] keypoint Keypoint for which gradients are computed - * @param[out] bilinear_ix Intermediate interpolated data for X gradient - * @param[out] bilinear_iy Intermediate interpolated data for Y gradient - * - * @return Values A11, A12, A22 - */ - std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy); - /** Compute the 
vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y} - * - * @param[in] old_keypoint Old keypoint for which gradient is computed - * @param[in] new_keypoint New keypoint for which gradient is computed - * @param[in] bilinear_ix Intermediate interpolated data for X gradient - * @param[in] bilinear_iy Intermediate interpolated data for Y gradient - * - * @return Values b1, b2 - */ - std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy); - - const ITensor *_input_old; - const ITensor *_input_new; - const ITensor *_old_scharr_gx; - const ITensor *_old_scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - INELKInternalKeypointArray *_old_points_internal; - INELKInternalKeypointArray *_new_points_internal; - Termination _termination; - bool _use_initial_estimate; - float _pyramid_scale; - float _epsilon; - unsigned int _num_iterations; - int _window_dimension; - unsigned int _level; - unsigned int _num_levels; - ValidRegion _valid_region; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NELKTRACKERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h deleted file mode 100644 index ad2a161296..0000000000 --- a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply each row of first tensor with low 2 dimensions of second tensor. 
*/ -class NELocallyConnectedMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NELocallyConnectedMatrixMultiplyKernel"; - } - /** Default constructor */ - NELocallyConnectedMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output - * - * @param[in] input0 First input tensor. Data types supported: F16, F32 - * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel - * - * @param[in] input0 First input tensor info. Data types supported: F16, F32 - * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 - * @param[in] output Output tensor info. 
Data type supported: same as @p input0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h deleted file mode 100644 index 7ad5bf0d99..0000000000 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Template interface for the kernel to compute magnitude and phase */ -template <MagnitudeType mag_type, PhaseType phase_type> -class NEMagnitudePhaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMagnitudePhaseKernel"; - } - /** Default constructor */ - NEMagnitudePhaseKernel(); - /** Destructor */ - ~NEMagnitudePhaseKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; - /** Default move constructor */ - NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; - /** Default move assignment operator */ - NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; - - /** Initialise the kernel's input, output. - * - * @note At least one of out1 or out2 must be set - * - * @param[in] gx Gradient X tensor. Data type supported: S16. - * @param[in] gy Gradient Y tensor. Data type supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. - * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. 
- */ - void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform magnitude on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude(const Window &window); - /** Function to perform phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void phase(const Window &window); - /** Function to perform magnitude and phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude_phase(const Window &window); - -private: - /** Common signature for all the specialised MagnitudePhase functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); - /** MagnitudePhase function to use for the particular formats passed to configure() */ - MagnitudePhaseFunctionPtr _func; - const ITensor *_gx; /**< Input gradient X */ - const ITensor *_gy; /**< Input gradient Y */ - ITensor *_magnitude; /**< Output - Magnitude */ - ITensor *_phase; /**< Output - Phase */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h deleted file mode 100644 index 2197e3cfbe..0000000000 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. 
*/ -class NEMeanStdDevKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevKernel"; - } - /** Default constructor */ - NEMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete; - /** Default destructor */ - ~NEMeanStdDevKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values. - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. 
- */ - void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - BorderSize border_size() const override; - -private: - const IImage *_input; - float *_mean; - float *_stddev; - uint64_t *_global_sum; - uint64_t *_global_sum_squared; - arm_compute::Mutex _mtx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h deleted file mode 100644 index dc0455cc4c..0000000000 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include <arm_fp16.h> -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ -class NEMeanStdDevNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevNormalizationKernel"; - } - /** Default constructor */ - NEMeanStdDevNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default; - /** Default destructor */ - ~NEMeanStdDevNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in, out] input Source tensor with 2 dimensions. 
In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f); - /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Normalizes the input with respect to mean and standard deviation. - * - * @param[in] window Region on which to execute the kernel. 
- */ - template <typename ScalarType, int size> - void mean_stddev_normalization(const Window &window); - - ITensor *_input; - ITensor *_output; - float _epsilon; - - using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window); - - MeanStdDevNormFunction _func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h deleted file mode 100644 index 3e86860f79..0000000000 --- a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H -#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform a median filter on a tensor */ -class NEMedian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEMedian3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMemsetKernel.h b/arm_compute/core/NEON/kernels/NEMemsetKernel.h deleted file mode 100644 index b4bcd11b82..0000000000 --- a/arm_compute/core/NEON/kernels/NEMemsetKernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H -#define ARM_COMPUTE_NEMEMSETKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for filling the planes of a tensor */ -class NEMemsetKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMemsetKernel"; - } - /** Default constructor */ - NEMemsetKernel(); - /** Default destructor */ - ~NEMemsetKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMemsetKernel(const NEMemsetKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMemsetKernel &operator=(const NEMemsetKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMemsetKernel(NEMemsetKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMemsetKernel &operator=(NEMemsetKernel &&) = default; - /** Initialise the kernel's tensor and filling value - * - * @param[in,out] tensor Input tensor to fill. 
Supported data types: All - * @param[in] constant_value The value used to fill the planes of the tensor - */ - void configure(ITensor *tensor, const PixelValue &constant_value); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_tensor; - PixelValue _constant_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h deleted file mode 100644 index 445e12af03..0000000000 --- a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform min max search on a 3D tensor. */ -class NEMinMaxLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLayerKernel"; - } - /** Default constructor */ - NEMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxLayerKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @note output[0] = minimum - * @note output[1] = maximum - * - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32 - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel - * - * @param[in] input Input tensor info. Data types supported: F32. 
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - /** Resets global minimum and maximum. */ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - void update_min_max(float *out_ptr, float min, float max); - const ITensor *_input; - ITensor *_output; - arm_compute::Mutex _mtx; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h deleted file mode 100644 index 597a093d70..0000000000 --- a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to perform min max search on an image. */ -class NEMinMaxKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxKernel"; - } - /** Default constructor */ - NEMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel(NEMinMaxKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const IImage *input, void *min, void *max); - /** Resets global minimum and maximum. 
*/ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Performs the min/max algorithm on U8 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_U8(Window win); - /** Performs the min/max algorithm on S16 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_S16(Window win); - /** Performs the min/max algorithm on F32 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_F32(Window win); - /** Common signature for all the specialised MinMax functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxFunction = void (NEMinMaxKernel::*)(Window window); - /** MinMax function to use for the particular image types passed to configure() */ - MinMaxFunction _func; - /** Helper to update min/max values **/ - template <typename T> - void update_min_max(T min, T max); - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */ -}; - -/** Interface for the kernel to find min max locations of an image. 
*/ -class NEMinMaxLocationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLocationKernel"; - } - /** Default constructor */ - NEMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; - /** Default destructor */ - ~NEMinMaxLocationKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc Array of minimum value locations. - * @param[out] max_loc Array of maximum value locations. - * @param[out] min_count Number of minimum value encounters. - * @param[out] max_count Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Performs the min/max location algorithm on T type images on a given window. - * - * @param win The window to run the algorithm on. 
- */ - template <class T, bool count_min, bool count_max, bool loc_min, bool loc_max> - void minmax_loc(const Window &win); - /** Common signature for all the specialised MinMaxLoc functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); - /** MinMaxLoc function to use for the particular image types passed to configure() */ - MinMaxLocFunction _func; - /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ - template <class T, typename> - struct create_func_table; - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - uint32_t *_min_count; /**< Count of minimum value encounters. */ - uint32_t *_max_count; /**< Count of maximum value encounters. */ - ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ - ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h deleted file mode 100644 index 43594bacbf..0000000000 --- a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to apply a non-linear filter */ -class NENonLinearFilterKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonLinearFilterKernel"; - } - /** Default constructor */ - NENonLinearFilterKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Fill mask with the corresponding given pattern. - * - * @param[in,out] mask Mask to be filled according to pattern - * @param[in] cols Columns (width) of mask - * @param[in] rows Rows (height) of mask - * @param[in] pattern Pattern to fill the mask according to - */ - void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); - /** Apply a median filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void median_filter_box(const Window &win); - /** Apply a min filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void min_filter_box(const Window &win); - /** Apply a max filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void max_filter_box(const Window &win); - /** Apply a median filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void median_filter_cross(const Window &win); - /** Apply a min filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void min_filter_cross(const Window &win); - /** Apply a max filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. 
- */ - template <int mask_w, int mask_h> - void max_filter_cross(const Window &win); - /** Apply a median filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void median_filter_disk(const Window &win); - /** Apply a min filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void min_filter_disk(const Window &win); - /** Apply a max filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void max_filter_disk(const Window &win); - /** Apply a non-linear filter when given mask has user-defined pattern. - * - * @param[in] win Window to apply the filter on. - */ - template <int mask_w, int mask_h> - void non_linear_filter_generic(const Window &win); - -private: - unsigned int _border_width; - const ITensor *_input; - ITensor *_output; - const uint8_t *_mask; - MatrixPattern _pattern; - NonLinearFilterFunction _function; - unsigned int _func_idx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h deleted file mode 100644 index e2ddec9a33..0000000000 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H -#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON - * - * @note Used by @ref NEFastCorners and @ref NEHarrisCorners - */ -class NENonMaximaSuppression3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonMaximaSuppression3x3Kernel"; - } - /** Default constructor */ - NENonMaximaSuppression3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; - /** Default destructor */ - ~NENonMaximaSuppression3x3Kernel() = default; - - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -protected: - /** Common signature for all the specialised non-maxima suppression 3x3 functions - * - * @param[in] input_ptr Pointer to the input tensor. - * @param[out] output_ptr Pointer to the output tensor - * @param[in] input_stride Stride of the input tensor - */ - using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); - - NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 - */ -class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel -{ -public: - const char *name() const override - { - return "NENonMaximaSuppression3x3FP16Kernel"; - } - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ -using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h deleted file mode 100644 index 4727164d00..0000000000 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the normalization layer kernel. - */ -class NENormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENormalizationLayerKernel"; - } - /** Default constructor */ - NENormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NENormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data type and layout supported: same as @p input. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
- */ - void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data type and layout supported: same as @p input. - * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform normalization depending on the given template - * dimension. The second template parameter specifies whether the - * normalization has to be 1D or 2D. - * - * @note Only supported normalizations are: - * - 1D over X or Z - * - 2D over X and Y - * - * @param[in] window Region on which to execute the kernel. - */ - template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm> - void normalize_float(const Window &window); - - /** Common signature for all the specialised normalization functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); - -private: - NormalizationFunction _func; - const ITensor *_input; - const ITensor *_input_squared; - ITensor *_output; - NormalizationLayerInfo _norm_info; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h deleted file mode 100644 index 4cbefbd1e3..0000000000 --- a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H -#define ARM_COMPUTE_NEPADLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to add padding to a tensor - * - * Add padding given padding information - */ -class NEPadLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPadLayerKernel"; - } - /** Default constructor */ - NEPadLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPadLayerKernel(const NEPadLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPadLayerKernel(NEPadLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default; - /** Default destructor */ - ~NEPadLayerKernel() = default; - - /** Initialize the function - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. - * Only CONSTANT padding mode is currently supported - */ - void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); - /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. - * - * @param[in] input Source tensor info. Data types supported: All. 
- * @param[in] output Output tensor info. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. - * Only CONSTANT padding mode is currently supported - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the padding function with constant padding - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T> - void run_pad_constant(const Window &window); - - /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window); - - /** Common signature for all the specialised permute functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window); - - PadFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - PaddingList _padding; - PixelValue _constant_value; - PaddingMode _mode; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h deleted file mode 100644 index 89dc4e6fc7..0000000000 --- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H -#define ARM_COMPUTE_NEPERMUTEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel to perform tensor permutation. 
- * - * Permutes given a permutation vector - */ -class NEPermuteKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPermuteKernel"; - } - /** Default constructor */ - NEPermuteKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPermuteKernel(const NEPermuteKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPermuteKernel &operator=(const NEPermuteKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPermuteKernel(NEPermuteKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPermuteKernel &operator=(NEPermuteKernel &&) = default; - /** Default destructor */ - ~NEPermuteKernel() = default; - - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref CPPPermuteKernel - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the permute - * - * @param[in] window Region on which to execute the kernel. 
(Must be a valid region of the window returned by window()). - */ - template <typename T> - void run_permute(const Window &window); - - /** Common signature for all the specialised permute functions - * - * @param[in] window Region on which to execute the kernel. - */ - using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window); - - PermuteFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - PermutationVector _perm; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h deleted file mode 100644 index 1a9dd6be2e..0000000000 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H -#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform addition between two tensors */ -class NEPixelWiseMultiplicationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPixelWiseMultiplicationKernel"; - } - /** Default constructor */ - NEPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; - /** Default destructor */ - ~NEPixelWiseMultiplicationKernel() = default; - /** Initialise the kernel's input, output and border mode. - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). - * @param[out] output Output tensor. 
Data types supported: - * - U8, only if both inputs are U8. - * - QASYMM8, only if both inputs are QASYMM8. - * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. - * - S16. - * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. - * - F16, only if @p input1 is F16. - * - F32, only if both inputs are F32. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. - * @param[in] rounding_policy Rounding policy. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32). - * @param[in] output Output tensor info. Data types supported: - * - U8, only if both inputs are U8. - * - QASYMM8, only if both inputs are QASYMM8. - * - QASYMM8_SIGNED, only if @p input1 is QASYMM8_SIGNED. - * - S16. - * - QSYMM16, only if both inputs are QSYMM16. - * - S32, only if both inputs are QSYMM16. - * - F16, only if @p input1 is F16. - * - F32, only if both inputs are F32. 
- * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16. - * @param[in] rounding_policy Rounding policy. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Common signature for all the specialised multiplication functions with integer scaling factor - * - * @param[in] input1_ptr Pointer to the first input tensor. - * @param[in] input2_ptr Pointer to the second input tensor. - * @param[out] output_ptr Pointer to the output tensor. - * @param[in] scale Integer scale factor. - */ - using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale); - /** Common signature for all the specialised multiplication functions with float scaling factor - * - * @param[in] input1_ptr Pointer to the first input tensor. - * @param[in] input2_ptr Pointer to the second input tensor. - * @param[out] output_ptr Pointer to the output tensor. - * @param[in] scale Float scale factor. - */ - using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale); - /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor - * - * @param[in] input1_ptr Pointer to the first input tensor. - * @param[in] input2_ptr Pointer to the second input tensor. - * @param[out] output_ptr Pointer to the output tensor. - * @param[in] scale Float scale factor. 
- * @param[in] input1_qua_info Quantization Info of tensor input1. - * @param[in] input2_qua_info Quantization Info of tensor input2. - * @param[in] output_qua_info Quantization Info of tensor output. - * - */ - using MulFunctionQuantized = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale, - const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info); - - MulFunctionFloat *_func_float; - MulFunctionInt *_func_int; - MulFunctionQuantized *_func_quantized; - -private: - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - float _scale; - int _scale_exponent; - bool _run_optimized_qasymm8; -}; - -/** Interface for the complex pixelwise multiplication kernel. */ -class NEComplexPixelWiseMultiplicationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEComplexPixelWiseMultiplicationKernel"; - } - /** Default constructor.*/ - NEComplexPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComplexPixelWiseMultiplicationKernel(const NEComplexPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComplexPixelWiseMultiplicationKernel &operator=(const NEComplexPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEComplexPixelWiseMultiplicationKernel(NEComplexPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEComplexPixelWiseMultiplicationKernel &operator=(NEComplexPixelWiseMultiplicationKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor. 
Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; - -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h deleted file mode 100644 index b0574b7cf6..0000000000 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the pooling layer kernel */ -class NEPoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPoolingLayerKernel"; - } - /** Default constructor */ - NEPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEPoolingLayerKernel() = default; - /** Set the input and output tensors. - * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel - * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. 
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform 2x2 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - void pooling2_f32_nchw_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform MxN pooling for 32-bit floating point values. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. 
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 32-bit floating point values (NHWC). - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 7x7 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 3x3 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling for float16_t. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. 
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 3x3 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 16-bit floating point values. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 16-bit floating point values. (NHWC) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform 2x2 pooling for 8bit quantized fixed point. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. 
- * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template <typename T> - void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform 3x3 pooling for 8bit quantized fixed point. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template <typename T> - void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform MxN pooling for 8-bit quantized. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template <typename T> - void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform MxN pooling for 8-bit quantized. (NHWC) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. 
- */ - template <typename T> - void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Common signature for all the specialised Pooling functions - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding); - -private: - PoolingFunction _func; - const ITensor *_input; - ITensor *_output; - ITensor *_indices; - PoolingLayerInfo _pool_info; - DataLayout _data_layout; - unsigned int _num_elems_processed_per_iteration; - BorderSize _border_size; - bool _is_square; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h deleted file mode 100644 index 6bf6574568..0000000000 --- a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H -#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to calculate prior boxes */ -class NEPriorBoxLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPriorBoxLayerKernel"; - } - /** Default constructor */ - NEPriorBoxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input - * @param[in] info Prior box layer info. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel - * - * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 - * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input - * @param[in] info Prior box layer info. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Stores the coordinates of the calculated prior boxes. - * - * @param[out] out Output pointer. - * @param[in] offset Output offset to write to. - * @param[in] center_x Center pixel value on x-axis. - * @param[in] center_y Center pixel value on y-axis. - * @param[in] box_width Prior box width. - * @param[in] box_height Prior box height. - * @param[in] width Input width. - * @param[in] height Input height. - */ - void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height); - /** Function to calculate prior boxes. - * - * @param[in] window Input region on which to execute the kernel. 
- */ - void calculate_prior_boxes(const Window &window); - - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - PriorBoxLayerInfo _info; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h deleted file mode 100644 index f5e8da7feb..0000000000 --- a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include <functional> - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform layer normalization */ -class NEQLSTMLayerNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEQLSTMLayerNormalizationKernel"; - } - /** Default constructor */ - NEQLSTMLayerNormalizationKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete; - /** Default Move Constructor. */ - NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default; - /** Default move assignment operator */ - NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default; - /** Default destructor */ - ~NEQLSTMLayerNormalizationKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32 - */ - void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias); - /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel - * - * @param[in] input Source tensor info. Data types supported: QSYMM16. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] weight Weight tensor info. 
Data types supported: Same as @p input. - * @param[in] bias Bias tensor info. Data types supported: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - // constants - static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */ - static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */ - static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */ - static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */ - - using ComputeFuncType = std::function<void(NEQLSTMLayerNormalizationKernel &)>; - - ComputeFuncType _fn{}; /**< Function pointer to computation function */ - - const ITensor *_input{ nullptr }; /**< Input tensor */ - const ITensor *_weight{ nullptr }; /**< Weight tensor */ - const ITensor *_bias{ nullptr }; /**< Bias tensor */ - ITensor *_output{ nullptr }; /**< Output tensor */ - - int32_t _output_multiplier{}; /**< Multiplier for output values */ - int32_t _output_shift{}; /**< Shift value for output values */ - - int32_t _window_start_x{}; /**< The beginning of x-axis iteration */ - int32_t _window_end_x{}; /**< The end of x-axis iteration */ - int32_t _window_step_x{}; /**< The size of x-axis iteration's step */ - - Window _inout_window{}; /**< Window for input and output tensor */ - Window _weight_window{}; /**< Window for weight and bias tensor */ - - /** Function to configure initial windows for destination of computation - * - * @param[in] Target destination tensor to use for output window - * - * @return configured window - */ - Window configure_window(ITensor *target); - // Function to compute for data type QSYMM16 - void compute_qsymm16(); - /** Function to compute summation and 
summation of squared input of the given input pointer - * - * @param[in] Input_ptr pointer to input array - * - */ - std::pair<int64_t, int64_t> sum_qsymm16(const int16_t *input_ptr); - /** Function to normalize values using computed mean and standard deviation - * - * @param[in] input_ptr Pointer to input array - * @param[in] output_ptr Pointer to output array - * @param[in] weight_ptr Pointer to weight array - * @param[in] bias_ptr Pointer to bias array - * @param[in] mean Mean value - * @param[in] inv_std_mul Quantized multiplier for standard deviation - * @param[in] inv_std_shift Shift for standard deviation - * - */ - void normalize_qasymm16(const int16_t *input_ptr, - int16_t *output_ptr, - const int16_t *weight_ptr, - const int32_t *bias_ptr, - int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift); - /** Function to compute output quantization information */ - QuantizationInfo compute_output_qinfo(); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h deleted file mode 100644 index 087e767b73..0000000000 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the quantization layer kernel. - * - * @note The implementation supports only 3D input tensors - * - */ -class NEQuantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEQuantizationLayerKernel"; - } - /** Default constructor */ - NEQuantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEQuantizationLayerKernel() = default; - /** Set the input, output. - * - * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
- * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window); - /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor. - * - * @param[in] window Region on which to execute the kernel. - */ - template <typename TIn, typename TOut> - void run_quantize_qasymm8(const Window &window); - /** Function to apply QASYMM16 quantization on a tensor. - * - * @param[in] window Region on which to execute the kernel. - */ - template <typename T> - void run_quantize_qasymm16(const Window &window); - - const ITensor *_input; - ITensor *_output; - - QuantizationFunctionExecutorPtr _func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h deleted file mode 100644 index bebcab5359..0000000000 --- a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H -#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the RoIAlign kernel. - */ -class NEROIAlignLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEROIAlignLayerKernel"; - } - - /** Constructor */ - NEROIAlignLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete; - /** Default Move Constructor. 
*/ - NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default; - /** Default move assignment operator. */ - NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default; - /** Default destructor */ - ~NEROIAlignLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. - * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, - * otherwise same as @p input - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. 
- * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template <DataLayout data_layout, typename input_data_type, typename roi_data_type = input_data_type> - void internal_run(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - ITensor *_output; - const ITensor *_rois; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h deleted file mode 100644 index 59a5017711..0000000000 --- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/IArray.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the ROI pooling layer kernel */ -class NEROIPoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEROIPoolingLayerKernel"; - } - /** Default constructor */ - NEROIPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEROIPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. 
Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor. - */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_rois; - ITensor *_output; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NERangeKernel.h b/arm_compute/core/NEON/kernels/NERangeKernel.h deleted file mode 100644 index e67a5dc945..0000000000 --- a/arm_compute/core/NEON/kernels/NERangeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NERANGEKERNEL_H -#define ARM_COMPUTE_NERANGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel class for Range - * - * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments - * of 'step' up to but not including 'end'. - */ -class NERangeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERangeKernel"; - } - /** Default constructor */ - NERangeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERangeKernel(const NERangeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERangeKernel &operator=(const NERangeKernel &) = delete; - /** Allow instances of this class to be moved */ - NERangeKernel(NERangeKernel &&) = default; - /** Allow instances of this class to be moved */ - NERangeKernel &operator=(NERangeKernel &&) = default; - /** Default destructor */ - ~NERangeKernel() = default; - /** Initialize the kernel's output tensor, start, end and step of the sequence. - * - * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. 
- * @param[in] step The gap between each pair of values in the sequence. - */ - void configure(ITensor *output, float start, float end, float step); - /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel - * - * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - * - * @return a status - */ - static Status validate(const ITensorInfo *output, float start, float end, float step); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using RangeFunction = void(ITensor *output, float start, float step, const Window &window); - - RangeFunction *_func; /**< Range function to be called */ - float _start; /**< Start of sequence */ - float _end; /**< End of sequence */ - float _step; /**< Increment/step value */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NERANGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h deleted file mode 100644 index 28cca4987b..0000000000 --- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H -#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a reduction operation - * - * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized - * output tensor is signed 32-bit integer (S32). It is the user's responsibility - * to check that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). 
- */ -class NEReductionOperationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReductionOperationKernel"; - } - /** Default constructor */ - NEReductionOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernel(const NEReductionOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEReductionOperationKernel(NEReductionOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default; - /** Default destructor */ - ~NEReductionOperationKernel() = default; - - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. - * @param[out] output Destination tensor.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - */ - void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. - * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. - * @param[in] output Destination tensor info.Data types and data layouts supported: same as @p input, S32 for ARG_MIX/ARG_MAX. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. 
Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h deleted file mode 100644 index e929b1c5d4..0000000000 --- a/arm_compute/core/NEON/kernels/NERemapKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREMAPKERNEL_H -#define ARM_COMPUTE_NEREMAPKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a remap on a tensor */ -class NERemapKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERemapKernel"; - } - /** Default constructor */ - NERemapKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel(const NERemapKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel &operator=(const NERemapKernel &) = delete; - /** Allow instances of this class to be moved */ - NERemapKernel(NERemapKernel &&) = default; - /** Allow instances of this class to be moved */ - NERemapKernel &operator=(NERemapKernel &&) = default; - /** Default destructor */ - ~NERemapKernel() = default; - - /** Initialize the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. 
- */ - void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** function to perform nearest interpolation on the given window */ - void remap_nearest(const Window &window); - /** function to perform bilinear interpolation on the given window */ - void remap_bilinear(const Window &window); - /** Remap function to use for the particular interpolation type passed to configure() */ - void (NERemapKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input image */ - ITensor *_output; /**< Output image */ - const ITensor *_map_x; /**< Input remap x coordinates */ - const ITensor *_map_y; /**< Input remap y coordinates */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h deleted file mode 100644 index 9277ddbe47..0000000000 --- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H -#define ARM_COMPUTE_NEREORGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor re-organization */ -class NEReorgLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReorgLayerKernel"; - } - /** Default constructor */ - NEReorgLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReorgLayerKernel(const NEReorgLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete; - /** Default Move Constructor. */ - NEReorgLayerKernel(NEReorgLayerKernel &&) = default; - /** Default move assignment operator */ - NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default; - /** Default destructor */ - ~NEReorgLayerKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] stride Stride to be used during data re-organization. 
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const ITensor *input, ITensor *output, int32_t stride); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] stride Stride to be used during data re-organization - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - int32_t _stride; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h deleted file mode 100644 index fccf2685a8..0000000000 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H -#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor reshaping */ -class NEReshapeLayerKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEReshapeLayerKernel"; - } - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. 
Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReverseKernel.h b/arm_compute/core/NEON/kernels/NEReverseKernel.h deleted file mode 100644 index 516653b70d..0000000000 --- a/arm_compute/core/NEON/kernels/NEReverseKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H -#define ARM_COMPUTE_NEREVERSEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the reverse layer kernel. 
*/ -class NEReverseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReverseKernel"; - } - /** Default constructor */ - NEReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReverseKernel(const NEReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReverseKernel &operator=(const NEReverseKernel &) = delete; - /** Allow instances of this class to be moved */ - NEReverseKernel(NEReverseKernel &&) = default; - /** Allow instances of this class to be moved */ - NEReverseKernel &operator=(NEReverseKernel &&) = default; - /** Default destructor */ - ~NEReverseKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output, const ITensor *axis); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. 
Data type supported: U32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - const ITensor *_axis; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h deleted file mode 100644 index 9bc04129e0..0000000000 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NESCALEKERNEL_H -#define ARM_COMPUTE_NESCALEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform scaling on a tensor */ -class NEScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScaleKernel"; - } - /** Default constructor */ - NEScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScaleKernel(const NEScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScaleKernel &operator=(const NEScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEScaleKernel(NEScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEScaleKernel &operator=(NEScaleKernel &&) = default; - /** Default destructor */ - ~NEScaleKernel() = default; - - /** Initialise the kernel's inputs, output and interpolation policy - * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * @note Using @p policy Area only supports data layout NCHW and input data type U8. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. 
scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo to use for configuration - */ - void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, - const ScaleKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel - * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * @note Using @p policy Area only supports data layout NCHW and input data type U8. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
- * @param[in] info @ref ScaleKernelInfo to use for validation - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output, - const ScaleKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** function to perform scale using nearest interpolation on the given window */ - void scale_nearest_nchw(const Window &window); - /** function to perform scale using bilinear interpolation on the given window */ - void scale_bilinear_nchw(const Window &window); - /** function to perform scale using area interpolation on the given window - * - * @note Used only in case down-sampling. - */ - void scale_area_nchw(const Window &window); - /** function to perform scale on the given window */ - void scale_nhwc(const Window &window); - /** Scale function to use for the particular interpolation type passed to configure() */ - void (NEScaleKernel::*_func)(const Window &window); - - const ITensor *_offsets; - const ITensor *_dx; - const ITensor *_dy; - const ITensor *_input; - ITensor *_output; - InterpolationPolicy _policy; - BorderSize _border_size; - BorderMode _border_mode; - PixelValue _constant_border_value; - float _sampling_offset; - bool _use_padding; - bool _align_corners; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h deleted file mode 100644 index 320b44d307..0000000000 --- a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H -#define ARM_COMPUTE_NESCHARR3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
- * -* @f[ -* \mathbf{G}_x=\begin{vmatrix} -* -3 & 0 & +3\\ -* -10& 0 & +10\\ -* -3 & 0 & +3 -* \end{vmatrix} -* @f] -*/ -class NEScharr3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScharr3x3Kernel"; - } - /** Default constructor */ - NEScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; - /** Default destructor */ - ~NEScharr3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? 
*/ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for scharr X */ - ITensor *_output_y; /**< Output tensor for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESelectKernel.h b/arm_compute/core/NEON/kernels/NESelectKernel.h deleted file mode 100644 index 51c8543ddc..0000000000 --- a/arm_compute/core/NEON/kernels/NESelectKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NESELECTKERNEL_H -#define ARM_COMPUTE_NESELECTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the select kernel - * - * Select is computed by: - * @f[ output(i) = condition(i) ? x(i) : y(i) @f] - * - */ -class NESelectKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESelectKernel"; - } - /** Default constructor */ - NESelectKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESelectKernel(const NESelectKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESelectKernel &operator=(const NESelectKernel &) = delete; - /** Allow instances of this class to be moved */ - NESelectKernel(NESelectKernel &&) = default; - /** Allow instances of this class to be moved */ - NESelectKernel &operator=(NESelectKernel &&) = default; - /** Default destructor */ - ~NESelectKernel() = default; - - /** Common signature for all the specialised elementwise functions - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[out] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x - */ - void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output); - - /** Validate the argument passed to the kernel - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised select functions - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window); - - /** Select function to use for the particular tensor types passed to configure() */ - SelectFunction *_function; - const ITensor *_c; /**< Condition tensor */ - const ITensor *_x; /**< Source tensor 1 */ - const ITensor *_y; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ - bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESELECTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h deleted file mode 100644 index ef0db2a428..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H -#define ARM_COMPUTE_NESOBEL3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. 
- * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -1 & 0 & +1\\ - * -2 & 0 & +2\\ - * -1 & 0 & +1 - * \end{vmatrix} - * @f] -*/ -class NESobel3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel3x3Kernel"; - } - /** Default constructor */ - NESobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel(NESobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; - /** Default destructor */ - ~NESobel3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? 
*/ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for sobel X */ - ITensor *_output_y; /**< Output tensor for sobel Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h deleted file mode 100644 index bc0cfb016e..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H -#define ARM_COMPUTE_NESOBEL5x5KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. 
- * - */ -class NESobel5x5HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5HorKernel"; - } - /** Default constructor */ - NESobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; - /** Default destructor */ - ~NESobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. 
- * -*/ -class NESobel5x5VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5VertKernel"; - } - /** Default constructor */ - NESobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; - /** Default destructor */ - ~NESobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. - * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. - * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - ITensor *_input_x; /**< X input (X output of the hor pass) */ - ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? 
*/ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h deleted file mode 100644 index 468a94d0d1..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H -#define ARM_COMPUTE_NESOBEL7x7KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. 
- * - */ -class NESobel7x7HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7HorKernel"; - } - /** Default constructor */ - NESobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; - /** Default destructor */ - ~NESobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel Y filter on a tensor. 
- * -*/ -class NESobel7x7VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7VertKernel"; - } - /** Default constructor */ - NESobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; - /** Default destructor */ - ~NESobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * @note If output_x is set then input_x must be set too - * @note If output_y is set then input_y must be set too - * - * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input_x; /**< X input (X output of the hor pass) */ - const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h deleted file mode 100644 index 0e0be7936b..0000000000 --- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the identifying the max value of 1D Logits */ -class NELogits1DMaxKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NELogits1DMaxKernel"; - } - /** Default constructor */ - NELogits1DMaxKernel(); - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window); - -private: - Logits1DMaxFunction *_func; - BorderSize _border_size; -}; - -/** Interface for softmax computation for QASYMM8 with pre-computed max. 
*/ -template <bool IS_LOG = false> -class NELogits1DSoftmaxKernel : public INEKernel -{ -public: - const char *name() const override - { - if(IS_LOG) - { - return "NELogits1DSoftmaxKernel"; - } - else - { - return "NELogits1DLogSoftmaxKernel"; - } - } - /** Default constructor */ - NELogits1DSoftmaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default; - /** Default destructor */ - ~NELogits1DSoftmaxKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] beta A scaling factor for the exponent. - * - * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input. - */ - void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[in] output Destination tensor info. Data types supported: same as @p input. 
- * @param[in] beta A scaling factor for the exponent. - * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, - const ITensorInfo *output, const float beta, const ITensorInfo *tmp); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta, - const Window &window); - - LogitsSoftmaxFunction *_func; - const ITensor *_input; - const ITensor *_max; - ITensor *_output; - float _beta; - ITensor *_tmp; //Temporary. Used internally -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h deleted file mode 100644 index 532fbb2852..0000000000 --- a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H -#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declaration -class ITensor; - -/** Interface for the space to batch kernel */ -class NESpaceToBatchLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESpaceToBatchLayerKernel"; - } - /** Default constructor */ - NESpaceToBatchLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default; - /** Default destructor */ - ~NESpaceToBatchLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. 
Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_block_shape; /**< Block shape tensor */ - const ITensor *_paddings; /**< Paddings tensor */ - ITensor *_output; /**< Destination tensor */ - DataLayout _data_layout; /**< Data layout to be used at run-time */ - - Size2D _padding_left; - int _block_shape_x; - int _block_shape_y; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h deleted file mode 100644 index e0c22e65fb..0000000000 --- a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H -#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the space to depth kernel */ -class NESpaceToDepthLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESpaceToDepthLayerKernel"; - } - /** Default constructor */ - NESpaceToDepthLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default; - /** Default destructor */ - ~NESpaceToDepthLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. 
- * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ - DataLayout _data_layout; /**< Data layout of the operation */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h deleted file mode 100644 index c4dc53eac6..0000000000 --- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H -#define ARM_COMPUTE_NESTACKLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ -class NEStackLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEStackLayerKernel"; - } - /** Default constructor */ - NEStackLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStackLayerKernel(const NEStackLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEStackLayerKernel(NEStackLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default; - /** Default destructor */ - ~NEStackLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); - - // Inherited methods overridden - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _axis; - unsigned int _idx_input; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h deleted file mode 100644 index 6709619a62..0000000000 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H -#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor strided slicing */ -class NEStridedSliceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEStridedSliceKernel"; - } - /** Default constructor */ - NEStridedSliceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSliceKernel(const NEStridedSliceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEStridedSliceKernel(NEStridedSliceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default; - /** Default destructor */ - ~NEStridedSliceKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. 
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. - */ - void configure(const ITensor *input, ITensor *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
- */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - Coordinates _starts_abs; /**< Absolute start coordinates */ - Coordinates _final_strides; /**< Final strides */ - int32_t _shrink_mask; /**< Shrink axis mask */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h deleted file mode 100644 index 13a76cb40e..0000000000 --- a/arm_compute/core/NEON/kernels/NETableLookupKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H -#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; -class ILut; - -/** Interface for the kernel to perform table lookup calculations. */ -class NETableLookupKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NETableLookupKernel"; - } - /** Default constructor */ - NETableLookupKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETableLookupKernel(const NETableLookupKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETableLookupKernel &operator=(const NETableLookupKernel &) = delete; - /** Allow instances of this class to be moved */ - NETableLookupKernel(NETableLookupKernel &&) = default; - /** Allow instances of this class to be moved */ - NETableLookupKernel &operator=(NETableLookupKernel &&) = default; - /** Initialise the kernel's input, lut and output. - * - * @param[in] input An input tensor. Data types supported: U8/S16. - * @param[in] lut The input LUT. - * @param[out] output The output tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ILut *lut, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Perform table lookup on a given window. - * - * @param window window Region on which to execute the kernel. 
- */ - template <class T> - void tableLookup(const Window &window); - /** Common signature for all the specialised lut functions - * - * @param[in] window Region on which to execute the kernel. - */ - using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window); - /** Sub function to use for the particular tensor types passed to configure() */ - TableLookupFunction _func; - const ILut *_lut; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h deleted file mode 100644 index a6d1e9071c..0000000000 --- a/arm_compute/core/NEON/kernels/NEThresholdKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H -#define ARM_COMPUTE_NETHRESHOLDKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/** Interface for the thresholding kernel - * - */ -class NEThresholdKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEThresholdKernel"; - } - /** Constructor - * Initialize all the pointers to nullptr and parameters to zero. - */ - NEThresholdKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel(const NEThresholdKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; - /** Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. - * @param[in] threshold Threshold. When the threhold type is RANGE, this is used as the lower threshold. - * @param[in] false_value value to set when the condition is not respected. - * @param[in] true_value value to set when the condition is respected. - * @param[in] type Thresholding type. Either RANGE or BINARY. - * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. 
- */ - void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** run binary thresholding on the given window */ - void run_binary(const Window &window); - /** run range thresholding on the given window */ - void run_range(const Window &window); - - void (NEThresholdKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input */ - ITensor *_output; /**< Output */ - uint8_t _threshold; - uint8_t _false_value; - uint8_t _true_value; - uint8_t _upper; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETileKernel.h b/arm_compute/core/NEON/kernels/NETileKernel.h deleted file mode 100644 index a64470ffd0..0000000000 --- a/arm_compute/core/NEON/kernels/NETileKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETILEKERNEL_H -#define ARM_COMPUTE_NETILEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a tile operation */ -class NETileKernel : public INEKernel -{ -public: - /** Default constructor */ - NETileKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NETileKernel(const NETileKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NETileKernel &operator=(const NETileKernel &) = delete; - /** Allow instances of this class to be moved */ - NETileKernel(NETileKernel &&) = default; - /** Allow instances of this class to be moved */ - NETileKernel &operator=(NETileKernel &&) = default; - const char *name() const override - { - return "NETileKernel"; - } - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Same as @p input - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - */ - void configure(const ITensor *input, ITensor *output, const Multiples &multiples); - /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel - * - * @param[in] input Source tensor info. Data type supported: All. - * @param[in] output Destination tensor info. Same as @p input - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETILEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h deleted file mode 100644 index a14dece0d6..0000000000 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H -#define ARM_COMPUTE_NETRANSPOSEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel which transposes the elements of a matrix. - * - * [width, height, batch] -> [height, width, batch] - * - */ -class NETransposeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NETransposeKernel"; - } - /** Default constructor */ - NETransposeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeKernel(const NETransposeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeKernel &operator=(const NETransposeKernel &) = delete; - /** Allow instances of this class to be moved */ - NETransposeKernel(NETransposeKernel &&) = default; - /** Allow instances of this class to be moved */ - NETransposeKernel &operator=(NETransposeKernel &&) = default; - /** Default destructor */ - ~NETransposeKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] output Output tensor. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the transpose functions - * - * @param[in] input An input tensor. Data types supported: All - * @param[out] output The output tensor. 
Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. - */ - using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); - /** Transpose function to use for the particular tensor types passed to configure() */ - TransposeFunction *_func; - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h b/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h deleted file mode 100644 index 1ea3f974e7..0000000000 --- a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H -#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the Upsample layer kernel.*/ -class NEUpsampleLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEUpsampleLayerKernel"; - } - /** Default constructor */ - NEUpsampleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete; - /** Default Move Constructor. */ - NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default; - /** Default move assignment operator */ - NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default; - /** Default destructor */ - ~NEUpsampleLayerKernel() = default; - /** Set the input output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. - * - */ - void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to run upsample layer (NCHW) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T, int S> - void upsample_nchw(const Window &window); - /** Function to run upsample layer (NHWC) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T, int S> - void upsample_nhwc(const Window &window); - - using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window); - -private: - UpsampleFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - Size2D _info; - unsigned int _num_elems_processed_per_iteration_x; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h deleted file mode 100644 index 61ca21eb48..0000000000 --- a/arm_compute/core/NEON/kernels/NEWarpKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEWARPKERNEL_H -#define ARM_COMPUTE_NEWARPKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <array> -#include <cstdint> -namespace arm_compute -{ -class ITensor; - -/** Common interface for warp affine and warp perspective */ -class INEWarpKernel : public INEKernel -{ -public: - /** Default constructor */ - INEWarpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel(const INEWarpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel &operator=(const INEWarpKernel &) = delete; - /** Allow instances of this class to be moved */ - INEWarpKernel(INEWarpKernel &&) = default; - /** Allow instances of this class to be moved */ - INEWarpKernel &operator=(INEWarpKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. - * The matrix argument requires 9 values, for the affine case the last 3 values are ignored. - * @param[in] border_mode Strategy to use for borders - * @param[in] constant_border_value Constant value used for filling the border. 
- */ - virtual void configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - // Inherited methods overridden: - BorderSize border_size() const override; - -protected: - /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_undefined(const Window &window) = 0; - /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_constant(const Window &window) = 0; - /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE - * - * @param[in] window Region on which to execute the kernel - */ - virtual void warp_replicate(const Window &window) = 0; - /** Common signature for all the specialised warp functions - * - * @param[in] window Region on which to execute the kernel. - */ - void (INEWarpKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input Tensor */ - ITensor *_output; /**< Output Tensor */ - uint8_t _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */ - std::array<float, 9> _matrix; /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. 
*/ -}; - -/** Template interface for the kernel to compute warp affine - * - */ -template <InterpolationPolicy interpolation> -class NEWarpAffineKernel : public INEWarpKernel -{ -private: - const char *name() const override - { - return "NEWarpAffineKernel"; - } - // Inherited methods overridden: - void warp_undefined(const Window &window) override; - void warp_constant(const Window &window) override; - void warp_replicate(const Window &window) override; -}; - -/** Template interface for the kernel to compute warp perspective - * - */ -template <InterpolationPolicy interpolation> -class NEWarpPerspectiveKernel : public INEWarpKernel -{ -private: - const char *name() const override - { - return "NEWarpPerspectiveKernel"; - } - // Inherited methods overridden: - void warp_undefined(const Window &window) override; - void warp_constant(const Window &window) override; - void warp_replicate(const Window &window) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEWARPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h deleted file mode 100644 index b68cb50c7b..0000000000 --- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H -#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer - * - * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. - * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. - * - * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: - * @f[ - * \left( \begin{array}{ccc} - * a000 & a001 & a002 \\ - * a010 & a011 & a012 \\ - * a020 & a021 & a022 \\ - * \end{array} \right) - * \left( \begin{array}{ccc} - * a100 & a101 & a102 \\ - * a110 & a111 & a112 \\ - * a120 & a121 & a122 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ - * \end{array} \right) - * @f] - */ -class NEWeightsReshapeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEWeightsReshapeKernel"; - } - /** Constructor.*/ - NEWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class 
contains pointers) */ - NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~NEWeightsReshapeKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. - * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/FP16/F32 - * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Data types supported: Same as @p input - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. - * Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32 - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. 
- * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h deleted file mode 100644 index f22f18f09f..0000000000 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEWidthConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEWidthConcatenateLayerKernel"; - } - /** Default constructor */ - NEWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ITensor *input, unsigned int width_offset, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _width_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h deleted file mode 100644 index 1740df0312..0000000000 --- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp" -#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp" -#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the NEON kernel to perform Winograd input transform. */ -class INEWinogradLayerTransformInputKernel : public INEKernel -{ -public: - /** Get the working space required to perform the transformation. - * - * Note, the working space is only required when performing the - * transformation - hence it can be reused whenever the transformation is - * not running. - * - * @param num_threads The greatest number of threads that will be used to execute the transform. - * @return Size of working space required in bytes. - */ - virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0; - - /** Determine how much memory (in units of TIn) to allocate for the - * transformed input. - * - * @param[in] num_batches Number of batches in the input tensor. - * @param[in] num_channels Number of feature maps in the input tensor. - * @param[in] num_rows Number of rows in each feature map. - * @param[in] num_cols Number of columns in each feature map. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". - * - * @return Storage size (in units of TIn) required. - */ - virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0; - - /** Gets the stride between matrices in the input worspace - * - * @param[in] num_batches Number of batches in the input tensor. - * @param[in] num_channels Number of feature maps in the input tensor. - * @param[in] num_rows Number of rows in each feature map. 
- * @param[in] num_cols Number of columns in each feature map. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". - * - * @return Stride expressed in bytes. - */ - virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0; - - /** Configure the output transform kernel. - * - * @param[in] input_nhwc Input tensor in NHWC data layout format. - * @param[in] num_batches Number of batches in input tensor. - * @param[in] num_rows Number of rows in input tensor. - * @param[in] num_cols Number of columns in input tensor. - * @param[in] num_channels Number of channels in input tensor. - * @param[in] padding Padding type. - * @param[out] output Base of output matrices. - * @param[in] matrix_stride Stride between output matrices. - * @param[in] workspace Tensor to be used as the working space during the computation. - */ - virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels, - const PaddingType padding, ITensor *output, const int matrix_stride, ITensor *workspace) = 0; - - /** Destructor */ - virtual ~INEWinogradLayerTransformInputKernel() - { - } -}; - -/** NEON kernel to perform Winograd input transform. 
*/ -template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel -{ -public: - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default; - /** Default destructor */ - ~NEWinogradLayerTransformInputKernel() = default; - - /** Determine how much memory (in units of TIn) to allocate for the - * transformed input. - * - * @param[in] num_batches Number of batches in the input tensor. - * @param[in] num_channels Number of feature maps in the input tensor. - * @param[in] num_rows Number of rows in each feature map. - * @param[in] num_cols Number of columns in each feature map. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". - * - * @return Storage size (in units of TIn) required. - */ - unsigned int get_input_storage_size( - int num_batches, - int num_channels, - int num_rows, - int num_cols, - bool same_padding) const override; - - /** Get the working space required to perform the transformation. - * - * Note, the working space is only required when performing the - * transformation - hence it can be reused whenever the transformation is - * not running. - * - * @param[in] num_threads The greatest number of threads that will be used to execute the transform. - * - * @return Size of working space required in bytes. 
- */ - unsigned int get_working_space_size(unsigned int num_threads) const override; - - /** Gets the stride between matrices in the input worspace - * - * @param[in] num_batches Number of batches in the input tensor. - * @param[in] num_channels Number of feature maps in the input tensor. - * @param[in] num_rows Number of rows in each feature map. - * @param[in] num_cols Number of columns in each feature map. - * @param[in] same_padding Use "SAME" padding, otherwise use "VALID". - * - * @return Stride expressed in bytes. - */ - int get_matrix_stride( - int num_batches, - int num_channels, - int num_rows, - int num_cols, - bool same_padding) const override; - - /** Default constructor */ - NEWinogradLayerTransformInputKernel(); - - const char *name() const override - { - return "NEWinogradLayerTransformInputKernel"; - } - - /** Configure the output transform kernel. - * - * @param[in] input_nhwc Input tensor. Data types supported: F16/F32. Layout supported NHWC. - * @param[in] num_batches Number of batches in input tensor. - * @param[in] num_rows Number of rows in input tensor. - * @param[in] num_cols Number of columns in input tensor. - * @param[in] num_channels Number of channels in input tensor. - * @param[in] padding Padding type. - * @param[out] output Base of output matrices. - * @param[in] matrix_stride Stride between output matrices. - * @param[in] workspace Tensor to be used as the working space during the computation. 
- */ - void configure( - const ITensor *input_nhwc, - const int num_batches, - const int num_rows, - const int num_cols, - const int num_channels, - const PaddingType padding, - ITensor *output, - const int matrix_stride, - ITensor *workspace) override; - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Winograd base kernel */ - using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>; - /** Winograd convolution kernel */ - using WinogradConv = typename WinogradBase::template Convolution<T, T>; - - /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] output Output tensor info. Data types supported: same as @p input. - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); - -private: - using InputTransform = typename WinogradBase::template InputTransform<T, T>; - - std::unique_ptr<InputTransform> _transform{ nullptr }; - const ITensor *_input_nhwc; - int _num_batches; /**< Number of batches in input tensor. */ - int _num_rows; /**< Number of rows in input tensor. */ - int _num_cols; /**< Number of columns in input tensor. */ - int _num_channels; /**< Number of channels in input tensor. */ - PaddingType _padding; /**< Padding type. */ - ITensor *_output; /**< Base of output matrices. */ - int _matrix_stride; /**< Stride between output matrices. */ - int _padding_top; /**< Padding to apply to the top of the image. */ - int _padding_left; /**< Padding to apply to the left of the image. */ - int _padding_right; /**< Padding to apply to the right of the image. 
*/ - int _padding_bottom; /**< Padding to apply to the bottom of the image. */ - ITensor *_workspace; -}; - -/** Interface for the NEON kernel to perform Winograd output transform. */ -class INEWinogradLayerTransformOutputKernel : public INEKernel -{ -public: - /** Get the working space required to perform the transformation. - * - * Note, the working space is only required when performing the - * transformation - hence it can be reused whenever the transformation is - * not running. - * - * @param[in] num_threads The greatest number of threads that will be used to execute the transform. - * - * @return Size of working space required in bytes. - */ - virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0; - - /** Determine how much memory (in units of TOut) to allocate for the - * (Winograd domain) output. - * - * @param[in] num_batches Number of batches in the output tensor. - * @param[in] num_rows Number of rows in each feature map of the input tensor. - * @param[in] num_cols Number of columns in each feature map of the input tensor. - * @param[in] num_output_channels Number of feature maps in the output tensor. - * - * @return Storage size (in units of TOut) required. - */ - virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0; - - /** Gets the stride between matrices in the output worspace - * - * @param[in] num_batches Number of batches in the output tensor. - * @param[in] num_rows Number of rows in each feature map of the input tensor. - * @param[in] num_cols Number of columns in each feature map of the input tensor. - * @param[in] num_output_channels Number of feature maps in the output tensor. - * - * @return Stride expressed in bytes. - */ - virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0; - - /** Get the output shape of a convolution. 
- * - * @param[in] num_rows Number of rows in each feature map of the input tensor. - * @param[in] num_cols Number of columns in each feature map of the input tensor. - * @param[in] padding_same True if padding is SAME, false otherwise - * - * @return Shape of the output tensor - */ - virtual std::pair<unsigned int, unsigned int> get_output_shape( - int num_rows, /* Number of rows in each feature map of the input tensor. */ - int num_cols, /* Number of columns in each feature map of the input tensor. */ - bool padding_same /* True if padding is SAME, false otherwise */ - ) const = 0; - - /** Configure the output transform kernel. - * - * @param[in] biases Pointer to the biases tensor. - * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain. - * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride() - * @param[out] output_nhwc Pointer to a tensor in NHWC data layout ordered output tensor, in the spatial domain. - * @param[in] num_batches Number of batches in the input tensor. - * @param[in] num_rows Number of rows in output tensor. - * @param[in] num_cols Number of columns in output tensor. - * @param[in] num_channels Number of feature maps in the output tensor. - * @param[in] workspace Tensor to be used as the working space during the computation. - * @param[in] activation Activation to be used - */ - virtual void configure( - const ITensor *biases, - const ITensor *transformed_output, - const int matrix_stride, - ITensor *output_nhwc, - const int num_batches, - const int num_rows, - const int num_cols, - const int num_channels, - ITensor *workspace, - const arm_gemm::Activation &activation) = 0; - - virtual ~INEWinogradLayerTransformOutputKernel() - { - } -}; - -/** NEON kernel to perform Winograd output transform. 
*/ -template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel -{ -public: - const char *name() const override - { - return "NEWinogradLayerTransformOutputKernel"; - } - /** Constructor */ - NEWinogradLayerTransformOutputKernel(); - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default; - /** Default destructor */ - ~NEWinogradLayerTransformOutputKernel() = default; - - // Inherited methods overridden: - /** Determine how much memory (in units of TOut) to allocate for the - * (Winograd domain) output. - * - * @param[in] num_batches Number of batches in the output tensor. - * @param[in] num_rows Number of rows in each feature map of the input tensor. - * @param[in] num_cols Number of columns in each feature map of the input tensor. - * @param[in] num_output_channels Number of feature maps in the output tensor. - * - * @return Storage size (in units of TOut) required. - */ - unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override; - - /** Gets the stride between matrices in the output worspace - * - * @param[in] num_batches Number of batches in the output tensor. - * @param[in] num_rows Number of rows in each feature map of the input tensor. 
- * @param[in] num_cols Number of columns in each feature map of the input tensor. - * @param[in] num_output_channels Number of feature maps in the output tensor. - * - * @return Stride expressed in bytes. - */ - int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override; - /** Get the output shape of a convolution. - * - * @param[in] num_rows Number of rows in each feature map of the input tensor. - * @param[in] num_cols Number of columns in each feature map of the input tensor. - * @param[in] padding_same True if padding is SAME, false otherwise - * - * @return Shape of the output tensor - */ - std::pair<unsigned int, unsigned int> get_output_shape( - int num_rows, /* Number of rows in each feature map of the input tensor. */ - int num_cols, /* Number of columns in each feature map of the input tensor. */ - bool padding_same) const override; - - /** Get the working space required to perform the transformation. - * - * Note, the working space is only required when performing the - * transformation - hence it can be reused whenever the transformation is - * not running. - * - * @param[in] num_threads The greatest number of threads that will be used to execute the transform. - * - * @return Size of working space required in bytes. - */ - unsigned int get_working_space_size(unsigned int num_threads) const override; - - /** Configure the output transform kernel. - * - * @param[in] biases Pointer to the biases tensor. - * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain. - * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride() - * @param[out] output_nhwc Pointer to a tensor with NHWC data layout, in the spatial domain. - * @param[in] num_batches Number of batches in the input tensor. - * @param[in] num_rows Number of rows in output tensor. 
- * @param[in] num_cols Number of columns in output tensor. - * @param[in] num_channels Number of feature maps in the output tensor. - * @param[in] workspace Tensor to be used as the working space during the computation. - * @param[in] activation Activation to be used - */ - void configure( - const ITensor *biases, - const ITensor *transformed_output, - const int matrix_stride, - ITensor *output_nhwc, - const int num_batches, - const int num_rows, - const int num_cols, - const int num_channels, - ITensor *workspace, - const arm_gemm::Activation &activation) override; - - void run(const Window &window, const ThreadInfo &info) override; - - /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel - * - * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32. - * @param[in] bias Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input - * @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. 
Data type supported: same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info); - -private: - using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>; - using WinogradConv = typename WinogradBase::template Convolution<T, T>; - using OutputTransform = typename WinogradBase::template OutputTransform<T, T>; - - std::unique_ptr<OutputTransform> _transform{ nullptr }; - const ITensor *_biases; - const ITensor *_transformed_output; - ITensor *_workspace; - int _matrix_stride; - int _matrix_row_stride; - ITensor *_output_nhwc; - int _num_batches; - int _num_rows; - int _num_cols; - int _num_channels; -}; - -/** Interface for the NEON kernel to perform Winograd weights transform. */ -class INEWinogradLayerTransformWeightsKernel : public INEKernel -{ -public: - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default; - /** Allow instances of this class to be moved */ - INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default; - - INEWinogradLayerTransformWeightsKernel() - { - } - virtual ~INEWinogradLayerTransformWeightsKernel() - { - } - /** Determine how much memory (in units of T) to allocate for the - * transformed weights. 
- * - * @param[in] num_output_channels Number of output feature maps. - * @param[in] num_input_channels Number of input feature maps. - * - * @return Storage size (in units of T) required. - */ - virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0; - /** Gets the stride between matrices in the kernel worspace - * - * @param[in] num_output_channels Number of output feature maps. - * @param[in] num_input_channels Number of input feature maps. - * - * @return Stride expressed in bytes. - */ - virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0; - - /** Configure the weights transform kernel. - * - * @param[in] weights_hwio Pointer to the weights tensor - * @param[out] output Pointer to working space for the output tensor in the Winograd domain. - * @param[in] matrix_stride Stride across matrices in the output workspace. - * @param[in] num_output_channels Number of filters. - * @param[in] num_input_channels Number of channels in each filter. - */ - - virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0; - - /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel - * - * @param[in] input First tensor input info. Data types supported: F16/F32. - * @param[in] weights Weights tensor info. Data types supported: same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights); -}; - -/** NEON kernel to perform Winograd weights transform. 
*/ -template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel -{ -public: - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default; - /** Default destructor */ - ~NEWinogradLayerTransformWeightsKernel() = default; - - /** Default constructor. */ - NEWinogradLayerTransformWeightsKernel(); - const char *name() const override - { - return "NEWinogradLayerTransformWeightsKernel"; - } - - /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel - * - * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout). - * kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32. - * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. 
Data type supported: same as @p input - * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info); - - // Inherited methods overridden: - -#ifndef DOXYGEN_SKIP_THIS - /** Configure the weights transform kernel. - * - * @param[in] weights_hwio Pointer to the weights tensor - * @param[out] output Pointer to working space for the output tensor in the Winograd domain. - * @param[in] matrix_stride Stride across matrices in the output workspace. - * @param[in] num_output_channels Number of filters. - * @param[in] num_input_channels Number of channels in each filter. - */ - void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override; -#endif /* DOXYGEN_SKIP_THIS */ - - /** Determine how much memory (in units of T) to allocate for the - * transformed weights. - * - * @param[in] num_output_channels Number of output feature maps. - * @param[in] num_input_channels Number of input feature maps. - * - * @return Storage size (in units of T) required. - */ - unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override; - - /** Gets the stride between matrices in the input worspace - * - * @param[in] num_output_channels Number of output feature maps. - * @param[in] num_input_channels Number of input feature maps. - * - * @return Stride expressed in bytes. 
- */ - int get_matrix_stride(int num_output_channels, int num_input_channels) const override; - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>; - using WinogradConv = typename WinogradBase::template Convolution<T, T>; - using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>; - - std::unique_ptr<WeightsTransform> _transform{ nullptr }; - const ITensor *_weights_hwio; - ITensor *_output; - int _matrix_stride; - int _num_output_channels; - int _num_input_channels; -}; - -/** NEON kernel to perform Winograd. */ -template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols> -class NEWinogradLayerConfiguration -{ -public: - /** Winograd base kernel */ - using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>; - /** Winograd convolution kernel */ - - using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>; - - using TransformInputKernel = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>; - using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>; - using TransformOutputKernel = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>; -}; - -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h b/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h deleted file mode 100644 index 0fd3f8ce67..0000000000 --- a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H -#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the YOLO layer kernel. 
*/ -class NEYOLOLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEYOLOLayerKernel"; - } - /** Constructor */ - NEYOLOLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete; - /** Default move constructor */ - NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete; - /** Default move assignment operator */ - NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default; - /** Default destructor */ - ~NEYOLOLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer parameters. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. 
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to run YOLO layer - * - * @param[in] window Region on which to execute the kernel. - */ - template <typename T, int S> - void yolo_layer_nchw(const Window &window); - /** Function to run YOLO layer on tensors with NHWC format - * - * @param[in] window Region on which to execute the kernel. - */ - template <typename T> - void yolo_layer_nhwc(const Window &window); - /** Common signature for all the yolo layer functions - * - * @param[in] window Region on which to execute the kernel. - */ - using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window); - -private: - YOLOFunctionPtr _func; - ITensor *_input; - ITensor *_output; - ActivationLayerInfo _act_info; - int32_t _num_classes; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp b/arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp deleted file mode 100644 index 4ff83fbc51..0000000000 --- a/arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#pragma once - -#include <array> -#include <algorithm> -#include <initializer_list> - -#include <cassert> - -namespace arm_gemm { - -template<unsigned int D> -class NDRange { -private: - std::array<unsigned int, D> m_sizes {}; - std::array<unsigned int, D> m_totalsizes {}; - - class NDRangeIterator { - private: - const NDRange &m_parent; - unsigned int m_pos = 0; - unsigned int m_end = 0; - - public: - NDRangeIterator(const NDRange &p, unsigned int s, unsigned int e) : m_parent(p), m_pos(s), m_end(e) { } - - bool done() const { - return (m_pos >= m_end); - } - - unsigned int dim(unsigned int d) const { - unsigned int r = m_pos; - - if (d < (D - 1)) { - r %= m_parent.m_totalsizes[d]; - } - - if (d > 0) { - r /= m_parent.m_totalsizes[d-1]; - } - - return r; - } - - bool next_dim0() { - m_pos++; - - return !done(); - } - - bool next_dim1() { - m_pos += m_parent.m_sizes[0] - dim(0); - - return !done(); - } - - unsigned int dim0_max() const { - unsigned int offset = std::min(m_end - m_pos, m_parent.m_sizes[0] - dim(0)); - - return dim(0) + offset; - } - }; - -public: - NDRange& operator=(const NDRange& rhs)=default; - NDRange(const NDRange& rhs) =default; - - template <typename... T> - NDRange(T... 
ts) - : m_sizes{ts...} - { - unsigned int t=1; - - for (unsigned int i=0; i<D; i++) { - t *= m_sizes[i]; - - m_totalsizes[i] = t; - } - } - - NDRange(const std::array<unsigned int, D>& n) - : m_sizes(n) - { - unsigned int t=1; - - for (unsigned int i=0; i<D; i++) { - t *= m_sizes[i]; - - m_totalsizes[i] = t; - } - } - - NDRangeIterator iterator(unsigned int start, unsigned int end) const { - return NDRangeIterator(*this, start, end); - } - - unsigned int total_size() const { - return m_totalsizes[D - 1]; - } - - unsigned int get_size(unsigned int v) const { - return m_sizes[v]; - } -}; - -/** NDCoordinate builds upon a range, but specifies a starting position - * in addition to a size which it inherits from NDRange - */ -template<unsigned int N> -class NDCoordinate : public NDRange<N> { - using int_t =unsigned int; - using ndrange_t = NDRange<N>; - - std::array<int_t, N> m_positions {}; -public: - NDCoordinate& operator=(const NDCoordinate& rhs)=default; - NDCoordinate(const NDCoordinate& rhs) =default; - NDCoordinate(const std::initializer_list<std::pair<int_t, int_t>>& list) - { - std::array<int_t, N> sizes{}; - - std::size_t i = 0; - for(auto& p : list) { - m_positions[i]= p.first; - sizes[i++] = p.second; - } - - //update the parents sizes - static_cast<ndrange_t&>(*this) = ndrange_t(sizes); - } - - int_t get_position(int_t d) const { - assert(d < m_positions.size()); - return m_positions[d]; - } - - void set_position(int_t d, int_t v) { - assert(d < size(m_positions)); - assert(v < ndrange_t::get_size(d)); - - m_positions[d] = v; - } - - int_t get_position_end(int_t d) const { - return get_position(d) + NDRange<N>::get_size(d); - } -}; //class NDCoordinate - -/** @returns the number of dimensions in the NDRange which have none-1 values - * IE there is actual work in these dimensions that can be broken up - */ -template<unsigned int N> -std::size_t ndrange_popcount(const NDRange<N>& ndr) { - std::size_t count = 0; - - for(unsigned int d = 0; d != N; ++d) { - 
if(ndr.get_size(d) != 1) - ++count; - } - return count; -} - -} // namespace arm_gemm diff --git a/arm_compute/core/NEON/kernels/assembly/Helpers.h b/arm_compute/core/NEON/kernels/assembly/Helpers.h deleted file mode 100644 index 9372e05295..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/Helpers.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_ASSEMBLY_HELPERS_H -#define ARM_COMPUTE_ASSEMBLY_HELPERS_H - -#include "arm_compute/core/CPP/CPPTypes.h" -#include "arm_compute/core/Utils.h" - -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp" - -namespace arm_compute -{ -/** Block sizes to use to break the M, N, K dimension */ -struct BlockSizes -{ - unsigned int k_block{ 0 }; /**< Block size alon the K dimension */ - unsigned int x_block{ 0 }; /**< Block size along the N (x) dimension */ - unsigned int m_round{ 0 }; /**< Block size along the M dimension (Must be a multiple of strategy_out_height) */ - unsigned int strategy_out_height{ 0 }; /**< Number of rows (M) processed by the selected strategy */ -}; - -/** Extracts the kernel description of the selected kernel by the GEMM backend heuristics - * - * @param[in] input_type Data type of the input tensor. - * @param[in] ci CPU information. - * @param[in] num_threads Maximum number of threads that might be used for the calculations. - * @param[in] p M, N, K sizes. - * @param[in] activation Activation struct - * @param[in] pretranspose_hint Is B also pretransposed ? - * - * @return Kernel description that the assembly heuristics picked for the given configuration - */ -arm_gemm::KernelDescription get_gemm_info(DataType input_type, - const CPUInfo &ci, - const unsigned int num_threads, - const INEGEMMWrapperKernel::Params &p, - arm_gemm::Activation activation, - bool pretranspose_hint); - -/** Calculate the recommended block sizes to use based on the CPU cache sizes and the strategy which will be used - * - * @param[in] ci CPU information. - * @param[in] M M dimension. - * @param[in] N N dimension. - * @param[in] K K dimension. - * - * @return Recommeded block sizes to use for the given M, N, K dimensions. 
- */ -template <typename strategy> -BlockSizes calculate_block_sizes(const CPUInfo &ci, unsigned int M, unsigned int N, unsigned int K) -{ - BlockSizes bs; - - using Toi = typename strategy::operand_type; - - const unsigned int L1_size = ci.get_L1_cache_size(); - const unsigned int L2_size = ci.get_L2_cache_size(); - - // Work out blocking parameters - - // k_block: Find out how much of the larger array can be loaded into half the cache. - // This should account for associative caches. - bs.k_block = (L1_size / 2) / (sizeof(Toi) * (std::max(strategy::out_width(), strategy::out_height()))); - - // Needs to be (at least a single) multiple of the K unroll level. - bs.k_block /= strategy::k_unroll(); - bs.k_block = std::max(bs.k_block, 1U) * strategy::k_unroll(); - - // Now tune to presented problem size; this is how many blocks we need. - int num_k_blocks = DIV_CEIL(K, bs.k_block); - - // So divide the space equally into that many blocks. - bs.k_block = DIV_CEIL(K, num_k_blocks); - - // And round UP to the K unroll level required. - bs.k_block = ceil_to_multiple(bs.k_block, strategy::k_unroll()); - - // x_block: Work out how many rows (of length k_block) will fit in the L2 - // Don't allocate more than 90% of the L2 to allow for overheads, and subtract off the L1 contents. - bs.x_block = (((L2_size * 9) / 10) - (bs.k_block * sizeof(Toi) * (strategy::out_width() + strategy::out_height()))) / (sizeof(Toi) * bs.k_block); - - // Needs to be (at least a single) multiple of the kernel output width. - bs.x_block /= strategy::out_width(); - bs.x_block = std::max(bs.x_block, 1U) * strategy::out_width(); - - // And tune to the presented problem size. - int num_x_blocks = DIV_CEIL(N, bs.x_block); - bs.x_block = DIV_CEIL(N, num_x_blocks); - - bs.x_block = ceil_to_multiple(bs.x_block, strategy::out_width()); - - // Work out the rounded size of M - needed for some buffers. 
- bs.m_round = ceil_to_multiple(M, strategy::out_height()); - bs.strategy_out_height = strategy::out_height(); - - return bs; -} - -} // namespace arm_compute -#endif /* ARM_COMPUTE_ASSEMBLY_HELPERS_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h deleted file mode 100644 index f152ab5f61..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H -#define ARM_COMPUTE_INEGEMMWRAPPERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Common interface for all the arm_gemm Gemms - */ -class INEGEMMWrapperKernel : public INEKernel -{ -public: - /** Parameters defining the dimensions of the matrices being multiplied */ - struct Params - { - unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */ - unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */ - unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */ - unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */ - unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). */ - }; - - static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info); - - /** Constructor */ - INEGEMMWrapperKernel(); - /** Prevent instances of this class from being copied */ - INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete; - /** Prevent instances of this class from being copied */ - INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete; - /** Allow instances of this class to be moved */ - INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default; - /** Allow instances of this class to be moved */ - INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. 
- * @param[in] gemm_info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Called as part of configure() after _a, _b, _c and _params have been set. - * - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. - * - * @return A 3D execution window. - */ - virtual Window configure_internal(float alpha, float beta) = 0; - - /** Run the kernel from the start to the end offset in window. - * - * @param[in] window Window to use for the iteration - * @param[in] start_offset Where to start iterating from (In Window coordinates) - * @param[in] end_offset Where to stop iterating (In Window coordinates). - * @param[in] info Info about executing thread and CPU. - */ - virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0; - - const ITensor *_a; - const ITensor *_b; - ITensor *_c; - Params _params; - GEMMInfo _gemm_info; - -private: - Window _window3d; - TensorShape _window_shape; -}; - -} // namespace arm_compute - -#endif /* ARM_COMPUTE_INEGEMMRAPPERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h b/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h deleted file mode 100644 index 8a9fb82b4a..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H -#define ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" - -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** This class is a wrapper for the depthwise convolution assembly kernels. 
*/ -class NEDepthwiseConvolutionAssemblyKernelWrapper final : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthwiseConvolutionAssemblyKernelWrapper"; - } - - /** Default constructor */ - NEDepthwiseConvolutionAssemblyKernelWrapper() - : _kernel(nullptr) - { - } - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyKernelWrapper(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(const NEDepthwiseConvolutionAssemblyKernelWrapper &) = delete; - /** Default Move Constructor. */ - NEDepthwiseConvolutionAssemblyKernelWrapper(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; - /** Default move assignment operator */ - NEDepthwiseConvolutionAssemblyKernelWrapper &operator=(NEDepthwiseConvolutionAssemblyKernelWrapper &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] kernel Pointer to an assembly kernel implementation. 
- */ - void configure(depthwise::IDepthwiseConvolution *kernel) - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel))); - _kernel = kernel; - Window win; - win.set(Window::DimX, Window::Dimension(0, _kernel->get_window(), 1)); - INEKernel::configure(win); - } - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel))); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - auto first = window.x().start(); - auto last = window.x().end(); - _kernel->run(first, last, info.thread_id); - } - -private: - depthwise::IDepthwiseConvolution *_kernel; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_ASSEMBLY_DEPTHWISE_CONVOLUTION_ASSEMBLY_WRAPPER_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h deleted file mode 100644 index 0e3dd74577..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H -#define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H - -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" - -#include "gemm_common.hpp" - -namespace arm_compute -{ -class ITensor; - -/** This class is a wrapper for the assembly kernels. - * - * Some kernels were written in assembly and highly optimised for specific CPUs like A53 or A55. - * This class works as a wrapper for these assembly kernels. The arm compute library creates an instance - * of NEGEMMAssemblyWrapperKernel and other auxiliary data structures to execute a single assembly kernel - * in the context of an NEFunctions. 
- * - * The type T is the type of the actual kernel implemented in assembly which is of type - * template<typename To, typename Tr> class GemmCommon - * - * - */ -template <typename TypeInput, typename TypeOutput> -class NEGEMMAssemblyWrapperKernel final : public INEKernel -{ -public: - /** Constructor - */ - NEGEMMAssemblyWrapperKernel() - : _kernel(nullptr), _name("NEGEMMAssemblyWrapperKernel") - { - } - - NEGEMMAssemblyWrapperKernel(NEGEMMAssemblyWrapperKernel &) = delete; - NEGEMMAssemblyWrapperKernel(NEGEMMAssemblyWrapperKernel &&) = default; - NEGEMMAssemblyWrapperKernel &operator=(NEGEMMAssemblyWrapperKernel &) = delete; - - const char *name() const override - { - return _name.c_str(); - } - - - void run(const Window &window, const ThreadInfo &info) override - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel))); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - - auto win=arm_gemm::to_ndcoord(window); - - arm_gemm::ndcoord_t thread_locator { }; - - _kernel->execute(win, thread_locator, info.thread_id); - } - - // Inherited methods overridden: - void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) override - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel))); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - - //convert between arm_compute and arm_gemm types - auto ndc_win = arm_gemm::to_ndcoord(window); - auto ndc_tlc = arm_gemm::to_ndcoord(thread_locator); - - _kernel->execute(ndc_win, ndc_tlc, info.thread_id); - } - - /** Initialise the kernel's input and output. - * - * @param[in] kernel Pointer to an assembly kernel implementation. - * @param[in] num_threads Number of concurrent threads which will execute the kernel. 
- */ - void configure(arm_gemm::GemmCommon<TypeInput, TypeOutput> *kernel, std::string kernel_name_tag) - { - ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel))); - _kernel = kernel; - - Window win = to_window(kernel->get_window_size()); - - INEKernel::configure(win); - - if(!kernel_name_tag.empty()) - { - _name += "/" + kernel_name_tag; - } - } - -private: - arm_gemm::GemmCommon<TypeInput, TypeOutput> *_kernel; - std::string _name; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp deleted file mode 100644 index 7723224ec8..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#pragma once - -#include <memory> -#include <cstring> - -#include "arm_gemm_local.hpp" -#include "gemm_common.hpp" - -namespace arm_gemm { - -enum class GemmMethod -{ - DEFAULT, - GEMV_BATCHED, - GEMV_PRETRANSPOSED, - GEMV_NATIVE_TRANSPOSED, - GEMM_NATIVE, - GEMM_HYBRID, - GEMM_INTERLEAVED, - GEMM_INTERLEAVED_2D, - QUANTIZE_WRAPPER, - GEMM_HYBRID_QUANTIZED -}; - -struct KernelDescription -{ - GemmMethod method = GemmMethod::DEFAULT; - std::string name = ""; - bool is_default = false; - - KernelDescription(GemmMethod m, std::string n, bool d=false) : method(m), name(n), is_default(d) { } - KernelDescription() noexcept { } -}; - -struct GemmConfig -{ - GemmMethod method = GemmMethod::DEFAULT; - std::string filter = ""; - unsigned int inner_block_size = 0; - unsigned int outer_block_size = 0; - - GemmConfig(GemmMethod method) : method(method) { } - GemmConfig() { } -}; - -struct Activation -{ - enum class Type { - None, - ReLU, - BoundedReLU - }; - - Type type; - float param1; - float param2; - - Activation(Type type=Type::None, float p1=0.0f, float p2=0.0f) : type(type), param1(p1), param2(p2) { } -}; - -struct GemmArgs -{ -public: - const CPUInfo *_ci; - unsigned int _Msize; - unsigned int _Nsize; - unsigned int _Ksize; - unsigned int _nbatches; - unsigned int _nmulti; - bool _trA; - bool _trB; - Activation _act; - int _maxthreads; - bool _pretransposed_hint; - const GemmConfig *_cfg; - - GemmArgs(const CPUInfo *ci, const unsigned int M, const unsigned int N, - const unsigned int K, const unsigned int nbatches, - const unsigned int nmulti, const bool trA, const bool trB, - Activation act, const int maxthreads, - const bool pretransposed_hint, const GemmConfig *cfg=nullptr ) : - _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _nbatches(nbatches), _nmulti(nmulti), - _trA(trA), _trB(trB), _act(act), _maxthreads(maxthreads), - _pretransposed_hint(pretransposed_hint), _cfg(cfg) - { - } -}; - -struct Requantize32 -{ -public: - const int32_t *bias = nullptr; - size_t 
bias_multi_stride = 0; - int32_t a_offset = 0; - int32_t b_offset = 0; - int32_t c_offset = 0; - bool per_channel_requant = false; - int32_t per_layer_shift = 0; - int32_t per_layer_mul = 0; - const int32_t *per_channel_shifts = nullptr; - const int32_t *per_channel_muls = nullptr; - int32_t minval = 0; - int32_t maxval = 0; - - Requantize32() = default; - - // Constructor for per-tensor quantization - Requantize32(const int32_t *bias, size_t bias_multi_stride, - int32_t a_offset, int32_t b_offset, int32_t c_offset, - int32_t requant_shift, int32_t requant_mul, - int32_t minv, int32_t maxv) : - bias(bias), bias_multi_stride(bias_multi_stride), - a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), - per_channel_requant(false), per_layer_shift(requant_shift), per_layer_mul(requant_mul), - minval(minv), maxval(maxv) - { - } - - // Constructor for per-channel quantization - Requantize32(const int32_t *bias, size_t bias_multi_stride, - int32_t a_offset, int32_t b_offset, int32_t c_offset, - const int32_t *requant_shifts, const int32_t *requant_muls, - int32_t minv, int32_t maxv) : - bias(bias), bias_multi_stride(bias_multi_stride), - a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), - per_channel_requant(true), per_channel_shifts(requant_shifts), per_channel_muls(requant_muls), - minval(minv), maxval(maxv) - { - } -}; - -struct Nothing -{ -}; - -template<typename Top, typename Tret> -using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret> >; - -/* Low level API calls. - * These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. */ - -/* get_gemm_method(): Given the templated types and provided parameters, - * which is the preferred method to implement this GEMM? 
*/ -template<typename Top, typename Tret, class OutputStage = Nothing> -KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage & ={}); - -template<typename Top, typename Tret, class OutputStage = Nothing> -UniqueGemmCommon<Top, Tret> gemm(const GemmArgs &args, const OutputStage & ={}); - -template<typename Top, typename Tret, class OutputStage = Nothing> -std::vector<KernelDescription> get_compatible_kernels(const GemmArgs &args, const OutputStage & ={}); - -} // namespace arm_gemm diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp deleted file mode 100644 index 6f345c1721..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#pragma once - -#include "arm_compute/core/Window.h" -#include "arm_compute/core/Dimensions.h" -#include "arm_compute/core/NEON/kernels/arm_gemm/ndrange.hpp" - -#include <cassert> - -/* This file contains mapping between integral types used in arm_compute and arm_gemm - * These two codebases both require a degree of separation for the sake of modularity - * so maintain their own types which represent similar information. - */ - -namespace arm_gemm { - -//we want to unify the maximum number of dimensions used beween arm_gemm and arm compute library -constexpr std::size_t ndrange_max = - arm_compute::Dimensions<unsigned int>::num_max_dimensions; - -using ndrange_t=NDRange<ndrange_max>; -using ndcoord_t=NDCoordinate<ndrange_max>; - -/* Converts an `arm_gemm::ndrange_t` to a `arm_compute::Window` - * - * As `NDRange<T>` does not not encode start positions, we specify - * the start to be zero in the produced `arm_compute::Window` - * - * @param [ndr] the `arm_gemm::ndrange_t` we wish to convert into a `arm_compute::Window` - * @returns an `arm_compute::Window` representing the same dimensional ranges as `ndr` - */ -inline arm_compute::Window to_window(const ndrange_t& ndr) { - arm_compute::Window win; - - for(unsigned int i = 0; i!=ndrange_max; ++i) { - //populate the window with the dimensions of the NDRange - win.set(i, arm_compute::Window::Dimension(0, ndr.get_size(i))); - } - - return win; -} - -/* - * Converts an `arm_gemm::ndcoord_t` to a `arm_compute::Window` - * - * @param [ndc] the `arm_gemm::ndcoord_t` we wish to convert into a `arm_compute::Window` - * @returns an `arm_compute::Window` representing the same dimensional ranges as `ndc` - */ -inline arm_compute::Window to_window(const ndcoord_t& ndc) { - arm_compute::Window win; - - for(unsigned int i = 0; i!=ndrange_max; ++i) { - const auto start = ndc.get_position(i); - const auto size = ndc.get_size(i); - const auto stop = start + size; - - //populate the window with the dimensions of the NDRange - 
win.set(i, arm_compute::Window::Dimension(start, stop)); - } - - return win; -} - -/** Convert an `arm_compute::Window` to an `arm_gemm::NDRange` of the same max dimensions - * - * It should be noted that `arm_compute::Window` specifies a `start()` and an `end()` - * where as `arm_gemm::ndrange_t` only has a size, as a result we store the delta between the range - * - * @param [win] the `arm_compute::Window` we want to convert to `arm_gemm::ndrange_t` - * @return the resultant ndrange_t - */ -inline ndrange_t to_ndrange(const arm_compute::Window& win) { - return { - static_cast<unsigned int>(win[0].end() - win[0].start()), - static_cast<unsigned int>(win[1].end() - win[1].start()), - static_cast<unsigned int>(win[2].end() - win[2].start()), - static_cast<unsigned int>(win[3].end() - win[3].start()), - static_cast<unsigned int>(win[4].end() - win[4].start()), - static_cast<unsigned int>(win[5].end() - win[5].start()) - }; -} - -/** Convert an `arm_compute::Window` to an `arm_gemm::NDCoord` of the same max dimensions - * - * @param [win] the `arm_compute::Window` we want to convert to `arm_gemm::ndcoord_t` - * @return the resultant ndcoord_t - */ -inline ndcoord_t to_ndcoord(const arm_compute::Window& win) { - return { - { static_cast<unsigned int>(win[0].start()), static_cast<unsigned int>(win[0].end() - win[0].start()) }, - { static_cast<unsigned int>(win[1].start()), static_cast<unsigned int>(win[1].end() - win[1].start()) }, - { static_cast<unsigned int>(win[2].start()), static_cast<unsigned int>(win[2].end() - win[2].start()) }, - { static_cast<unsigned int>(win[3].start()), static_cast<unsigned int>(win[3].end() - win[3].start()) }, - { static_cast<unsigned int>(win[4].start()), static_cast<unsigned int>(win[4].end() - win[4].start()) }, - { static_cast<unsigned int>(win[5].start()), static_cast<unsigned int>(win[5].end() - win[5].start()) } - }; -} - -} //namespace arm_gemm diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp 
b/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp deleted file mode 100644 index 8d3db4adf2..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -/* This file is used to configure integration-specific aspects of arm_gemm into ACL */ - -#include "arm_compute/core/CPP/CPPTypes.h" - -namespace arm_gemm -{ -using CPUModel = arm_compute::CPUModel; -using CPUInfo = arm_compute::CPUInfo; -} // namespace arm_compute - - - diff --git a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp deleted file mode 100644 index ea9b524e15..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp" - -#include <cstddef> -#include <cassert> - -#define UNUSED(x) (void)(x) - -namespace arm_gemm { - -// Abstract class for the GEMM/GEMV functions. -// -// GEMM implementations may be "native" (never require any input -// permutation), "pretransposed" (require permutation up-front) or require -// working space (permute as they go along). This interface should support -// all of them. - -// The real GemmCommon class is templated based on the operand and return -// type. This is an interface class which is independent of those types. -class IGemmCommon { -public: - /* Pass in the pointers to the arrays to be operated on and their - * strides. 
This "generic" version uses void *s, the preferred version - * is the one provided by templated GemmCommon (below) which takes - * appropriately typed pointers. If B is pretransposed (see below) then - * the settings for B here are ignored. - */ - virtual void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride, - const void *B, const int ldb, /* batches share B */ const int B_multi_stride, - void *C, const int ldc, const int C_batch_stride, const int C_multi_stride, - const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) = 0; - - /** @returns an ndrange containing ranges of the compute space which can be - * broken up and parallelised over - */ - virtual ndrange_t get_window_size() const = 0; - - /* The maximum thread count is specified when the GEMM is created. Some - * implementations need to know how many threads will actually run in - * order to work properly. - * - * In some cases, after creating the GEMM the number of threads needs to - * be reduced (e.g. not enough work to split across threads). This - * method allows the number of actual threads to be run to be set (must - * be equal or lower). - * - * This has an empty default implementation, as GEMMs which don't care - * about thread count can safely ignore this. - */ - virtual void set_nthreads(int) { }; - - /* Whether this GEMM can be dynamically scheduled or not. */ - virtual bool supports_dynamic_scheduling() const { return false; } - - /** Main execute member fucntion - * @param [in] work_range specifies the range of work we want to be computed, total range defined by get_window_size() - * @param [in] thread_locator where are we inside of the thread space - * @naram [in] threadid a unique threadid - */ - virtual void execute(const ndcoord_t& work_range, const ndcoord_t& thread_locator, int threadid) = 0; - - /*** Working space interface (optional) ***/ - /* Total number of bytes of temporary working space needed. 
If zero, it's not necessary to call set_working_space(). */ - virtual size_t get_working_size() const { return 0; } - /* Provide working space buffer - the void * passed in must remain allocated for the duration of any execute calls. */ - virtual void set_working_space(void *) { }; - - /*** "Pretransposed" interface (optional) ***/ - /* Is this object set up for pretranspose? If so, pretranspose_array() needs to be called before execute(); */ - virtual bool B_is_pretransposed() const { return false; } - /* Does pretranspose still need to be done? */ - virtual bool B_pretranspose_required() const { return false; } - /* Total number of bytes of space needed for pretransposed arrays. */ - virtual size_t get_B_pretransposed_array_size() const { return 0; } - /* Perform pretranspose - arguments are output, input, input row stride and input multi stride. */ - /* The "real" version of this depends on the templated operand type (see below). */ - virtual void pretranspose_B_array_generic(void *, const void *, const int, const int) = 0; - /* Set pretransposed data - the void * passed in must previously have been passed to pretranspose_B_array() for the same or a similar GEMM. */ - virtual void set_pretransposed_B_data(void *) { } - - /*** "Quantized bias" interface (optional) ***/ - /* Set the bias vector for quantized GEMMs */ - virtual void set_quantized_bias(const int32_t *bias, size_t bias_multi_stride) - { - UNUSED(bias); - UNUSED(bias_multi_stride); - } - - // Destructor - virtual ~IGemmCommon() { } -}; - -/* "Real" GemmCommon class which is templated on the operand and return types. - * - * In addition to correctly typed versions of the functions that operate on - * operand and return data, this class provides a default implementation of - * 'set_arrays' to capture the provided arguments in protected class - * members, as essentially any implementation will need these. 
- */ -template<typename To, typename Tr> -class GemmCommon : public IGemmCommon { -protected: - const To *_Aptr=nullptr; - int _lda=0; - int _A_batch_stride=0; - int _A_multi_stride=0; - const To *_Bptr=nullptr; - int _ldb=0; - int _B_multi_stride=0; - Tr *_Cptr=nullptr; - int _ldc=0; - int _C_batch_stride=0; - int _C_multi_stride=0; - const Tr *_bias=nullptr; - int _bias_multi_stride=0; - -public: - /* Pass in the pointers to the arrays to be operated on and their - * strides (templated version with appropriate types). */ - virtual void set_arrays(const To *A, const int lda, const int A_batch_stride, const int A_multi_stride, - const To *B, const int ldb, /* batches share B */ const int B_multi_stride, - Tr *C, const int ldc, const int C_batch_stride, const int C_multi_stride, - const Tr *bias, /* no row or batch stride needed */ const int bias_multi_stride) { - _Aptr = A; - _lda = lda; - _A_batch_stride = A_batch_stride; - _A_multi_stride = A_multi_stride; - _Bptr = B; - _ldb = ldb; - _B_multi_stride = B_multi_stride; - _Cptr = C; - _ldc = ldc; - _C_batch_stride = C_batch_stride; - _C_multi_stride = C_multi_stride; - _bias = bias; - _bias_multi_stride = bias_multi_stride; - } - - /* Implementation of the void * overload which casts its arguments to the appropriate type. 
*/ - void set_arrays_generic(const void *A, const int lda, const int A_batch_stride, const int A_multi_stride, - const void *B, const int ldb, /* batches share B */ const int B_multi_stride, - void *C, const int ldc, const int C_batch_stride, const int C_multi_stride, - const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) override { - set_arrays(static_cast<const To *>(A), lda, A_batch_stride, A_multi_stride, - static_cast<const To *>(B), ldb, B_multi_stride, - static_cast<Tr *>(C), ldc, C_batch_stride, C_multi_stride, - static_cast<const Tr *>(bias), bias_multi_stride); - } - - /*** "Pretransposed" interface ***/ - - /* Perform pretranspose - the void * passed in must remain allocated for the duration of any execute calls. */ - /* Arguments are: output buffer pointer, source pointer, source row stride, source multi stride */ - virtual void pretranspose_B_array(void *, const To *, const int, const int) { }; - - /* Implementation of the void * overload which casts its arguments to the appropriate type. */ - void pretranspose_B_array_generic(void *out, const void *in, const int row_stride, const int multi_stride) override { - pretranspose_B_array(out, static_cast<const To *>(in), row_stride, multi_stride); - } -}; - -template<typename GemmKernel> -inline -int unsigned get_total_window_size(const GemmKernel& kernel) -{ - auto window=kernel.get_window_size(); - - unsigned int total = 1; - for(unsigned i = 0; i != arm_gemm::ndrange_max; ++i) - { - total *= window.get_size(i); - } - - return total; -} - -} // namespace arm_gemm diff --git a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp b/arm_compute/core/NEON/kernels/convolution/common/activation.hpp deleted file mode 100644 index 091b1652c9..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/activation.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -namespace neon_convolution_kernels -{ - -enum class ActivationFunction -{ - None, - ReLU, - ReLU6, -}; - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp b/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp deleted file mode 100644 index 799e95d3e6..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/alloc.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#ifdef ALLOC_ALIGN -#define ALLOCATE(x) aligned_alloc(ALLOC_ALIGN, x) -#else -#define ALLOCATE(x) malloc(x) -#endif diff --git a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp b/arm_compute/core/NEON/kernels/convolution/common/arm.hpp deleted file mode 100644 index 90e7828553..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/arm.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** Sets the macro __arm_any__ if compiling for Aarch32 or Aarch64. - * Includes `arm_neon.h` if compiling for either architecture. - */ - -#ifdef __arm__ -#define __arm_any__ -#endif // __arm__ - -#ifdef __aarch64__ -#define __arm_any__ -#endif // __aarch64__ - -#ifdef __arm_any__ -#include <arm_neon.h> -#endif // __arm_any__ diff --git a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp b/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp deleted file mode 100644 index 2ab2597785..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/convolution.hpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -enum PaddingType { - PADDING_SAME, PADDING_VALID -}; diff --git a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp b/arm_compute/core/NEON/kernels/convolution/common/padding.hpp deleted file mode 100644 index 97b21e0ff5..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/padding.hpp +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include <cstddef> - -// Utilities for copying tensor tiles and adding/removing padding. -namespace padding -{ - -/* Copy a tile and apply padding to the output copy. - */ -template <typename T> -void copy_and_pad_tile( - unsigned int tile_rows, - unsigned int tile_cols, - unsigned int n_channels, - const T *inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - T* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride, - unsigned int pad_top, - unsigned int pad_left, - unsigned int pad_bottom, - unsigned int pad_right, - T pad_value=static_cast<T>(0) -); - -/** Copy a tile and remove padding elements in the output. 
- */ -template <unsigned int TileRows, unsigned int TileCols> -class CopyCropped -{ - public: - static void execute( - size_t size, // Amount of data to copy - const void *inptr, - size_t in_row_stride, - size_t in_col_stride, - void *outptr, - size_t out_row_stride, - size_t out_col_stride, - unsigned int pad_top, - unsigned int pad_left, - unsigned int pad_bottom, - unsigned int pad_right - ); -}; - -template <typename T> -void crop_and_copy_tile( - unsigned int tile_rows, - unsigned int tile_cols, - unsigned int n_channels, - const T *inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - T *outptr, - unsigned int out_row_stride, - unsigned int out_col_stride, - unsigned int crop_top, - unsigned int crop_left, - unsigned int crop_bottom, - unsigned int crop_right -); - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/perf.h b/arm_compute/core/NEON/kernels/convolution/common/perf.h deleted file mode 100644 index 3c0d36646d..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/perf.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -/* Prototypes from perf.c */ - -void start_counter(int fd); -long long get_counter(int fd); -long long stop_counter(int fd); -int open_instruction_counter(void); -int open_cycle_counter(void); diff --git a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp deleted file mode 100644 index 6029cb67e3..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/qasymm8.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include <cstdint> - -namespace qasymm8 -{ - -struct QAsymm8Params -{ - uint8_t quantize(float value) const; - float dequantize(uint8_t value) const; - - uint8_t offset; - float scale; -}; - -struct QAsymm8RescaleParams -{ - static QAsymm8RescaleParams make_rescale_params( - const QAsymm8Params& weight_quant, - const QAsymm8Params& input_quant, - const QAsymm8Params& output_quant - ); - - QAsymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); - - const int32_t shift, multiplier; - const float rescale; -}; - -} diff --git a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp b/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp deleted file mode 100644 index 41bfbe4d8a..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/qsymm8.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include <cstdint> -#include <vector> -#include "qasymm8.hpp" - - -namespace qsymm8 { - -struct QSymm8Params { - int8_t quantize(float value) const; - float dequantize(int8_t value) const; - - float scale; -}; - -struct QSymm8RescaleParams { - static QSymm8RescaleParams - make_rescale_params(const QSymm8Params &weight_quant, - const QSymm8Params &input_quant, - const QSymm8Params &output_quant); - - QSymm8RescaleParams(int32_t shift, int32_t multiplier, float rescale); - - const int32_t shift, multiplier; - const float rescale; -}; - -struct QSymm8PerChannelParams { - int8_t quantize(float value, float scale) const; - float dequantize(int8_t value, float scale) const; - - std::vector<float> scales; -}; - -struct QSymm8PerChannelRescaleParams { - static QSymm8PerChannelRescaleParams - make_rescale_params(const QSymm8PerChannelParams &weight_quant, - const QSymm8PerChannelParams &input_quant, - const QSymm8PerChannelParams &output_quant); - - static QSymm8PerChannelRescaleParams - make_rescale_params(const QSymm8PerChannelParams &weight_quant, - const qasymm8::QAsymm8Params &input_quant, - const qasymm8::QAsymm8Params &output_quant); - - QSymm8PerChannelRescaleParams(std::vector<int32_t>& shift, std::vector<int32_t>& multiplier, std::vector<float>& rescale); - - std::vector<int32_t> shifts, multipliers; - std::vector<float> rescales; -}; - -} // namespace qsymm8 diff --git a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp deleted file mode 100644 index 243d305e19..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp +++ /dev/null @@ -1,749 +0,0 @@ -/* - * Copyright (c) 2017 ARM 
Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#ifndef DOXYGEN_SKIP_THIS -#include <cstdint> -#endif /* DOXYGEN_SKIP_THIS */ -#include "arm.hpp" - -namespace reorder { -/** Re-order a tensor from NCHW format to NHWC. - * - * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. - * - * @param[in] in Input tensor in NCHW format. - * @param[out] out Output tensor, to be written in NHWC format. - * @param n_batches Number of batches in the tensors. - * @param n_channels Number of channels in the tensors - * @param n_rows Height of the tensor - * @param n_cols Width of the tensor - * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_channels * in_channel_stride`. - * @param in_channel_stride Stride over channels in the input tensor. If `0` defaults to `n_rows * in_row_stride`. 
- * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols`. - * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_rows * out_row_stride`. - * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols * out_col_stride`. - * @param out_col_stride Stride over columns in the output tensor. If `0` defaults to `n_channels`. - */ -template <typename T> -inline void nchw_to_nhwc( - const T* const in, - T* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride=0, - int in_channel_stride=0, - int in_row_stride=0, - int out_batch_stride=0, - int out_row_stride=0, - int out_col_stride=0 -); - -/** Re-order a tensor from NHWC format to NCHW. - * - * @note The stride parameters are optional and are provided to allow padding in either input or output tensors. - * - * @param[in] in Input tensor in NHWC format. - * @param[out] out Output tensor, to be written in NCHW format. - * @param n_batches Number of batches in the tensors. - * @param n_rows Height of the tensor - * @param n_cols Width of the tensor - * @param n_channels Number of channels in the tensors - * @param in_batch_stride Stride over batches in the input tensor. If `0` defaults to `n_rows * in_row_stride`. - * @param in_row_stride Stride over rows in the input tensor. If `0` defaults to `n_cols * in_col_stride`. - * @param in_col_stride Stride over columns in the input tensor. If `0` defaults to `n_channels`. - * @param out_batch_stride Stride over batches in the output tensor. If `0` defaults to `n_channels * out_channel_stride`. - * @param out_channel_stride Stride over channels in the output tensor. If `0` defaults to `n_rows * out_row_stride`. - * @param out_row_stride Stride over rows in the output tensor. If `0` defaults to `n_cols`. 
- */ -template <typename T> -inline void nhwc_to_nchw( - const T* const in, // Input data in NHWC form - T* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride=0, - int in_row_stride=0, - int in_col_stride=0, - int out_batch_stride=0, - int out_channel_stride=0, - int out_row_stride=0 -); - -/** Re-order a weight tensor from [Output feature map x Input feature map x - * Height x Width] format to [Height x Width x Input feature map x Output - * feature map] format. - */ -template <typename T> -inline void ofm_ifm_h_w_to_h_w_ifm_ofm( - const T* const in, // Input in [Output x Input x Height x Width] form - T* const out, // Output in [Height x Width x Input x Output] form - const int n_output_feature_maps, - const int n_input_feature_maps, - const int n_rows, - const int n_cols, - int in_output_feature_map_stride=0, - int in_input_feature_map_stride=0, - int in_row_stride=0, - int out_row_stride=0, - int out_col_stride=0, - int out_input_feature_map_stride=0 -); - -/** Re-order a weight tensor from [Height x Width x Input feature map x Output - * feature map] format to [Output feature map x Input feature map x Height x - * Width] format. 
- */ -template <typename T> -inline void h_w_ifm_ofm_to_ofm_ifm_h_w( - const T* const in, // Input in [Height x Width x Input x Output] form - T* const out, // Output in [Output x Input x Height x Width] form - const int n_rows, - const int n_cols, - const int n_input_feature_maps, - const int n_output_feature_maps, - int in_row_stride=0, - int in_col_stride=0, - int in_input_feature_map_stride=0, - int out_output_feature_map_stride=0, - int out_input_feature_map_stride=0, - int out_row_stride=0 -); - -/*****************************************************************************/ -/* 32-bit implementation : NCHW -> NHWC - */ -template <> -inline void nchw_to_nhwc( - const int32_t* const in, - int32_t* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - typedef int32_t T; - - // Fill in the stride values - in_row_stride = (in_row_stride) ? in_row_stride : n_cols; - in_channel_stride = (in_channel_stride) ? in_channel_stride - : n_rows * in_row_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_channels * in_channel_stride; - - out_col_stride = (out_col_stride) ? out_col_stride : n_channels; - out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_rows * out_row_stride; - - // Perform the re-ordering - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in_batch + i*in_row_stride; - T* const out_row = out_batch + i*out_row_stride; - - int j = 0, j_remaining = n_cols; -#ifdef __arm_any__ - for (; j_remaining >= 4; j += 4, j_remaining -= 4) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 4; c += 4, c_remaining -= 4) - { - // Read 4 channels worth of 4 columns, then zip to produce 4 columns - // worth of 4 channels. - int32x4_t channel_pixels[4]; - channel_pixels[0] = vld1q_s32(in_row + (c + 0)*in_channel_stride + j); - channel_pixels[1] = vld1q_s32(in_row + (c + 1)*in_channel_stride + j); - channel_pixels[2] = vld1q_s32(in_row + (c + 2)*in_channel_stride + j); - channel_pixels[3] = vld1q_s32(in_row + (c + 3)*in_channel_stride + j); - - const auto zip1 = vzipq_s32(channel_pixels[0], channel_pixels[2]); - const auto zip2 = vzipq_s32(channel_pixels[1], channel_pixels[3]); - const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); - const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); - - vst1q_s32(out_row + (j + 0)*out_col_stride + c, out_0.val[0]); - vst1q_s32(out_row + (j + 1)*out_col_stride + c, out_0.val[1]); - vst1q_s32(out_row + (j + 2)*out_col_stride + c, out_1.val[0]); - vst1q_s32(out_row + (j + 3)*out_col_stride + c, out_1.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 4; _j++) - { - const T* const in_col = in_row + j + _j; - T* const out_col = out_row + (j + _j)*out_col_stride; - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - for (; j_remaining >= 2; j += 2, j_remaining -= 2) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 2; c += 2, c_remaining -= 2) - { - // Read 2 channels worth of 2 columns, 
then zip to produce 2 columns - // worth of 2 channels. - int32x2_t channel_pixels[2]; - channel_pixels[0] = vld1_s32(in_row + (c + 0)*in_channel_stride + j); - channel_pixels[1] = vld1_s32(in_row + (c + 1)*in_channel_stride + j); - - const auto output = vzip_s32(channel_pixels[0], channel_pixels[1]); - - vst1_s32(out_row + (j + 0)*out_col_stride + c, output.val[0]); - vst1_s32(out_row + (j + 1)*out_col_stride + c, output.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 2; _j++) - { - const T* const in_col = in_row + j + _j; - T* const out_col = out_row + (j + _j)*out_col_stride; - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } -#endif // __arm_any__ - for (; j_remaining; j++, j_remaining--) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j*out_col_stride; - - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - } -} - -template <> -inline void nchw_to_nhwc( - const uint32_t* const in, - uint32_t* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - nchw_to_nhwc( - reinterpret_cast<const int32_t*>(in), - reinterpret_cast<int32_t*>(out), - n_batches, n_channels, n_rows, n_cols, - in_batch_stride, in_channel_stride, in_row_stride, - out_batch_stride, out_row_stride, out_col_stride - ); -} - -template <> -inline void nchw_to_nhwc( - const float* const in, - float* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - nchw_to_nhwc( - reinterpret_cast<const int32_t*>(in), - 
reinterpret_cast<int32_t*>(out), - n_batches, n_channels, n_rows, n_cols, - in_batch_stride, in_channel_stride, in_row_stride, - out_batch_stride, out_row_stride, out_col_stride - ); -} - -/*****************************************************************************/ -/* Generic implementation : NCHW -> NHWC - */ -template <typename T> -inline void nchw_to_nhwc( - const T* const in, - T* const out, - const int n_batches, - const int n_channels, - const int n_rows, - const int n_cols, - int in_batch_stride, - int in_channel_stride, - int in_row_stride, - int out_batch_stride, - int out_row_stride, - int out_col_stride -) -{ - // Fill in the stride values - in_row_stride = (in_row_stride) ? in_row_stride : n_cols; - in_channel_stride = (in_channel_stride) ? in_channel_stride - : n_rows * in_row_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_channels * in_channel_stride; - - out_col_stride = (out_col_stride) ? out_col_stride : n_channels; - out_row_stride = (out_row_stride) ? out_row_stride : n_cols * out_col_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_rows * out_row_stride; - - // Perform the re-ordering - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in_batch + i*in_row_stride; - T* const out_row = out_batch + i*out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j*out_col_stride; - - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_col + c*in_channel_stride; - out_col[c] = *(in_channel); - } - } - } - } -} - -/*****************************************************************************/ -/* 32-bit implementation : NHWC -> NCHW - */ -template <> -inline void nhwc_to_nchw( - const int32_t* const in, // Input data in NHWC form - int32_t* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - typedef int32_t T; - - // Fill in stride values - in_col_stride = (in_col_stride) ? in_col_stride : n_channels; - in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_rows * in_row_stride; - - out_row_stride = (out_row_stride) ? out_row_stride : n_cols; - out_channel_stride = (out_channel_stride) ? out_channel_stride - : n_rows * out_row_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_channels * out_channel_stride; - - // Perform the re-ordering - // For every batch - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - // For every row - for (int i = 0; i < n_rows; i++) - { - const T* const in_i = in_batch + i*in_row_stride; - T* const out_i = out_batch + i*out_row_stride; - - // For every column, beginning with chunks of 4 - int j = 0, j_remaining = n_cols; -#ifdef __arm_any__ - for (; j_remaining >= 4; j += 4, j_remaining -=4) - { - // For every channel, beginning with chunks of 4 - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 4; c += 4, c_remaining -= 4) - { - // Read 4 columns worth of 4 channels then zip to produce 4 channels - // worth of 4 columns. - int32x4_t pixel_channels[4]; - pixel_channels[0] = vld1q_s32(in_i + (j + 0)*in_col_stride + c); - pixel_channels[1] = vld1q_s32(in_i + (j + 1)*in_col_stride + c); - pixel_channels[2] = vld1q_s32(in_i + (j + 2)*in_col_stride + c); - pixel_channels[3] = vld1q_s32(in_i + (j + 3)*in_col_stride + c); - - const auto zip1 = vzipq_s32(pixel_channels[0], pixel_channels[2]); - const auto zip2 = vzipq_s32(pixel_channels[1], pixel_channels[3]); - const auto out_0 = vzipq_s32(zip1.val[0], zip2.val[0]); - const auto out_1 = vzipq_s32(zip1.val[1], zip2.val[1]); - - vst1q_s32(out_i + j + (c + 0)*out_channel_stride, out_0.val[0]); - vst1q_s32(out_i + j + (c + 1)*out_channel_stride, out_0.val[1]); - vst1q_s32(out_i + j + (c + 2)*out_channel_stride, out_1.val[0]); - vst1q_s32(out_i + j + (c + 3)*out_channel_stride, out_1.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 4; _j++) - { - const T* const in_j = in_i + (j + _j)*in_col_stride; - T* const out_j = out_i + (j + _j); - - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - for (; j_remaining >= 2; j += 2, 
j_remaining -=2) - { - int c = 0, c_remaining = n_channels; - for (; c_remaining >= 2; c += 2, c_remaining -= 2) - { - // Read 2 columns worth of 2 channels then zip to produce 2 channels - // worth of 2 columns. - int32x2_t pixel_channels[2]; - pixel_channels[0] = vld1_s32(in_i + (j + 0)*in_col_stride + c); - pixel_channels[1] = vld1_s32(in_i + (j + 1)*in_col_stride + c); - - const auto output = vzip_s32(pixel_channels[0], pixel_channels[1]); - - vst1_s32(out_i + j + (c + 0)*out_channel_stride, output.val[0]); - vst1_s32(out_i + j + (c + 1)*out_channel_stride, output.val[1]); - } - for (; c_remaining; c++, c_remaining--) - { - for (int _j = 0; _j < 2; _j++) - { - const T* const in_j = in_i + (j + _j)*in_col_stride; - T* const out_j = out_i + (j + _j); - - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } -#endif // __arm_any__ - for (; j_remaining; j++, j_remaining--) - { - const T* const in_j = in_i + j*in_col_stride; - T* const out_j = out_i + j; - - // For every channel - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - } -} - -template <> -inline void nhwc_to_nchw( - const uint32_t* const in, // Input data in NHWC form - uint32_t* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Redirect to generic 32-bit implementation - nhwc_to_nchw( - reinterpret_cast<const int32_t*>(in), - reinterpret_cast<int32_t*>(out), - n_batches, n_rows, n_cols, n_channels, - in_batch_stride, in_row_stride, in_col_stride, - out_batch_stride, out_channel_stride, out_row_stride - ); -} - -template <> -inline void nhwc_to_nchw( - const float* const 
in, // Input data in NHWC form - float* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Redirect to generic 32-bit implementation - nhwc_to_nchw( - reinterpret_cast<const int32_t*>(in), - reinterpret_cast<int32_t*>(out), - n_batches, n_rows, n_cols, n_channels, - in_batch_stride, in_row_stride, in_col_stride, - out_batch_stride, out_channel_stride, out_row_stride - ); -} - -/*****************************************************************************/ -/* Generic implementation : NHWC -> NCHW - */ -template <typename T> -inline void nhwc_to_nchw( - const T* const in, // Input data in NHWC form - T* const out, // Output data in NCHW form - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - int in_batch_stride, - int in_row_stride, - int in_col_stride, - int out_batch_stride, - int out_channel_stride, - int out_row_stride -) -{ - // Fill in stride values - in_col_stride = (in_col_stride) ? in_col_stride : n_channels; - in_row_stride = (in_row_stride) ? in_row_stride : n_cols * in_col_stride; - in_batch_stride = (in_batch_stride) ? in_batch_stride - : n_rows * in_row_stride; - - out_row_stride = (out_row_stride) ? out_row_stride : n_cols; - out_channel_stride = (out_channel_stride) ? out_channel_stride - : n_rows * out_row_stride; - out_batch_stride = (out_batch_stride) ? 
out_batch_stride - : n_channels * out_channel_stride; - - // Perform the re-ordering - // For every batch - for (int n = 0; n < n_batches; n++) - { - const T* const in_batch = in + n*in_batch_stride; - T* const out_batch = out + n*out_batch_stride; - - // For every row - for (int i = 0; i < n_rows; i++) - { - const T* const in_i = in_batch + i*in_row_stride; - T* const out_i = out_batch + i*out_row_stride; - - // For every column - for (int j = 0; j < n_cols; j++) - { - const T* const in_j = in_i + j*in_col_stride; - T* const out_j = out_i + j; - - // For every channel - for (int c = 0; c < n_channels; c++) - { - const T* const in_channel = in_j + c; - T* const out_channel = out_j + c*out_channel_stride; - *(out_channel) = *(in_channel); - } - } - } - } -} - -/*****************************************************************************/ -/* Generic weight re-order implementation. - */ -template <typename T> -inline void ofm_ifm_h_w_to_h_w_ifm_ofm( - const T* const in, // Input in [Output x Input x Height x Width] form - T* const out, // Output in [Height x Width x Input x Output] form - const int n_output_feature_maps, - const int n_input_feature_maps, - const int n_rows, - const int n_cols, - int in_output_feature_map_stride, - int in_input_feature_map_stride, - int in_row_stride, - int out_row_stride, - int out_col_stride, - int out_input_feature_map_stride -) -{ - // Fill in stride values - in_row_stride = (in_row_stride) - ? in_row_stride - : n_cols; - in_input_feature_map_stride = (in_input_feature_map_stride) - ? in_input_feature_map_stride - : n_rows * in_row_stride; - in_output_feature_map_stride = (in_output_feature_map_stride) - ? in_output_feature_map_stride - : n_input_feature_maps * in_input_feature_map_stride; - - out_input_feature_map_stride = (out_input_feature_map_stride) - ? out_input_feature_map_stride - : n_output_feature_maps; - out_col_stride = (out_col_stride) - ? 
out_col_stride - : n_input_feature_maps * out_input_feature_map_stride; - out_row_stride = (out_row_stride) - ? out_row_stride - : n_cols * out_col_stride; - - // Perform the re-ordering - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in + i * in_row_stride; - T* out_row = out + i * out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j; - T* const out_col = out_row + j * out_col_stride; - - for (int ifm = 0; ifm < n_input_feature_maps; ifm++) - { - const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; - T* const out_ifm = out_col + ifm * out_input_feature_map_stride; - - for (int ofm = 0; ofm < n_output_feature_maps; ofm++) - { - const T* const in_ofm = in_ifm + ofm * in_output_feature_map_stride; - T* const out_ofm = out_ifm + ofm; - *(out_ofm) = *(in_ofm); - } - } - } - } -} - -/*****************************************************************************/ -/* Generic weight re-order implementation. - */ -template <typename T> -inline void h_w_ifm_ofm_to_ofm_ifm_h_w( - const T* const in, // Input in [Height x Width x Input x Output] form - T* const out, // Output in [Output x Input x Height x Width] form - const int n_rows, - const int n_cols, - const int n_input_feature_maps, - const int n_output_feature_maps, - int in_row_stride, - int in_col_stride, - int in_input_feature_map_stride, - int out_output_feature_map_stride, - int out_input_feature_map_stride, - int out_row_stride -) -{ - // Fill in the stride values - in_input_feature_map_stride = (in_input_feature_map_stride) - ? in_input_feature_map_stride - : n_output_feature_maps; - in_col_stride = (in_col_stride) - ? in_col_stride - : n_input_feature_maps * in_input_feature_map_stride; - in_row_stride = (in_row_stride) - ? in_row_stride - : n_cols * in_col_stride; - - out_row_stride = (out_row_stride) - ? out_row_stride - : n_cols; - out_input_feature_map_stride = (out_input_feature_map_stride) - ? 
out_input_feature_map_stride - : n_rows * out_row_stride; - out_output_feature_map_stride = (out_output_feature_map_stride) - ? out_output_feature_map_stride - : n_input_feature_maps * out_input_feature_map_stride; - - // Perform the re-ordering - for (int i = 0; i < n_rows; i++) - { - const T* const in_row = in + i * in_row_stride; - T* const out_row = out + i * out_row_stride; - - for (int j = 0; j < n_cols; j++) - { - const T* const in_col = in_row + j * in_col_stride; - T* const out_col = out_row + j; - - for (int ifm = 0; ifm < n_input_feature_maps; ifm++) - { - const T* const in_ifm = in_col + ifm * in_input_feature_map_stride; - T* const out_ifm = out_col + ifm * out_input_feature_map_stride; - - for (int ofm = 0; ofm < n_output_feature_maps; ofm++) - { - const T* const in_ofm = in_ifm + ofm; - T* const out_ofm = out_ifm + ofm * out_output_feature_map_stride; - *(out_ofm) = *(in_ofm); - } - } - } - } -} - -} // namespace reorder diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp deleted file mode 100644 index ad0a677a8f..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor.hpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include <cstdlib> -#include <random> - -#include "alloc.hpp" - -enum TensorOrder -{ - NHWC, ///< [Batch x Height x Width x Channels] - NCHW, ///< [Batch x Channels x Height x Width] -}; - -struct Tensor4DShape -{ - int n_batches, n_rows, n_cols, n_channels; - TensorOrder ordering; - - // Create a new tensor with the default (NHWC) ordering - inline Tensor4DShape( - const int n_batches, - const int n_rows, - const int n_cols, - const int n_channels, - const TensorOrder ordering=NHWC - ) : n_batches(n_batches), - n_rows(n_rows), - n_cols(n_cols), - n_channels(n_channels), - ordering(ordering) - { - } - - inline int index(const int n, const int i, const int j, const int c) const - { - if (this->ordering == NHWC) - { - return ((n*this->n_rows + i)*this->n_cols + j)*this->n_channels + c; - } - else // NCHW - { - return ((n*this->n_channels + c)*this->n_rows + i)*this->n_cols + j; - } - } - - inline int size() const - { - return n_batches * n_rows * n_cols * n_channels; - } - - inline bool TestEq(const Tensor4DShape& other) const - { - return (n_batches == other.n_batches && - n_rows == other.n_rows && - n_cols == other.n_cols && - n_channels == other.n_channels); - } -}; - - -enum WeightOrder -{ - HWIO, ///< [Height x Width x Input channels x Output channels] - OIHW, ///< [Output channels x Input channels x Height x Width] -}; - -struct KernelShape -{ - int n_output_channels, n_rows, n_cols, n_input_channels; - WeightOrder ordering; - - inline KernelShape( - const 
int n_output_channels, - const int n_rows, - const int n_cols, - const int n_input_channels, - const WeightOrder ordering=HWIO - ) : n_output_channels(n_output_channels), - n_rows(n_rows), - n_cols(n_cols), - n_input_channels(n_input_channels), - ordering(ordering) - { - } - - inline int index(int oc, int i, int j, int ic) const - { - if (this->ordering == HWIO) - { - return ((i*this->n_cols + j)*this->n_input_channels + ic)*this->n_output_channels + oc; - } - else // OIHW - { - return ((oc*this->n_input_channels + ic)*this->n_rows + i)*this->n_cols + j; - } - } - - inline int size(void) const - { - return n_output_channels * n_rows * n_cols * n_input_channels; - } -}; - - -template <typename ShapeT, typename T> -class Tensor4D final -{ - public: - Tensor4D(ShapeT shape) : - shape(shape), - _data(reinterpret_cast<T*>(ALLOCATE(size_bytes()))) - { - Clear(); - } - - Tensor4D(const Tensor4D<ShapeT, T>&) = delete; - Tensor4D operator=(const Tensor4D<ShapeT, T>&) = delete; - - ~Tensor4D() { - free(_data); - } - - inline T* ptr() const { - return _data; - } - - inline size_t size_bytes() const { - return shape.size() * sizeof(T); - } - - /* Extract an element of the tensor. - * - * If the shape is a Tensor4DShape then the index is given as batch, row, - * column and channel. If the shape is a KernelShape then the index is - * given as output channel, row, column and input channel. 
- */ - inline T& element(const int a, const int b, const int c, const int d) const - { - return _data[shape.index(a, b, c, d)]; - } - - inline void Clear() { - Fill(static_cast<T>(0)); - } - - inline void Fill(T val) { - for (int i = 0; i < shape.size(); i++) - _data[i] = val; - } - - const ShapeT shape; - - private: - T* const _data; -}; diff --git a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp deleted file mode 100644 index 0c234431b1..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/tensor_utils.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include "tensor.hpp" - -// Methods to print tensors and weights -void PrintTensor(const Tensor4D<Tensor4DShape, float>& tensor); -void PrintWeights(const Tensor4D<KernelShape, float>& weights); - -// Test the equivalence of two tensors -// Counts the instances that |a - b|/|a| > max_err -bool CmpTensors( - const Tensor4D<Tensor4DShape, float>& a, - const Tensor4D<Tensor4DShape, float>& b, - const float max_err=0.0f -); - -// Fill the tensor with a test pattern -void TestPattern(Tensor4D<Tensor4DShape, float>& tensor); -void TestPattern(Tensor4D<KernelShape, float>& weights); - -// Fill the tensor with random values -void Randomise(Tensor4D<Tensor4DShape, float>& tensor, const int seed=0); -void Randomise(Tensor4D<KernelShape, float>& weights, const int seed=0); diff --git a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp b/arm_compute/core/NEON/kernels/convolution/common/utils.hpp deleted file mode 100644 index 99b2282f7e..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/common/utils.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include <limits> - -void PrintMatrix(const float *const m, const int M, const int N, const int row_stride); - -constexpr inline int iceildiv(const int a, const int b) -{ - return (a + b - 1) / b; -} - -template <typename T> -inline T roundup(const T a, const T b) -{ - return b * iceildiv(a, b); -} - -template<typename T> -struct TypeBounds -{ - static constexpr T lower() noexcept { return std::numeric_limits<T>::has_infinity - ? -std::numeric_limits<T>::infinity() - : std::numeric_limits<T>::lowest(); }; - static constexpr T upper() noexcept { return std::numeric_limits<T>::has_infinity - ? std::numeric_limits<T>::infinity() - : std::numeric_limits<T>::max(); }; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template<> -struct TypeBounds<__fp16> -{ - static constexpr __fp16 lower() noexcept { return -std::numeric_limits<float>::infinity(); }; - static constexpr __fp16 upper() noexcept { return std::numeric_limits<float>::infinity(); } -}; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp deleted file mode 100644 index a4a833d90a..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include <arm_neon.h> -#include "activation.hpp" -#include "padding.hpp" - -namespace depthwise -{ - -namespace nck = neon_convolution_kernels; - -class IDepthwiseConvolution -{ - public: - virtual ~IDepthwiseConvolution() = default; - - virtual int output_size( - int dim_size, - unsigned int padding_before, - unsigned int padding_after - ) const = 0; - - /* Set input tensor and stride. */ - virtual void set_input(const void *inptr) = 0; - virtual void set_input(const void *inptr, int column_stride) = 0; - virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0; - virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0; - - /* Set output tensor and stride. 
*/ - virtual void set_output(void *outptr) = 0; - virtual void set_output(void *outptr, int column_stride) = 0; - virtual void set_output(void *outptr, int row_stride, int column_stride) = 0; - virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0; - - /* Weights and biases are re-ordered to improve memory access patterns. Use - * these methods to determine the size of the re-pack buffer and to set the - * address (and implicitly reorder the weights and biases into) the buffer. - */ - virtual size_t get_packed_params_size(void) const = 0; - virtual void set_packed_params_buffer(void *) = 0; - - virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0; - virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0; - virtual void pack_params( - void *buffer, - const void* weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const = 0; - - /* Working space is used to pad tensors on the fly. Before running any - * inference check the amount of space required, allocate and provide a - * pointer to the convolution engine. 
- */ - virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0; - virtual void set_working_space(void *) = 0; - - virtual unsigned int get_window(void) const = 0; - virtual void run( - unsigned int start, - unsigned int stop, - unsigned int threadid=0 - ) = 0; -}; - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols, - typename TIn, typename TBias, typename TOut, - typename Derived -> -class DepthwiseConvolutionBase : public IDepthwiseConvolution -{ - public: - // Information about the specific convolution instance - using InputType = TIn; - using BiasType = TBias; - using OutputType = TOut; - static constexpr int output_tile_rows = OutputTileRows; - static constexpr int output_tile_cols = OutputTileCols; - static constexpr int kernel_rows = KernelRows; - static constexpr int kernel_cols = KernelCols; - static constexpr int stride_rows = StrideRows; - static constexpr int stride_cols = StrideCols; - static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows; - static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols; - - /** Create a new depthwise convolution engine. - * - * @param[in] n_batches Number of batches tensors. - * @param[in] n_input_rows Number of rows in input tensor. - * @param[in] n_input_cols Number of columns in input tensor. - * @param[in] n_channels Number of channels in input and output tensors. - */ - DepthwiseConvolutionBase( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - /** Create a new depthwise convolution engine. - * - * @param[in] n_batches Number of batches tensors. - * @param[in] n_input_rows Number of rows in input tensor. 
- * @param[in] n_input_cols Number of columns in input tensor. - * @param[in] n_channels Number of channels in input and output tensors. - */ - DepthwiseConvolutionBase( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - // Cannot copy or move a DepthwiseConvolution. - DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete; - DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete; - - /* Set input tensor and stride. */ - void set_input(const void *inptr) override; - void set_input(const void *inptr, int column_stride) override; - void set_input(const void *inptr, int row_stride, int column_stride) override; - void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; - - /* Set output tensor and stride. */ - void set_output(void *outptr) override; - void set_output(void *outptr, int column_stride) override; - void set_output(void *outptr, int row_stride, int column_stride) override; - void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; - - /** Get the number of output rows/columns. - * - * @param[in] dim_size Number of elements in the dimension (rows/columns) - * @param[in] same_padding True if the padding is SAME, otherwise false. - */ - static int get_output_size( - int dim_size, unsigned int padding_before, unsigned int padding_after - ); - - int output_size( - int dim_size, unsigned int padding_before, unsigned int padding_after - ) const override; - - /* Determine how much memory is required to store the packed weights and - * biases. - */ - size_t get_packed_params_size(void) const override; - - /* Set the buffer for the packed weights and biases, and perform the - * packing. 
- */ - void set_packed_params_buffer(void *buffer) override; - - void pack_params(const void *weights, const void *biases=nullptr) const override; - - void pack_params( - void *buffer, - const void *weights, - const void *biases=nullptr - ) const override; - - void pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const override; - - /** Query the amount of working space required. - * @param[in] The largest number of threads which will be used to execute - * the kernel. - */ - size_t get_working_space_size(unsigned int n_threads=1) const override; - - /** Set the working space buffer. - */ - void set_working_space(void *buffer) override; - - /** Get the window of work to be performed by an instance of the operator. - */ - unsigned int get_window(void) const override; - - /** Perform a portion of the work associated with the operator. - * - * Will perform the window of work described by $[start, stop)$. - * - * @param[in] start Start of the window of work to perform. - * @param[in] stop End of the work to perform. - * @param[in] ID of the thread performing the work. - */ - void run( - unsigned int start, - unsigned int stop, - unsigned int threadid=0 - ) override; - - protected: - /** Get the value to use to pad the tensor. - */ - TIn _input_padding_value(void) const; - - /** Implementation of the parameter packing. - */ - void _pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const; - - /** Process a tile-row of the tensors. 
- */ - void process_tile_row( - unsigned int threadid, - int n_channels, - const void* packed_params, - const InputType* inptr, - OutputType* outptr, - int row_pad_in_top, - int row_pad_in_left, - int row_pad_in_bottom, - int row_pad_out_bottom, - int n_tiles, - int n_input_cols, - int n_output_cols - ); - - /** Process a single tile of the tensor. - * - * This method will apply input/output padding (if required) and call the - * depthwise tile implementation. - */ - void process_tile( - unsigned int threadid, - int n_channels, - const void* packed_params, - const InputType* inptr, - OutputType* outptr, - int pad_in_top, - int pad_in_left, - int pad_in_bottom, - int pad_in_right, - int pad_out_bottom, - int pad_out_right - ); - - /** Perform depthwise convolution on a single tile. - */ - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const InputType* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - OutputType* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const InputType* inptrs[inner_tile_rows][inner_tile_cols], - OutputType* outptrs[output_tile_rows][output_tile_cols] - ); - - int n_channels(void) const; - - private: - // Member variables of instances of a convolution engine. 
- const InputType* _input; - OutputType* _output; - void* _packed_parameters; - void* _working_space; // Per-thread working space - const int _n_batches, _n_input_rows, _n_input_cols, _n_channels, - _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols; - const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right; - const nck::ActivationFunction _activation; - - // Stride information for a convolution instance - int _input_col_stride, _input_row_stride, _input_batch_stride; - int _output_col_stride, _output_row_stride, _output_batch_stride; - - // Methods for getting access to working space - size_t _get_input_working_space_size(void) const; - size_t _get_output_working_space_size(void) const; - - void *_get_input_working_space(unsigned int threadid) const; - void *_get_output_working_space(unsigned int threadid) const; -}; - - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols, - typename TIn, typename TBias, typename TOut -> -class DepthwiseConvolution : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut - > -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut - > >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - using Base::DepthwiseConvolutionBase; - - protected: - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const TIn* inptr, - 
unsigned int in_row_stride, - unsigned int in_col_stride, - TOut* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); -}; - - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float -> : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float - > -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float, float, float - > >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - protected: - 
template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const float* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - float* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - float* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t -> : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t - > -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t, - DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - float16_t, float16_t, float16_t - > >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int 
n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - protected: - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const float16_t* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - float16_t* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); -}; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -} // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp deleted file mode 100644 index e0d7f0c7f1..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include <deque> -#include <functional> -#include <memory> - -#include "depthwise.hpp" - -namespace depthwise -{ - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols, - typename TIn, typename TBias, typename TOut -> -class DilatedDepthwiseConvolution : public IDepthwiseConvolution -{ - public: - /** Create a new dilated depthwise convolution engine. - */ - DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - /** Create a new dilated depthwise convolution engine. - */ - DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - // Cannot copy or move a DilatedDepthwiseConvolution. - DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete; - DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete; - - /* Set input tensor and stride. 
*/ - void set_input(const void *inptr) override; - void set_input(const void *inptr, int column_stride) override; - void set_input(const void *inptr, int row_stride, int column_stride) override; - void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override; - - /* Set output tensor and stride. */ - void set_output(void *outptr) override; - void set_output(void *outptr, int column_stride) override; - void set_output(void *outptr, int row_stride, int column_stride) override; - void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override; - - static int get_output_size( - int dim_size, - unsigned int padding_before, - unsigned int padding_after, - int dilation_factor - ); - - int output_size( - int dim_size, unsigned int padding_before, unsigned int padding_after - ) const override; - - /* Weights and biases are re-ordered to improve memory access patterns. Use - * these methods to determine the size of the re-pack buffer and to set the - * address (and implicitly reorder the weights and biases into) the buffer. - */ - size_t get_packed_params_size(void) const override; - void set_packed_params_buffer(void *) override; - - void pack_params(const void *weights, const void *biases=nullptr) const override; - void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const override; - void pack_params( - void *buffer, - const void* weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const override; - - /* Working space is used to pad tensors on the fly. Before running any - * inference check the amount of space required, allocate and provide a - * pointer to the convolution engine. 
- */ - size_t get_working_space_size(unsigned int nthreads=1) const override; - void set_working_space(void *) override; - - unsigned int get_window(void) const override; - void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; - - protected: - /** Protected constructor which also accepts a function to construct a new - * subconvolution - */ - DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right, - std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn - ); - - const int _dilation_factor; - const int _n_input_rows, _n_input_cols, _n_channels; - const int _padding_top, _padding_left; - const int _n_output_rows, _n_output_cols; - - /* Dilated depthwise convolution is performed through repeated calls to - * non-dilated convolutions. If the dilation factor is $n$, then we perform - * $(n + 1)^2$ depthwise convolutions. - */ - using BaseDepthwise = DepthwiseConvolution< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - TIn, TBias, TOut - >; - std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs; -}; - -} // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp deleted file mode 100644 index 37c1f1bc84..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized.hpp +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include "depthwise.hpp" -#include "qasymm8.hpp" -#include "qsymm8.hpp" -#pragma once - -using namespace neon_convolution_kernels; -using namespace qasymm8; - -inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32x4_t& b) -{ - return vqrdmulhq_s32(a, b); -} - -inline int32x4_t saturating_doubling_high_mul(const int32x4_t& a, const int32_t& b) -{ - return vqrdmulhq_n_s32(a, b); -} - -inline int32_t saturating_doubling_high_mul(const int32_t& a, const int32_t& b) -{ - return vget_lane_s32(vqrdmulh_n_s32(vdup_n_s32(a), b), 0); -} - -inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int32x4_t shift) -{ - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); - const int32x4_t fixed = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed, shift); -} - -inline int32x4_t rounding_divide_by_exp2(const int32x4_t& x, const int exponent) -{ - const int32x4_t shift = vdupq_n_s32(-exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift), 31); - const int32x4_t fixed = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed, shift); -} - -inline int32x2_t rounding_divide_by_exp2(const int32x2_t& x, const int exponent) -{ - const int32x2_t shift = vdup_n_s32(-exponent); - const int32x2_t fixup = vshr_n_s32(vand_s32(x, shift), 31); - const int32x2_t fixed = vqadd_s32(x, fixup); - return vrshl_s32(fixed, shift); -} - -inline int32_t rounding_divide_by_exp2(const int32_t& x, const int exponent) -{ - const int32x2_t xs = vdup_n_s32(x); - return vget_lane_s32(rounding_divide_by_exp2(xs, exponent), 0); -} - -namespace depthwise -{ - -namespace nck = neon_convolution_kernels; - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class QAsymm8DepthwiseConvolution : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, 
int32_t, uint8_t, - QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols> -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, int32_t, uint8_t, - QAsymm8DepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols> - >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - const qasymm8::QAsymm8RescaleParams& rescale_parameters, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QAsymm8DepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int 
n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - const qasymm8::QAsymm8RescaleParams& rescale_parameters, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - protected: - uint8_t _input_padding_value(void) const; - - void _pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const; - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - uint8_t* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); - - private: - // Quantization parameters - const qasymm8::QAsymm8Params _weights_quant, _inputs_quant, _output_quant; - const qasymm8::QAsymm8RescaleParams rescale_parameters; -}; - -template < - unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols -> -class QSymm8HybridPerChannelDepthwiseConvolution : public DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, StrideCols, - uint8_t, int32_t, uint8_t, - QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols> -> -{ - using Base = DepthwiseConvolutionBase< - OutputTileRows, OutputTileCols, - KernelRows, KernelCols, - StrideRows, 
StrideCols, - uint8_t, int32_t, uint8_t, - QSymm8HybridPerChannelDepthwiseConvolution<OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, StrideCols> - >; - friend Base; - using InputType = typename Base::InputType; - using OutputType = typename Base::OutputType; - - public: - QSymm8HybridPerChannelDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qsymm8::QSymm8PerChannelParams& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - QSymm8HybridPerChannelDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - nck::ActivationFunction activation, - const qsymm8::QSymm8PerChannelParams& weight_quantisation, - const qasymm8::QAsymm8Params& input_quantisation, - const qasymm8::QAsymm8Params& output_quantisation, - const qsymm8::QSymm8PerChannelRescaleParams& rescale_parameters, - unsigned int padding_top, - unsigned int padding_left, - unsigned int padding_bottom, - unsigned int padding_right - ); - - size_t get_packed_params_size(void) const override - { - return this->n_channels() * (sizeof(int8_t)*KernelRows*KernelCols + 3*sizeof(int32_t)); - - } - - protected: - uint8_t _input_padding_value(void) const; - - void _pack_params( - void *buffer, - const void *weights, - unsigned int weight_row_stride, - unsigned int weight_col_stride, - const void *biases=nullptr - ) const; - - template <nck::ActivationFunction Activation> - void execute_tile( - int n_channels, - const void* packed_params, - const uint8_t* inptr, - unsigned int in_row_stride, - unsigned int in_col_stride, - uint8_t* outptr, - unsigned int out_row_stride, - unsigned int out_col_stride - ); - - template <nck::ActivationFunction Activation> - void execute_tile( - int 
n_channels, - const void* packed_params, - const uint8_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols], - uint8_t* outptrs[Base::output_tile_rows][Base::output_tile_cols] - ); - - private: - // Quantization parameters - const qsymm8::QSymm8PerChannelParams _weights_quant; - const qasymm8::QAsymm8Params _input_quant, _output_quant; - const qsymm8::QSymm8PerChannelRescaleParams _rescale_parameters; -}; - -} // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp deleted file mode 100644 index cf1c6f581f..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#pragma once -#include "depthwise_dilated.hpp" -#include "depthwise_quantized.hpp" - -namespace depthwise { - -template <unsigned int OutputTileRows, unsigned int OutputTileCols, - unsigned int KernelRows, unsigned int KernelCols, - unsigned int StrideRows, unsigned int StrideCols> -class QAsymm8DilatedDepthwiseConvolution - : public DilatedDepthwiseConvolution< - OutputTileRows, OutputTileCols, KernelRows, KernelCols, StrideRows, - StrideCols, uint8_t, int32_t, uint8_t> { -public: - /** Create a new dilated depthwise convolution engine. - */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); - - /** Create a new dilated depthwise convolution engine. - */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); - - /** Create a new dilated depthwise convolution engine. 
- */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - const qasymm8::QAsymm8RescaleParams &rescale_parameters, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); - - /** Create a new dilated depthwise convolution engine. - */ - QAsymm8DilatedDepthwiseConvolution( - int n_batches, int n_input_rows, int n_input_cols, int n_channels, - int dilation_factor, int n_output_rows, int n_output_cols, - nck::ActivationFunction activation, - const qasymm8::QAsymm8Params &weight_quantisation, - const qasymm8::QAsymm8Params &input_quantisation, - const qasymm8::QAsymm8Params &output_quantisation, - const qasymm8::QAsymm8RescaleParams& rescale_parameters, - unsigned int padding_top, unsigned int padding_left, - unsigned int padding_bottom, unsigned int padding_right); -}; - -} // namespace depthwise diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp deleted file mode 100644 index bc0d9d4296..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd.hpp +++ /dev/null @@ -1,621 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once - -#include "arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp" - -#include <cstddef> -#include <utility> - -namespace winograd -{ - -class ITransform -{ - public: - virtual ~ITransform() = default; - - /** - * Get the working space required to perform the transformation. - * - * Note, the working space is only required when performing the - * transformation - hence it can be reused whenever the transformation is - * not running. - * - * @param nthreads The greatest number of threads that will be used to execute the transform. - * @return Size of working space required in bytes. - */ - virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0; - - /** - * Set the working space to be used by the transformation. 
- * - * Note, the working space is only required when performing the - * transformation - hence it can be reused whenever the transformation is - * not running. - * - * @param Pointer to the working space. - */ - virtual void set_working_space(void *buffer) = 0; - - /** - * Get the window of work a given operator can perform. - */ - virtual unsigned int get_window() const = 0; - - /** - * Perform work upon a window of the transform. - */ - virtual void run(unsigned int start, unsigned int stop, unsigned int threadid=0) = 0; -}; - -class IInputTransform : public ITransform -{ - public: - virtual ~IInputTransform() = default; - - /** - * Set the pointer to the (NHWC-ordered) tensor to be transformed. - */ - virtual void set_input_tensor(const void *input) = 0; - - /** - * Set the pointer to the (NHWC-ordered) tensor to be transformed. - * @param col_stride Stride between columns of the tensor, measured in elements (not bytes). - */ - virtual void set_input_tensor(const void *input, int col_stride) = 0; - - /** - * Set the pointer to the (NHWC-ordered) tensor to be transformed. - * @param row_stride Stride between rows of the tensor, measured in elements (not bytes). - * @param col_stride Stride between columns of the tensor, measured in elements (not bytes). - */ - virtual void set_input_tensor(const void *input, int row_stride, int col_stride) = 0; - - /** - * Set the pointer to the (NHWC-ordered) tensor to be transformed. - * @param batch_stride Stride between batches of the tensor, measured in elements (not bytes). - * @param row_stride Stride between rows of the tensor, measured in elements (not bytes). - * @param col_stride Stride between columns of the tensor, measured in elements (not bytes). - */ - virtual void set_input_tensor(const void *input, int batch_stride, int row_stride, int col_stride) = 0; - - /** - * Set pointers to the matrices written by the transform. 
- * @param matrices Pointer to the start of the first matrix representing the transformed input. - * @param inter_matrix_stride Stride (in elements) between matrices. - * @param matrix_row_stride Stride (in elements) between the rows within a single matrix. - */ - virtual void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) = 0; -}; - -class IOutputTransform : public ITransform -{ - public: - virtual ~IOutputTransform() = default; - - /** - * Set pointers to the matrices written by the transform. - * @param matrices Pointer to the start of the first matrix representing the input to the transform. - * @param inter_matrix_stride Stride (in elements) between matrices. - * @param matrix_row_stride Stride (in elements) between the rows within a single matrix. - */ - virtual void set_input_matrices(const void *matrices, int inter_matrix_stride, int matrix_row_stride) = 0; - - /** - * Set pointer to the bias tensor (can be ignored or called with nullptr for no bias. - */ - virtual void set_bias(const void *bias=nullptr) = 0; - - /** - * Set pointer to the output tensor produced by the transform. - */ - virtual void set_output_tensor(void *output) = 0; - - /** - * Set pointer to the output tensor produced by the transform. - * @param col_stride Stride between columns of the tensor, measured in elements (not bytes). - */ - virtual void set_output_tensor(void *output, int col_stride) = 0; - - /** - * Set pointer to the output tensor produced by the transform. - * @param row_stride Stride between rows of the tensor, measured in elements (not bytes). - * @param col_stride Stride between columns of the tensor, measured in elements (not bytes). - */ - virtual void set_output_tensor(void *output, int row_stride, int col_stride) = 0; - - /** - * Set pointer to the output tensor produced by the transform. - * @param batch_stride Stride between batches of the tensor, measured in elements (not bytes). 
- * @param row_stride Stride between rows of the tensor, measured in elements (not bytes). - * @param col_stride Stride between columns of the tensor, measured in elements (not bytes). - */ - virtual void set_output_tensor(void *output, int batch_stride, int row_stride, int col_stride) = 0; -}; - -class IWeightTransform : public ITransform -{ - public: - virtual ~IWeightTransform() = default; - - /** Set pointer to the weight tensor read by the transform. */ - virtual void set_weight_tensor(const void *weights) = 0; - - /** - * Set pointers to the matrices written by the transform. - * @param matrices Pointer to the start of the first matrix representing the transformed input. - * @param inter_matrix_stride Stride (in elements) between matrices. - * @param matrix_row_stride Stride (in elements) between the rows within a single matrix. - */ - virtual void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) = 0; -}; - -enum class WinogradRoots -{ - Integers, -}; - -template <int InnerTileRows, int InnerTileCols, typename TIn, typename TOut, WinogradRoots Roots> -class InputTransform : public IInputTransform -{ - public: - /** Create an InputTransform operator fixed on a given problem and set of - * pointers. - */ - InputTransform( - int kernel_rows, /**< Number of rows in the kernel */ - int kernel_cols, /**< Number of columns in the kernel */ - int n_batches, /**< Number of batches in input tensor. */ - int n_rows, /**< Number of rows in input tensor. */ - int n_cols, /**< Number of columns in input tensor. */ - int n_channels, /**< Number of channels in input tensor. */ - int padding_top, /**< Padding to apply to the top of the image. */ - int padding_left, /**< Padding to apply to the left of the image. */ - int padding_bottom, /**< Padding to apply to the bottom of the image. */ - int padding_right /**< Padding to apply to the right of the image. 
*/ - ); - - InputTransform(InputTransform&) = delete; - InputTransform operator=(InputTransform&) = delete; - - /** Set pointers to the input tensor read by the transform. */ - void set_input_tensor(const void *input) override; - void set_input_tensor(const void *input, int col_stride) override; - void set_input_tensor(const void *input, int row_stride, int col_stride) override; - void set_input_tensor(const void *input, int batch_stride, int row_stride, int col_stride) override; - - /** Set pointers to the matrices written by the transform. */ - void set_output_matrices(void *matrices, int iter_matrix_stride, int matrix_row_stride) override; - - /** Get the working space required to perform the transformation. */ - size_t get_working_space_size(unsigned int nthreads=1) const override; - void set_working_space(void *buffer) override; - - /** Get the window of work a given operator can perform. */ - unsigned int get_window() const override; - static constexpr unsigned int WINDOW_BLOCK = 16; // Base size of window - - /** Perform work upon a window of the input. */ - void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; - - protected: - const int _n_batches, _n_rows, _n_cols, _n_channels; - - private: - void transform_unpadded_tile( - unsigned int threadid, - int n_channels, - TOut *outptr, - const TIn *inptr - ); - - void transform_padded_tile( - unsigned int threadid, - int n_channels, - TOut *outptr, - const TIn *inptr, - int padding_top, - int padding_left, - int padding_bottom, - int padding_right - ); - - /* Tile implementation */ - static void transform_tile( - int n_channels, /** @param[in] Number of channels in the tensor. */ - const TIn* inptr_base, /** @param[in] Pointer to the base of the input tile. */ - int input_row_stride, /** @param[in] Stride between rows of the input tensor. */ - int input_col_stride, /** @param[in] Stride between columns of the input tensor. 
*/ - TOut* mptr_base, /** @param[out] Base pointer to transformed input matrices. */ - int matrix_stride /** @param[in] Stride between matrices in the input space. */ - ); - - /** Get the working space for a thread. */ - void * get_working_space(unsigned int threadid) const; - - const TIn* _inptr; - TOut* _outptr; - - const int _overlap_rows, _overlap_cols; - const int _padding_top, _padding_left, _padding_bottom, _padding_right; - const int _tiles_M, _tiles_N; - int _matrix_stride, _matrix_row_stride, _matrix_batch_stride; - int _in_col_stride, _in_row_stride, _in_batch_stride; - - const int _working_space_col_stride, _working_space_row_stride; - TIn *_working_space; -}; - -template <int InnerTileRows, typename TIn, typename TOut, WinogradRoots Roots> -class InputTransform<InnerTileRows, 1, TIn, TOut, Roots> : - public InputTransform<1, InnerTileRows, TIn, TOut, Roots> -{ - using Base = InputTransform<1, InnerTileRows, TIn, TOut, Roots>; - - public: - InputTransform( - int kernel_rows, /**< Number of rows in the kernel. */ - int kernel_cols, /**< Number of columns in the kernel. */ - int n_batches, /**< Number of batches in input tensor. */ - int n_rows, /**< Number of rows in input tensor. */ - int n_cols, /**< Number of columns in input tensor. */ - int n_channels, /**< Number of channels in input tensor. */ - int padding_top, /**< Padding to apply to the top of the image. */ - int padding_left, /**< Padding to apply to the left of the image. */ - int padding_bottom, /**< Padding to apply to the bottom of the image. */ - int padding_right /**< Padding to apply to the right of the image. */ - ); - - /** Set pointers to the input tensor read by the transform. 
*/ - void set_input_tensor(const void *input) override; - void set_input_tensor(const void *input, int col_stride) override; - void set_input_tensor(const void *input, int row_stride, int col_stride) override; - void set_input_tensor(const void *input, int batch_stride, int row_stride, int col_stride) override; -}; - -template < - int KernelRows, int KernelCols, - int InnerTileRows, int InnerTileCols, - typename TIn, typename TOut, - WinogradRoots Roots -> -class OutputTransform : public IOutputTransform -{ - public: - OutputTransform( - int n_batches, /**< Number of batches in output tensor. */ - int n_rows, /**< Number of rows in output tensor. */ - int n_cols, /**< Number of columns in output tensor. */ - int n_channels, /**< Number of channels in output tensor. */ - const arm_gemm::Activation &activation - ); - - OutputTransform(OutputTransform&) = delete; - OutputTransform operator=(OutputTransform&) = delete; - - /** Set pointers to the matrices read by the transform. */ - void set_input_matrices(const void *matrices, int iter_matrix_stride, int matrix_row_stride) override; - - /** Set pointer to the bias tensor (can be ignored or called with nullptr for no bias */ - void set_bias(const void *bias=nullptr) override; - - /** Set pointers to the output tensor written by the transform. */ - void set_output_tensor(void *output) override; - void set_output_tensor(void *output, int col_stride) override; - void set_output_tensor(void *output, int row_stride, int col_stride) override; - void set_output_tensor(void *output, int batch_stride, int row_stride, int col_stride) override; - - /** Get the working space required to perform the transformation. */ - size_t get_working_space_size(unsigned int nthreads=1) const override; - void set_working_space(void *buffer) override; - - /** Get the window of work a given operator can perform. 
*/ - unsigned int get_window() const override; - static constexpr unsigned int WINDOW_BLOCK = 16; // Base size of window - - /** Perform work upon a window of the input. */ - void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; - - protected: - static constexpr int inner_tile_rows = InnerTileRows; - static constexpr int inner_tile_cols = InnerTileCols; - static constexpr int output_tile_rows = InnerTileRows - KernelRows + 1; - static constexpr int output_tile_cols = InnerTileCols - KernelCols + 1; - - const int _n_batches, _n_rows, _n_cols, _n_channels; - const TOut _output_min, _output_max; - - private: - void transform_uncropped_tile( - unsigned int threadid, - int n_channels, - TOut *outptr, - const TIn *inptr, - const TOut *biases - ); - - void transform_cropped_tile( - unsigned int threadid, - int n_channels, - TOut *outptr, - const TIn *inptr, - const TOut *biases, - int pad_bottom, - int pad_right - ); - - /** Implementation of the tile transformation method. */ - static void transform_tile( - int n_channels, - const TIn* matrix_base, - int matrix_stride, - const TOut* biases, - TOut* output, - int output_row_stride, - int output_col_stride, - TOut output_min, - TOut output_max - ); - - /** Get the working space for a thread. 
*/ - void * get_working_space(unsigned int threadid) const; - - const TIn* _matrix_base; - const TOut* _biases; - int _matrix_stride, _matrix_row_stride, _matrix_batch_stride; - TOut* _outptr; - const int _tiles_M, _tiles_N; - int _out_col_stride, _out_row_stride, _out_batch_stride; - - const int _working_space_col_stride, _working_space_row_stride; - TOut *_working_space; -}; - -template < - int KernelRows, - int InnerTileRows, - typename TIn, typename TOut, - WinogradRoots Roots -> -class OutputTransform<KernelRows, 1, InnerTileRows, 1, TIn, TOut, Roots> : - public OutputTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots> -{ - using Base = OutputTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots>; - - public: - OutputTransform( - int n_batches, /**< Number of batches in output tensor. */ - int n_rows, /**< Number of rows in output tensor. */ - int n_cols, /**< Number of columns in output tensor. */ - int n_channels, /**< Number of channels in output tensor. */ - const arm_gemm::Activation &activation - ); - - /** Set pointers to the output tensor written by the transform. */ - void set_output_tensor(void *output) override; - void set_output_tensor(void *output, int col_stride) override; - void set_output_tensor(void *output, int row_stride, int col_stride) override; - void set_output_tensor(void *output, int batch_stride, int row_stride, int col_stride) override; -}; - -template < - int KernelRows, int KernelCols, - int InnerTileRows, int InnerTileCols, - typename TIn, typename TOut, - WinogradRoots Roots -> -class WeightTransform : public IWeightTransform -{ - public: - WeightTransform( - int n_output_channels, /**< Number of output channels in the kernel. */ - int n_input_channels /**< Number of input channels in the kernel. */ - ); - - WeightTransform(WeightTransform&) = delete; - WeightTransform operator=(WeightTransform&) = delete; - - /** Set pointer to the weight tensor read by the transform. 
*/ - void set_weight_tensor(const void *weights) override; - - /** Set pointer to the matrices written by the transform. */ - void set_output_matrices(void *matrices, int inter_matrix_stride, int matrix_row_stride) override; - - /** Get the working space required to perform the transformation. */ - size_t get_working_space_size(unsigned int nthreads=1) const override; - void set_working_space(void *buffer) override; - - /** Get the window of work a given operator can perform. */ - unsigned int get_window() const override; - static constexpr unsigned int WINDOW_BLOCK = 16; // Base size of window - - /** Perform work upon a window of the input. */ - void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override; - - protected: - static const int kernel_rows = KernelRows; - static const int kernel_cols = KernelCols; - static const int inner_tile_rows = InnerTileRows; - static const int inner_tile_cols = InnerTileCols; - - private: - /** Apply the transform to a tensor. */ - static void execute( - int n_output_channels, - int n_input_channels, - const TIn* input, - TOut* output, - int matrix_stride, - int matrix_row_stride - ); - - const int _n_output_channels, _n_input_channels; - TOut *_matrices; - int _matrix_stride, _matrix_row_stride; - const TIn *_weights; -}; - -template <int KernelRows, int InnerTileRows, typename TIn, typename TOut, WinogradRoots Roots> -class WeightTransform<KernelRows, 1, InnerTileRows, 1, TIn, TOut, Roots> : - public WeightTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots> -{ - public: - using WeightTransform<1, KernelRows, 1, InnerTileRows, TIn, TOut, Roots>::WeightTransform; -}; - -template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols, WinogradRoots Roots> -class WinogradGEMM -{ - public: - // Information about the specific Winograd instance - static constexpr int output_tile_rows = OutputTileRows; - static constexpr int output_tile_cols = OutputTileCols; - static constexpr int 
kernel_rows = KernelRows; - static constexpr int kernel_cols = KernelCols; - static constexpr int inner_tile_rows = output_tile_rows + kernel_rows - 1; - static constexpr int inner_tile_cols = output_tile_cols + kernel_cols - 1; - static constexpr int N_GEMMS = inner_tile_rows * inner_tile_cols; - - /** Transform weights from the spatial to the Winograd domain. */ - template <typename TIn, typename TOut> - using WeightsTransform = WeightTransform< - KernelRows, KernelCols, inner_tile_rows, inner_tile_cols, - TIn, TOut, Roots - >; - - /** Transform input feature maps from the spatial to the Winograd domain. - */ - template <typename TIn, typename TOut> - using InputTransform = InputTransform< - inner_tile_rows, inner_tile_cols, TIn, TOut, Roots - >; - - /** Transform output feature maps from the Winograd to the spatial domain. - */ - template <typename TIn, typename TOut> - using OutputTransform = OutputTransform< - KernelRows, KernelCols, inner_tile_rows, inner_tile_cols, - TIn, TOut, Roots - >; - - /** Perform a convolution. - */ - template <typename TOut, typename TIn, typename TInGEMM=TIn, typename TOutGEMM=TOut> - class Convolution - { - public: - // Information about the typed Winograd instance - typedef TOut OutputType; - typedef TOutGEMM GemmOutputType; - typedef TInGEMM GemmInputType; - typedef TIn InputType; - - /** Get the output shape of a convolution. */ - static std::pair<unsigned int, unsigned int> get_output_shape( - const std::pair<unsigned int, unsigned int> input_shape, - bool padding_same); - - /** Get the memory required to store the kernel transformed into the - * Winograd domain. - */ - static size_t get_kernel_storage_size(unsigned int n_input_channels, - unsigned int n_output_channels); - - /** Get the memory required to store the input tensor transformed into - * the Winograd domain. 
- */ - static size_t get_input_storage_size( - unsigned int n_batches, // Number of batches - unsigned int n_rows, // Number of input rows - unsigned int n_cols, // Number of input columns - unsigned int n_channels, // Number of input channels - bool padding_same); - - /** Get the memory required to store the output tensor in the Winograd - * domain. - */ - static size_t get_output_storage_size( - unsigned int n_batches, // Number of batches - unsigned int n_rows, // Number of output rows - unsigned int n_cols, // Number of output columns - unsigned int n_channels // Number of output channels - ); - - /** Get the memory required to apply a Winograd operator to some input. - */ - static size_t get_working_space_size( - unsigned int n_batches, - unsigned int n_rows, // Number of input rows - unsigned int n_cols, // Number of input columns - unsigned int n_input_channels, // Number of input channels - unsigned int n_output_channels, // Number of output channels - bool padding_same); - - /* Get the memory required by a single "input" matrix. - */ - static size_t get_input_matrix_size( - unsigned int n_batches, // Number of batches - unsigned int n_rows, // Number of input rows - unsigned int n_cols, // Number of input columns - unsigned int n_channels, // Number of input channels - bool padding_same); - - static int get_input_matrix_stride( - unsigned int n_batches, // Number of batches - unsigned int n_rows, // Number of input rows - unsigned int n_cols, // Number of input columns - unsigned int n_channels, // Number of input channels - bool padding_same); - - /* Get the memory required by a single "output" matrix. 
- */ - static size_t get_output_matrix_size( - unsigned int n_batches, // Number of batches - unsigned int n_rows, // Number of output rows - unsigned int n_cols, // Number of output columns - unsigned int n_channels // Number of output channels - ); - - static int get_output_matrix_stride( - unsigned int n_batches, // Number of batches - unsigned int n_rows, // Number of output rows - unsigned int n_cols, // Number of output columns - unsigned int n_channels // Number of output channels - ); - - /* Get the memory required by a single "kernel" matrix. - */ - static size_t get_kernel_matrix_size(unsigned int n_input_channels, - unsigned int n_output_channels); - static int get_kernel_matrix_stride(unsigned int n_input_channels, - unsigned int n_output_channels); - - static constexpr int M_BLOCK = 4; /** Size of block used by GEMM. */ - static constexpr int N_BLOCK = 16; /** Size of block used by GEMM. */ - }; -}; - -} // namespace winograd diff --git a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp b/arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp deleted file mode 100644 index ed8fede385..0000000000 --- a/arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#pragma once -#include "arm_gemm_local.hpp" -#include "arm_gemm.hpp" -#include "winograd.hpp" - -namespace winograd -{ - - -class IWinogradConvolutionLayer -{ - public: - virtual ~IWinogradConvolutionLayer() = default; - - virtual unsigned int weight_transform_get_window(void) const = 0; - virtual void weight_transform_run(unsigned int start, unsigned int stop) = 0; - - virtual IInputTransform& input_transform(void) = 0; // Expose the input transform - virtual IOutputTransform& output_transform(void) = 0; // Expose the output transform - virtual arm_gemm::IGemmCommon *gemm(void) = 0; // Expose the underlying GEMM -}; - -/** Example of how to construct an ACL-like interface. - * - * Use `get_weight_storage_size`, `get_input_storage_size` and - * `get_output_storage_size` to allocate memory for the convolution engine. - * Then create a `WinogradConvolutionLayer`. - * - * Initialise the weights using `weights_transform.run(...)`. - * - * For each inference: - * 1. Transform the inputs to the Winograd domain using `input_transform.run(...)` - * 2. Perform a number of GEMMs using `gemms.run(...)` - * 3. 
Transform the output to the spatial domain using `output_transform.run(...)` - */ -template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols, - typename TIn, typename TInGEMM, typename TOutGEMM, typename TOut, - WinogradRoots Roots> -class WinogradConvolutionLayer : public IWinogradConvolutionLayer -{ - public: - using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, Roots>; - using WeightsTransform = typename WinogradBase::template WeightsTransform<TIn, TInGEMM>; - using InputTransform = typename WinogradBase::template InputTransform<TIn, TInGEMM>; - using WinogradConv = typename WinogradBase::template Convolution<TOut, TIn, TInGEMM, TOutGEMM>; - using OutputTransform = typename WinogradBase::template OutputTransform<TOutGEMM, TOut>; - - private: - static constexpr int InnerTileRows = OutputTileRows + KernelRows - 1; - static constexpr int InnerTileCols = OutputTileCols + KernelCols - 1; - static constexpr int N_GEMMS = InnerTileRows * InnerTileCols; - - const int _n_output_rows, _n_output_cols; - const int _kernel_matrix_stride, _kernel_matrix_row_stride; - const int _input_matrix_stride, _input_matrix_row_stride; - const int _output_matrix_stride, _output_matrix_row_stride; - const int _tile_rows, _tile_cols; - const int _m, _k, _n; - - WeightsTransform weights_transform; /** Operator to transform weights to Winograd domain. */ - InputTransform _input_transform; /** Operator to transform input to Winograd domain. */ - const arm_gemm::GemmArgs gemm_args; - arm_gemm::UniqueGemmCommon<TInGEMM, TOutGEMM> gemms; /** Operator to perform multiple GEMMs. */ - OutputTransform _output_transform; /** Operator to transform output from Winograd domain. */ - - public: - - /** Determine how much memory (in units of TIn) to allocate for the - * transformed weights. - */ - static unsigned int get_weight_storage_size( - const int n_output_channels, /** Number of output feature maps. 
*/ - const int n_input_channels /** Number of input feature maps. */ - ); - - static unsigned int get_weight_stride( - const int n_output_channels, /** Number of output feature maps. */ - const int n_input_channels /** Number of input feature maps. */ - ); - - static unsigned int get_weight_multi_stride( - const int n_output_channels, /** Number of output feature maps. */ - const int n_input_channels /** Number of input feature maps. */ - ); - - /** Determine how much memory (in units of TIn) to allocate for the - * transformed input. - */ - static unsigned int get_input_storage_size( - const int n_batches, /** Number of batches in the input tensor. */ - const int n_channels, /** Number of feature maps in the input tensor. */ - const int n_rows, /** Number of rows in each feature map. */ - const int n_cols, /** Number of columns in each feature map. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - /** Get the row stride for the A matrix in the Winograd domain. */ - static unsigned int get_input_stride( - const int n_batches, /** Number of batches in the input tensor. */ - const int n_channels, /** Number of feature maps in the input tensor. */ - const int n_rows, /** Number of rows in each feature map. */ - const int n_cols, /** Number of columns in each feature map. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - /** Get the stride between A matrices in the Winograd domain. */ - static unsigned int get_input_multi_stride( - const int n_batches, /** Number of batches in the input tensor. */ - const int n_channels, /** Number of feature maps in the input tensor. */ - const int n_rows, /** Number of rows in each feature map. */ - const int n_cols, /** Number of columns in each feature map. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - /** Determine how much memory (in units of TOut) to allocate for the - * (Winograd domain) output. 
- */ - static unsigned int get_output_storage_size( - const int n_batches, /** Number of batches in the output tensor. */ - const int n_rows, /** Number of rows in each feature map of the input tensor. */ - const int n_cols, /** Number of columns in each feature map of the input tensor. */ - const int n_output_channels, /** Number of feature maps in the output tensor. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - static unsigned int get_output_stride( - const int n_batches, /** Number of batches in the output tensor. */ - const int n_rows, /** Number of rows in each feature map of the input tensor. */ - const int n_cols, /** Number of columns in each feature map of the input tensor. */ - const int n_output_channels, /** Number of feature maps in the output tensor. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - static unsigned int get_output_multi_stride( - const int n_batches, /** Number of batches in the output tensor. */ - const int n_rows, /** Number of rows in each feature map of the input tensor. */ - const int n_cols, /** Number of columns in each feature map of the input tensor. */ - const int n_output_channels, /** Number of feature maps in the output tensor. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - /** Get the shape (rows, cols) of a feature map of the output tensor. */ - static std::pair<int, int> get_output_feature_map_shape( - const int n_input_rows, /** Number of rows in the input feature map. */ - const int n_input_cols, /** Number of columns in the input feature map. */ - const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */ - ); - - /** Create a new Winograd convolution layer. - */ - WinogradConvolutionLayer( - const arm_gemm::CPUInfo &cpuinfo, /** Describes CPU properties. */ - const int n_threads, /** Maximum number of threads used to execute the convolution. 
*/ - const int n_batches, /** Number of batches in the input and output tensors. */ - const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */ - const int n_input_rows, /** Number of rows in a feature map of the input tensor. */ - const int n_input_cols, /** Number of columns in a feature map of the input tensor. */ - const int n_output_channels, /** Number of feature maps in the output tensor. */ - const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */ - const arm_gemm::Activation &activation, - const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. */ - TInGEMM* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */ - const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */ - TInGEMM* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */ - const TOut* const biases, /** Pointer to biases vector. Pass nullptr if no bias is provided. */ - TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */ - TOutGEMM* const winograd_output, /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */ - const bool pretranspose_B=true, /** Hint that the B matrix can be pretransposed. */ - arm_gemm::GemmConfig *gemm_cfg=nullptr /** Pointer to GEMM configuration. */ - ); - - /* Utility methods for interacting with the layer. 
*/ - unsigned int weight_transform_get_window(void) const; - void weight_transform_run(const unsigned int start, const unsigned int stop); - - IInputTransform& input_transform(void); - IOutputTransform& output_transform(void); - - /* Get a pointer to the GEMM underlying the Winograd transform. */ - arm_gemm::IGemmCommon *gemm(void); -}; - -} diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h deleted file mode 100644 index 4861559695..0000000000 --- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_DETAIL_NEACTIVATION_FUNCTION_DETAIL_H -#define ARM_COMPUTE_DETAIL_NEACTIVATION_FUNCTION_DETAIL_H - -#include "arm_compute/core/NEON/wrapper/wrapper.h" - -namespace arm_compute -{ -namespace detail -{ -/** Dummy activation object */ -template <typename T, int S> -struct dummy -{ - /** NEON vector type. */ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - - /** Construct a dummy activation object. - * - * @param[in] act_info Activation layer information. - */ - explicit dummy(ActivationLayerInfo act_info) - { - ARM_COMPUTE_UNUSED(act_info); - } - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - ARM_COMPUTE_UNUSED(vval); - } -}; -/** Linear activation object */ -template <typename T, int S> -struct linear -{ - /** NEON vector type. */ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - /** Construct a Linear activation object. - * - * @param[in] act_info Activation layer information. - */ - explicit linear(ActivationLayerInfo act_info) - : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})), - vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{})) - { - } - - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - vval = wrapper::vmla(vval, valpha, vbeta); - } - - /** Vector of alphas. */ - const ExactType valpha; - /** Vector of betas. */ - const ExactType vbeta; -}; -/** Square activation object */ -template <typename T, int S> -struct square -{ - /** NEON vector type. */ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - /** Construct a Square activation object. 
- * - * @param[in] act_info Activation layer information. - */ - explicit square(ActivationLayerInfo act_info) - { - ARM_COMPUTE_UNUSED(act_info); - } - - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - vval = wrapper::vmul(vval, vval); - } -}; -/** Logistic activation object */ -template <typename T, int S> -struct logistic -{ - /** NEON vector type. */ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - /** Construct a Logistic activation object. - * - * @param[in] act_info Activation layer information. - */ - explicit logistic(ActivationLayerInfo act_info) - : vone(wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{})) - { - ARM_COMPUTE_UNUSED(act_info); - } - - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - vval = wrapper::vinv(wrapper::vadd(vone, wrapper::vexpq(wrapper::vneg(vval)))); - } - - /** Vector of ones. */ - const ExactType vone; -}; -/** RELU activation object */ -template <typename T, int S> -struct relu -{ - /** NEON vector type. */ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - /** Construct a RELU activation object. - * - * @param[in] act_info Activation layer information. - */ - explicit relu(ActivationLayerInfo act_info) - : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})) - { - ARM_COMPUTE_UNUSED(act_info); - } - - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - vval = wrapper::vmax(vzero, vval); - } - - /** Vector of zeroes. */ - const ExactType vzero; -}; -/** Bounded RELU activation object */ -template <typename T, int S> -struct brelu -{ - /** NEON vector type. 
*/ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - /** Construct a bounded RELU activation object. - * - * @param[in] act_info Activation layer information. - */ - explicit brelu(ActivationLayerInfo act_info) - : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})), - valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})) - { - } - - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval)); - } - - /** Vector of zeroes. */ - const ExactType vzero; - /** Vector of alphas. */ - const ExactType valpha; -}; -/** Lower-Upper Bounded RELU activation object */ -template <typename T, int S> -struct lubrelu -{ - /** NEON vector type. */ - using ExactType = typename wrapper::traits::neon_vector<T, S>::type; - /** NEON vector tag type. */ - using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; - - /** Construct a lower-upper bounded RELU activation object. - * - * @param[in] act_info Activation layer information. - */ - explicit lubrelu(ActivationLayerInfo act_info) - : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})), - vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{})) - { - } - - /** Run activation function. - * - * @param[in] vval Vector of values. - */ - void operator()(ExactType &vval) - { - vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval)); - } - - /** Vector of alphas. */ - const ExactType valpha; - /** Vector of betas. 
*/ - const ExactType vbeta; -}; -} // namespace detail -} // namespace arm_compute -#endif /* ARM_COMPUTE_DETAIL_NEACTIVATION_FUNCTION_DETAIL_H */ diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h deleted file mode 100644 index d756a9a98f..0000000000 --- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolution3x3.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL3x3_H -#define ARM_COMPUTE_NECONVOLUTIONKERNEL3x3_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace detail -{ -inline float32x4x3_t load_matrix_row(const float *ptr) -{ - const float32x4x3_t r = - { - { - vld1q_dup_f32(ptr), - vld1q_dup_f32(1 + ptr), - vld1q_dup_f32(2 + ptr) - } - }; - return r; -} - -template <unsigned int stridex> -float32x4x2_t convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2); - -template <> -inline float32x4x2_t convolve_3x3<1>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) -{ - const float32x4x3_t vtop = - { - { - vld1q_f32(in_top), - vld1q_f32(in_top + 4), - vld1q_f32(in_top + 8) - } - }; - const float32x4x3_t vmid = - { - { - vld1q_f32(in_mid), - vld1q_f32(in_mid + 4), - vld1q_f32(in_mid + 8) - } - }; - const float32x4x3_t vlow = - { - { - vld1q_f32(in_low), - vld1q_f32(in_low + 4), - vld1q_f32(in_low + 8) - } - }; - float32x4x2_t out = - { - { - vmulq_f32(vtop.val[0], m0.val[0]), - vmulq_f32(vtop.val[1], m0.val[0]) - } - }; - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 1), m0.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 2), m0.val[2]); - - out.val[0] = vmlaq_f32(out.val[0], vmid.val[0], m1.val[0]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 1), m1.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 2), m1.val[2]); - - out.val[0] = vmlaq_f32(out.val[0], vlow.val[0], m2.val[0]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 1), m2.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 2), m2.val[2]); - - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 1), m0.val[1]); - out.val[1] = 
vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 2), m0.val[2]); - - out.val[1] = vmlaq_f32(out.val[1], vmid.val[1], m1.val[0]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 1), m1.val[1]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 2), m1.val[2]); - - out.val[1] = vmlaq_f32(out.val[1], vlow.val[1], m2.val[0]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 1), m2.val[1]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 2), m2.val[2]); - return out; -} - -template <> -inline float32x4x2_t convolve_3x3<2>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) -{ - float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2); - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1); - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2); - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3); - return out; -} - -template <> -inline float32x4x2_t convolve_3x3<3>(const float *in_top, const float *in_mid, const float *in_low, const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2) -{ - float32x4x2_t out = convolve_3x3<1>(in_top, in_mid, in_low, m0, m1, m2); - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1); - return out; -} - -template <unsigned int stridex> -void store_results(float *buffer, const float32x4x2_t &values); - -template <> -void store_results<1>(float *buffer, const float32x4x2_t &values) -{ - vst1q_f32(buffer, values.val[0]); - vst1q_f32(buffer + 4, values.val[1]); -} - -template <> -void store_results<2>(float *buffer, const float32x4x2_t &values) -{ - vst1q_f32(buffer, values.val[0]); -} - -template <> -void store_results<3>(float *buffer, const float32x4x2_t &values) -{ - vst1_f32(buffer, vget_low_f32(values.val[0])); -} - -template 
<unsigned int stridex> -int get_input_num_elems_processed(unsigned int num_elems_written_per_iteration); - -template <> -int get_input_num_elems_processed<1>(unsigned int num_elems_written_per_iteration) -{ - return num_elems_written_per_iteration; -} - -template <> -int get_input_num_elems_processed<2>(unsigned int num_elems_written_per_iteration) -{ - return num_elems_written_per_iteration << 1; -} - -template <> -int get_input_num_elems_processed<3>(unsigned int num_elems_written_per_iteration) -{ - return num_elems_written_per_iteration * 3; -} -} -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECONVOLUTIONKERNEL3x3_H */
\ No newline at end of file diff --git a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h b/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h deleted file mode 100644 index d4cbc7f4af..0000000000 --- a/arm_compute/core/NEON/kernels/detail/NEDirectConvolutionDetail.h +++ /dev/null @@ -1,965 +0,0 @@ -/* - * Copyright (c) 2017-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H -#define ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/utils/misc/Requires.h" - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace detail -{ -/** Loads a 3x3 matrix as a row (float). - * - * @param[in] ptr Pointer to a float 3x3 matrix. 
- * @param[in] weights_offset (Optional) Weights quantization offset. - * - * @return The loaded matrix. - */ -inline float32x4x3_t load_matrix_row(const float *ptr, int weights_offset = 0) -{ - ARM_COMPUTE_UNUSED(weights_offset); - const float32x4x3_t r = - { - { - vld1q_dup_f32(ptr), - vld1q_dup_f32(1 + ptr), - vld1q_dup_f32(2 + ptr) - } - }; - return r; -} - -/** Loads a 3x3 matrix as a row (uint8_t/int8_t). - * - * @param[in] ptr Pointer to a uint8_t/int8_t 3x3 matrix. - * @param[in] weights_offset (Optional) Weights quantization offset. - * - * @return The loaded matrix. - */ -template < typename T, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > -inline int32x4x3_t load_matrix_row(const T *ptr, int weights_offset = 0) -{ - const int32x4_t v_weights_offset = vdupq_n_s32(weights_offset); - - /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes: - r.val[0] contains the first element, r.val[1] the second element and r.val[2] the third element (in all lanes) */ - int32x4x3_t r = - { - { - vaddq_s32(v_weights_offset, vdupq_n_s32(*ptr)), - vaddq_s32(v_weights_offset, vdupq_n_s32(*(ptr + 1))), - vaddq_s32(v_weights_offset, vdupq_n_s32(*(ptr + 2))) - } - }; - return r; -} - -/** Stores a float32x4x2_t array into a memory location. - * - * @param[in] buffer Pointer to the memory location where the values will be stored. - * @param[in] values Values that will be stored. 
- * - */ -template <unsigned int stridex> -void store_results(float *buffer, const float32x4x2_t &values); - -template <> -inline void store_results<1>(float *buffer, const float32x4x2_t &values) -{ - vst1q_f32(buffer, values.val[0]); - vst1q_f32(buffer + 4, values.val[1]); -} - -template <> -inline void store_results<2>(float *buffer, const float32x4x2_t &values) -{ - vst1q_f32(buffer, values.val[0]); -} - -template <> -inline void store_results<3>(float *buffer, const float32x4x2_t &values) -{ - vst1_f32(buffer, vget_low_f32(values.val[0])); -} - -/** Stores a uint32_t array into a memory location. - * - * @param[in] buffer Pointer to the memory location where the values will be stored. - * @param[in] values Values that will be stored. - * - */ -template <unsigned int stridex> -void store_results(int32_t *buffer, const int32x4x2_t &values); - -template <> -inline void store_results<1>(int32_t *buffer, const int32x4x2_t &values) -{ - vst1q_s32(buffer, values.val[0]); - vst1q_s32(buffer + 4, values.val[1]); -} - -template <> -inline void store_results<2>(int32_t *buffer, const int32x4x2_t &values) -{ - vst1q_s32(buffer, values.val[0]); -} - -template <> -inline void store_results<3>(int32_t *buffer, const int32x4x2_t &values) -{ - vst1_s32(buffer, vget_low_s32(values.val[0])); -} - -template <unsigned int stridex> -inline void accumulate_results(float *buffer, const float32x4x2_t &values); - -template <> -inline void accumulate_results<1>(float *buffer, const float32x4x2_t &values) -{ - vst1q_f32(buffer, vaddq_f32(vld1q_f32(buffer), values.val[0])); - vst1q_f32(buffer + 4, vaddq_f32(vld1q_f32(buffer + 4), values.val[1])); -} - -template <> -inline void accumulate_results<2>(float *buffer, const float32x4x2_t &values) -{ - vst1q_f32(buffer, vaddq_f32(vld1q_f32(buffer), values.val[0])); -} - -template <> -inline void accumulate_results<3>(float *buffer, const float32x4x2_t &values) -{ - vst1_f32(buffer, vadd_f32(vld1_f32(buffer), vget_low_f32(values.val[0]))); -} - 
-template <unsigned int stridex> -void accumulate_results(int32_t *buffer, const int32x4x2_t &values); - -template <> -inline void accumulate_results<1>(int32_t *buffer, const int32x4x2_t &values) -{ - vst1q_s32(buffer, vaddq_s32(vld1q_s32(buffer), values.val[0])); - vst1q_s32(buffer + 4, vaddq_s32(vld1q_s32(buffer + 4), values.val[1])); -} - -template <> -inline void accumulate_results<2>(int32_t *buffer, const int32x4x2_t &values) -{ - vst1q_s32(buffer, vaddq_s32(vld1q_s32(buffer), values.val[0])); -} - -template <> -inline void accumulate_results<3>(int32_t *buffer, const int32x4x2_t &values) -{ - vst1_s32(buffer, vadd_s32(vld1_s32(buffer), vget_low_s32(values.val[0]))); -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Stores a float16x8x2_t array into a memory location. - * - * @param[in] buffer Pointer to the memory location where the values will be stored. - * @param[in] values Values that will be stored. - * - */ -template <unsigned int stridex> -void store_results(float16_t *buffer, const float16x8x2_t &values); - -template <> -inline void store_results<1>(float16_t *buffer, const float16x8x2_t &values) -{ - vst1q_f16(buffer, values.val[0]); - vst1q_f16(buffer + 8, values.val[1]); -} - -template <> -inline void store_results<2>(float16_t *buffer, const float16x8x2_t &values) -{ - vst1q_f16(buffer, values.val[0]); -} - -template <> -inline void store_results<3>(float16_t *buffer, const float16x8x2_t &values) -{ - vst1_f16(buffer, vget_low_f16(values.val[0])); -} - -template <unsigned int stridex> -inline void accumulate_results(float16_t *buffer, const float16x8x2_t &values); - -template <> -inline void accumulate_results<1>(float16_t *buffer, const float16x8x2_t &values) -{ - vst1q_f16(buffer, vaddq_f16(vld1q_f16(buffer), values.val[0])); - vst1q_f16(buffer + 8, vaddq_f16(vld1q_f16(buffer + 8), values.val[1])); -} - -template <> -inline void accumulate_results<2>(float16_t *buffer, const float16x8x2_t &values) -{ - vst1q_f16(buffer, 
vaddq_f16(vld1q_f16(buffer), values.val[0])); -} - -template <> -inline void accumulate_results<3>(float16_t *buffer, const float16x8x2_t &values) -{ - vst1_f16(buffer, vadd_f16(vld1_f16(buffer), vget_low_f16(values.val[0]))); -} -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -/** Perform a 3x3 convolution for 4 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] dilation_x Dilation, in elements across x. - * @param[in] input_offset (Optional) Input quantization offset. - * - */ -inline float32x4_t single_convolve_3x3_dilation(const float *in_top, const float *in_mid, const float *in_low, - const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, - const size_t dilation_x, int input_offset) -{ - ARM_COMPUTE_UNUSED(input_offset); - - const float32x4x3_t vtop = - { - { - vld1q_f32(in_top), - vld1q_f32(in_top + dilation_x), - vld1q_f32(in_top + 2 * dilation_x) - } - }; - const float32x4x3_t vmid = - { - { - vld1q_f32(in_mid), - vld1q_f32(in_mid + dilation_x), - vld1q_f32(in_mid + 2 * dilation_x) - } - }; - const float32x4x3_t vlow = - { - { - vld1q_f32(in_low), - vld1q_f32(in_low + dilation_x), - vld1q_f32(in_low + 2 * dilation_x) - } - }; - float32x4_t out = vmulq_f32(vtop.val[0], m0.val[0]); - out = vmlaq_f32(out, vtop.val[1], m0.val[1]); - out = vmlaq_f32(out, vtop.val[2], m0.val[2]); - - out = vmlaq_f32(out, vmid.val[0], m1.val[0]); - out = vmlaq_f32(out, vmid.val[1], m1.val[1]); - out = vmlaq_f32(out, vmid.val[2], m1.val[2]); - - out = vmlaq_f32(out, vlow.val[0], m2.val[0]); - out = vmlaq_f32(out, vlow.val[1], m2.val[1]); - out = vmlaq_f32(out, vlow.val[2], m2.val[2]); - - return out; -} - 
-/** Perform a 3x3 convolution for 8 consecutive elements on float32 when dilation.x() or dilation.y() is not 1. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] dilation_x Dilation, in elements across x. - * @param[in] stridex Stride value in elements across x. - * @param[in] input_offset (Optional) Input quantization offset. - * - */ -inline float32x4x2_t convolve_3x3_dilation(const float *in_top, const float *in_mid, const float *in_low, - const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, - const size_t dilation_x, unsigned int stridex, int input_offset = 0) -{ - ARM_COMPUTE_ERROR_ON(stridex > 3); - float32x4x2_t out = - { - { - single_convolve_3x3_dilation(in_top, in_mid, in_low, m0, m1, m2, dilation_x, input_offset), - single_convolve_3x3_dilation(in_top + 4, in_mid + 4, in_low + 4, m0, m1, m2, dilation_x, input_offset) - } - }; - - if(stridex == 2) - { - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 2), out.val[0], 1); - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 0), out.val[0], 2); - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[1], 2), out.val[0], 3); - } - else if(stridex == 3) - { - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1); - } - - return out; -} - -/** Perform a convolve3x3 on float32. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[out] out_ptr Pointer to the output. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. 
- * @param[in] stridex Stride value in elements across x. - * @param[in] input_offset (Optional) Input quantization offset. - * - */ -template <bool accumulate> -void convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, float *out_ptr, - const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, - unsigned int stridex, int input_offset = 0); - -template <bool accumulate> -inline void convolve_3x3(const float *in_top, const float *in_mid, const float *in_low, float *out_ptr, - const float32x4x3_t &m0, const float32x4x3_t &m1, const float32x4x3_t &m2, - unsigned int stridex, int input_offset) -{ - ARM_COMPUTE_UNUSED(input_offset); - ARM_COMPUTE_ERROR_ON(stridex > 3); - - float32x4x2_t out = - { - { - vdupq_n_f32(0.f), - vdupq_n_f32(0.f) - } - }; - if(stridex == 2) - { - const float32x4x2_t vtop = vld2q_f32(in_top); - const float32x4x2_t vmid = vld2q_f32(in_mid); - const float32x4x2_t vlow = vld2q_f32(in_low); - const float32x4_t vtop_end = vld1q_f32(in_top + 8); - const float32x4_t vmid_end = vld1q_f32(in_mid + 8); - const float32x4_t vlow_end = vld1q_f32(in_low + 8); - - out.val[0] = vmulq_f32(vtop.val[0], m0.val[0]); - - out.val[0] = vmlaq_f32(out.val[0], vtop.val[1], m0.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop_end, 1), m0.val[2]); - - out.val[0] = vmlaq_f32(out.val[0], vmid.val[0], m1.val[0]); - out.val[0] = vmlaq_f32(out.val[0], vmid.val[1], m1.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid_end, 1), m1.val[2]); - - out.val[0] = vmlaq_f32(out.val[0], vlow.val[0], m2.val[0]); - out.val[0] = vmlaq_f32(out.val[0], vlow.val[1], m2.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow_end, 1), m2.val[2]); - - accumulate ? 
accumulate_results<2>(out_ptr, out) : store_results<2>(out_ptr, out); - } - else - { - const float32x4x3_t vtop = - { - { - vld1q_f32(in_top), - vld1q_f32(in_top + 4), - vld1q_f32(in_top + 8) - } - }; - const float32x4x3_t vmid = - { - { - vld1q_f32(in_mid), - vld1q_f32(in_mid + 4), - vld1q_f32(in_mid + 8) - } - }; - const float32x4x3_t vlow = - { - { - vld1q_f32(in_low), - vld1q_f32(in_low + 4), - vld1q_f32(in_low + 8) - } - }; - out.val[0] = vmulq_f32(vtop.val[0], m0.val[0]); - out.val[1] = vmulq_f32(vtop.val[1], m0.val[0]); - - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 1), m0.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vtop.val[0], vtop.val[1], 2), m0.val[2]); - - out.val[0] = vmlaq_f32(out.val[0], vmid.val[0], m1.val[0]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 1), m1.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vmid.val[0], vmid.val[1], 2), m1.val[2]); - - out.val[0] = vmlaq_f32(out.val[0], vlow.val[0], m2.val[0]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 1), m2.val[1]); - out.val[0] = vmlaq_f32(out.val[0], vextq_f32(vlow.val[0], vlow.val[1], 2), m2.val[2]); - - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 1), m0.val[1]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vtop.val[1], vtop.val[2], 2), m0.val[2]); - - out.val[1] = vmlaq_f32(out.val[1], vmid.val[1], m1.val[0]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 1), m1.val[1]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vmid.val[1], vmid.val[2], 2), m1.val[2]); - - out.val[1] = vmlaq_f32(out.val[1], vlow.val[1], m2.val[0]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 1), m2.val[1]); - out.val[1] = vmlaq_f32(out.val[1], vextq_f32(vlow.val[1], vlow.val[2], 2), m2.val[2]); - - if(stridex == 3) - { - out.val[0] = vsetq_lane_f32(vgetq_lane_f32(out.val[0], 3), out.val[0], 1); - accumulate ? 
accumulate_results<3>(out_ptr, out) : store_results<3>(out_ptr, out); - } - else - { - accumulate ? accumulate_results<1>(out_ptr, out) : store_results<1>(out_ptr, out); - } - } -} - -/** Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] dilation_x Dilation, in elements across x. - * @param[in] input_offset Input quantization offset. - * - */ -template < typename T, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > -inline int32x4_t single_convolve_3x3_dilation(const T *in_top, const T *in_mid, const T *in_low, - const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, - size_t dilation_x, int32_t input_offset) -{ - using VectorType = typename std::conditional<std::is_same<T, uint8_t>::value, uint8x8x3_t, int8x8x3_t>::type; - using OutputTagType = typename wrapper::traits::neon_bitvector_tag_t<int32_t, wrapper::traits::BitWidth::W128>; - - const int32x4_t v_input_offset = wrapper::vdup_n(input_offset, OutputTagType{}); - - const VectorType vtop = - { - { - wrapper::vload(in_top), - wrapper::vload(in_top + dilation_x), - wrapper::vload(in_top + 2 * dilation_x) - } - }; - const VectorType vmid = - { - { - wrapper::vload(in_mid), - wrapper::vload(in_mid + dilation_x), - wrapper::vload(in_mid + 2 * dilation_x) - } - }; - const VectorType vlow = - { - { - wrapper::vload(in_low), - wrapper::vload(in_low + dilation_x), - wrapper::vload(in_low + 2 * dilation_x) - } - }; - - const int32x4x3_t vtop_s32 = - { - { - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[0])))), - wrapper::vaddw(v_input_offset, 
wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[1])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[2])))), - } - }; - const int32x4x3_t vmid_s32 = - { - { - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[1])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[2])))), - } - }; - const int32x4x3_t vlow_s32 = - { - { - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[1])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[2])))), - } - }; - - int32x4_t out = wrapper::vmul(vtop_s32.val[0], m0.val[0]); - out = wrapper::vmla(out, vtop_s32.val[1], m0.val[1]); - out = wrapper::vmla(out, vtop_s32.val[2], m0.val[2]); - - out = wrapper::vmla(out, vmid_s32.val[0], m1.val[0]); - out = wrapper::vmla(out, vmid_s32.val[1], m1.val[1]); - out = wrapper::vmla(out, vmid_s32.val[2], m1.val[2]); - - out = wrapper::vmla(out, vlow_s32.val[0], m2.val[0]); - out = wrapper::vmla(out, vlow_s32.val[1], m2.val[1]); - out = wrapper::vmla(out, vlow_s32.val[2], m2.val[2]); - - return out; -} - -/** Perform a 3x3 convolution for 4 consecutive 8-bit elements when dilation.x() or dilation.y() is not 1. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] dilation_x Dilation, in elements across x. - * @param[in] stridex Stride value in elements across x. 
- * @param[in] input_offset Input quantization offset. - * - */ -template < typename T, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > -inline int32x4x2_t convolve_3x3_dilation(const T *in_top, const T *in_mid, const T *in_low, const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, - const size_t dilation_x, unsigned int stridex, int input_offset) -{ - ARM_COMPUTE_ERROR_ON(stridex > 3); - int32x4x2_t out = - { - { - single_convolve_3x3_dilation(in_top, in_mid, in_low, m0, m1, m2, dilation_x, input_offset), - single_convolve_3x3_dilation(in_top + 4, in_mid + 4, in_low + 4, m0, m1, m2, dilation_x, input_offset) - } - }; - - if(stridex == 2) - { - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 2), out.val[0], 1); - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 0), out.val[0], 2); - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 2), out.val[0], 3); - } - else if(stridex == 3) - { - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 3), out.val[0], 1); - } - return out; -} - -/** Perform a convolve3x3 on 8-bit elements - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[out] out_ptr Pointer to the output. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] stridex Stride value in elements across x. - * @param[in] input_offset Input quantization offset. 
- * - */ -template < bool accumulate, typename T1, typename T2, REQUIRES_TA(std::is_same<T1, uint8_t>::value || std::is_same<T1, int8_t>::value) > -void convolve_3x3(const T1 *in_top, const T1 *in_mid, const T1 *in_low, T2 *out_ptr, - const int32x4x3_t &m0, const int32x4x3_t &m1, const int32x4x3_t &m2, - unsigned int stridex, int32_t input_offset) -{ - ARM_COMPUTE_ERROR_ON(stridex > 3); - using VectorType = typename std::conditional<std::is_same<T1, uint8_t>::value, uint8x8x2_t, int8x8x2_t>::type; - using OutputTagType = typename wrapper::traits::neon_bitvector_tag_t<int32_t, wrapper::traits::BitWidth::W128>; - - const int32x4_t v_input_offset = wrapper::vdup_n(input_offset, OutputTagType{}); - - const VectorType vtop = - { - { - wrapper::vload(in_top), - wrapper::vload(in_top + 8) - } - }; - const VectorType vmid = - { - { - wrapper::vload(in_mid), - wrapper::vload(in_mid + 8) - } - }; - const VectorType vlow = - { - { - wrapper::vload(in_low), - wrapper::vload(in_low + 8) - } - }; - - const int32x4x3_t vtop_s32 = - { - { - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgethigh(wrapper::vmovl(vtop.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vtop.val[1])))), - } - }; - const int32x4x3_t vmid_s32 = - { - { - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgethigh(wrapper::vmovl(vmid.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vmid.val[1])))), - } - }; - const int32x4x3_t vlow_s32 = - { - { - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[0])))), - wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgethigh(wrapper::vmovl(vlow.val[0])))), - 
wrapper::vaddw(v_input_offset, wrapper::vreinterpret(wrapper::vgetlow(wrapper::vmovl(vlow.val[1])))), - } - }; - - int32x4x2_t out - { - { - wrapper::vdup_n(static_cast<int32_t>(0), OutputTagType{}), - wrapper::vdup_n(static_cast<int32_t>(0), OutputTagType{}), - } - }; - - // 0 - out.val[0] = wrapper::vmla(out.val[0], vtop_s32.val[0], m0.val[0]); - out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_1(vtop_s32.val[0], vtop_s32.val[1]), m0.val[1]); - out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_2(vtop_s32.val[0], vtop_s32.val[1]), m0.val[2]); - - out.val[0] = wrapper::vmla(out.val[0], vmid_s32.val[0], m1.val[0]); - out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_1(vmid_s32.val[0], vmid_s32.val[1]), m1.val[1]); - out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_2(vmid_s32.val[0], vmid_s32.val[1]), m1.val[2]); - - out.val[0] = wrapper::vmla(out.val[0], vlow_s32.val[0], m2.val[0]); - out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_1(vlow_s32.val[0], vlow_s32.val[1]), m2.val[1]); - out.val[0] = wrapper::vmla(out.val[0], wrapper::vext_2(vlow_s32.val[0], vlow_s32.val[1]), m2.val[2]); - - // 1 - out.val[1] = wrapper::vmla(out.val[1], vtop_s32.val[1], m0.val[0]); - out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_1(vtop_s32.val[1], vtop_s32.val[2]), m0.val[1]); - out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_2(vtop_s32.val[1], vtop_s32.val[2]), m0.val[2]); - - out.val[1] = wrapper::vmla(out.val[1], vmid_s32.val[1], m1.val[0]); - out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_1(vmid_s32.val[1], vmid_s32.val[2]), m1.val[1]); - out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_2(vmid_s32.val[1], vmid_s32.val[2]), m1.val[2]); - - out.val[1] = wrapper::vmla(out.val[1], vlow_s32.val[1], m2.val[0]); - out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_1(vlow_s32.val[1], vlow_s32.val[2]), m2.val[1]); - out.val[1] = wrapper::vmla(out.val[1], wrapper::vext_2(vlow_s32.val[1], vlow_s32.val[2]), m2.val[2]); - - if(stridex == 1) - { - 
accumulate ? accumulate_results<1>(out_ptr, out) : store_results<1>(out_ptr, out); - } - else if(stridex == 2) - { - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 2), out.val[0], 1); - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 0), out.val[0], 2); - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[1], 2), out.val[0], 3); - - accumulate ? accumulate_results<2>(out_ptr, out) : store_results<2>(out_ptr, out); - } - else if(stridex == 3) - { - out.val[0] = wrapper::vsetlane(wrapper::vgetlane(out.val[0], 3), out.val[0], 1); - accumulate ? accumulate_results<3>(out_ptr, out) : store_results<3>(out_ptr, out); - } -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Loads a 3x3 matrix as a row (float16_t). - * - * @param[in] ptr Pointer to a float 3x3 matrix. - * - * @return The loaded matrix. - */ -inline float16x8x3_t load_matrix_row(const float16_t *ptr, int weights_offset = 0) -{ - ARM_COMPUTE_UNUSED(weights_offset); - /* ptr is a pointer to a row in a 3x3 matrix, the function returns 3 vectors holding exactly the same value in all lanes: - r.val[0] contains the first element, r.val[1] the second element and r.val[2] the third element (in all lanes) */ - const float16x8x3_t r = - { - { - vld1q_dup_f16(ptr), - vld1q_dup_f16(1 + ptr), - vld1q_dup_f16(2 + ptr) - } - }; - return r; -} - -/** Perform a 3x3 convolution for 8 consecutive elements on float16 when dilation.x() or dilation.y() is not 1. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] dilation_x Dilation, in elements across x. - * @param[in] input_offset (Optional)Input quantization offset. 
- * - */ -inline float16x8_t single_convolve_3x3_dilation(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, - const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2, - const size_t dilation_x, int input_offset = 0) -{ - ARM_COMPUTE_UNUSED(input_offset); - const float16x8x3_t vtop = - { - { - vld1q_f16(in_top), - vld1q_f16(in_top + dilation_x), - vld1q_f16(in_top + 2 * dilation_x) - } - }; - const float16x8x3_t vmid = - { - { - vld1q_f16(in_mid), - vld1q_f16(in_mid + dilation_x), - vld1q_f16(in_mid + 2 * dilation_x) - } - }; - const float16x8x3_t vlow = - { - { - vld1q_f16(in_low), - vld1q_f16(in_low + dilation_x), - vld1q_f16(in_low + 2 * dilation_x) - } - }; - float16x8_t out = vmulq_f16(vtop.val[0], m0.val[0]); - out = vaddq_f16(out, vmulq_f16(vtop.val[1], m0.val[1])); - out = vaddq_f16(out, vmulq_f16(vtop.val[2], m0.val[2])); - - out = vaddq_f16(out, vmulq_f16(vmid.val[0], m1.val[0])); - out = vaddq_f16(out, vmulq_f16(vmid.val[1], m1.val[1])); - out = vaddq_f16(out, vmulq_f16(vmid.val[2], m1.val[2])); - - out = vaddq_f16(out, vmulq_f16(vlow.val[0], m2.val[0])); - out = vaddq_f16(out, vmulq_f16(vlow.val[1], m2.val[1])); - out = vaddq_f16(out, vmulq_f16(vlow.val[2], m2.val[2])); - - return out; -} - -/** Perform a 3x3 convolution for 16 consecutive elements on float16 when dilation.x() or dilation.y() is not 1. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] dilation_x Dilation, in elements across x. - * @param[in] stridex Stride value in elements across x. - * @param[in] input_offset (Optional) Input quantization offset. 
- * - */ -inline float16x8x2_t convolve_3x3_dilation(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, - const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2, - const size_t dilation_x, unsigned int stridex, int input_offset = 0) -{ - float16x8x2_t out = - { - { - single_convolve_3x3_dilation(in_top, in_mid, in_low, m0, m1, m2, dilation_x, input_offset), - single_convolve_3x3_dilation(in_top + 8, in_mid + 8, in_low + 8, m0, m1, m2, dilation_x, input_offset) - } - }; - - if(stridex == 2) - { - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 2), out.val[0], 1); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 4), out.val[0], 2); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 3); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 0), out.val[0], 4); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 2), out.val[0], 5); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 4), out.val[0], 6); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 6), out.val[0], 7); - } - else if(stridex == 3) - { - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 2); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 1), out.val[0], 3); - } - - return out; -} - -/** Perform a convolve3x3 on float16. - * - * @param[in] in_top Pointer to the first row of the input. - * @param[in] in_mid Pointer to the second row of the input. - * @param[in] in_low Pointer to the third row of the input. - * @param[out] out_ptr Pointer to the output. - * @param[in] m0 First row of the filter. - * @param[in] m1 Second row of the filter. - * @param[in] m2 Third row of the filter. - * @param[in] stridex Stride value in elements across x. - * @param[in] input_offset (Optional) Input quantization offset. 
- * - */ -template <bool accumulate> -inline void convolve_3x3(const float16_t *in_top, const float16_t *in_mid, const float16_t *in_low, float16_t *out_ptr, - const float16x8x3_t &m0, const float16x8x3_t &m1, const float16x8x3_t &m2, - unsigned int stridex, int input_offset = 0) -{ - ARM_COMPUTE_UNUSED(input_offset); - - float16x8x2_t out = - { - { - vdupq_n_f16(0), - vdupq_n_f16(0) - } - }; - if(stridex == 2) - { - const float16x8x2_t vtop = vld2q_f16(in_top); - const float16x8x2_t vmid = vld2q_f16(in_mid); - const float16x8x2_t vlow = vld2q_f16(in_low); - const float16x8_t vtop_end = vld1q_f16(in_top + 16); - const float16x8_t vmid_end = vld1q_f16(in_mid + 16); - const float16x8_t vlow_end = vld1q_f16(in_low + 16); - - out.val[0] = vmulq_f16(vtop.val[0], m0.val[0]); - - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vtop.val[1], m0.val[1])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vtop.val[0], vtop_end, 1), m0.val[2])); - - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vmid.val[0], m1.val[0])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vmid.val[1], m1.val[1])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vmid.val[0], vmid_end, 1), m1.val[2])); - - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vlow.val[0], m2.val[0])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vlow.val[1], m2.val[1])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vlow.val[0], vlow_end, 1), m2.val[2])); - - accumulate ? 
accumulate_results<2>(out_ptr, out) : store_results<2>(out_ptr, out); - } - else - { - const float16x8x3_t vtop = - { - { - vld1q_f16(in_top), - vld1q_f16(in_top + 8), - vld1q_f16(in_top + 16) - } - }; - const float16x8x3_t vmid = - { - { - vld1q_f16(in_mid), - vld1q_f16(in_mid + 8), - vld1q_f16(in_mid + 16) - } - }; - const float16x8x3_t vlow = - { - { - vld1q_f16(in_low), - vld1q_f16(in_low + 8), - vld1q_f16(in_low + 16) - } - }; - out.val[0] = vmulq_f16(vtop.val[0], m0.val[0]); - out.val[1] = vmulq_f16(vtop.val[1], m0.val[0]); - - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vtop.val[0], vtop.val[1], 1), m0.val[1])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vtop.val[0], vtop.val[1], 2), m0.val[2])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vmid.val[0], m1.val[0])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vmid.val[0], vmid.val[1], 1), m1.val[1])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vmid.val[0], vmid.val[1], 2), m1.val[2])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vlow.val[0], m2.val[0])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vlow.val[0], vlow.val[1], 1), m2.val[1])); - out.val[0] = vaddq_f16(out.val[0], vmulq_f16(vextq_f16(vlow.val[0], vlow.val[1], 2), m2.val[2])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vtop.val[1], vtop.val[2], 1), m0.val[1])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vtop.val[1], vtop.val[2], 2), m0.val[2])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vmid.val[1], m1.val[0])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vmid.val[1], vmid.val[2], 1), m1.val[1])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vmid.val[1], vmid.val[2], 2), m1.val[2])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vlow.val[1], m2.val[0])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vlow.val[1], vlow.val[2], 1), m2.val[1])); - out.val[1] = vaddq_f16(out.val[1], vmulq_f16(vextq_f16(vlow.val[1], 
vlow.val[2], 2), m2.val[2])); - - if(stridex == 3) - { - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 3), out.val[0], 1); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[0], 6), out.val[0], 2); - out.val[0] = vsetq_lane_f16(vgetq_lane_f16(out.val[1], 1), out.val[0], 3); - - accumulate ? accumulate_results<3>(out_ptr, out) : store_results<3>(out_ptr, out); - } - else - { - accumulate ? accumulate_results<1>(out_ptr, out) : store_results<1>(out_ptr, out); - } - } -} -#endif /** __ARM_FEATURE_FP16_VECTOR_ARITHMETIC **/ - -/** Get the number of elements processed on 3x3 convolution. - * - * @param[in] num_elems_written_per_iteration Number of elements written per iteration on 3x3 convolution. - * @param[in] stridex Stride value in elements across x. - * - * @return The number of elements processed. - */ -inline int get_input_num_elems_processed(unsigned int num_elems_written_per_iteration, unsigned int stridex) -{ - switch(stridex) - { - case 1: - return num_elems_written_per_iteration; - case 2: - return num_elems_written_per_iteration << 1; - case 3: - return num_elems_written_per_iteration * 3; - default: - ARM_COMPUTE_ERROR("stridex not supported"); - return 0; - } -} -} -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONDETAIL_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/abs.h b/arm_compute/core/NEON/wrapper/intrinsics/abs.h deleted file mode 100644 index aff18166f5..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/abs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_ABS_H -#define ARM_COMPUTE_WRAPPER_ABS_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VABS_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vabs(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -#define VQABS_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vqabs(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -// Absolute: vabs{q}_<type>. 
Vd[i] = |Va[i]| -VABS_IMPL(int8x8_t, int8x8_t, vabs, s8) -VABS_IMPL(int16x4_t, int16x4_t, vabs, s16) -VABS_IMPL(int32x2_t, int32x2_t, vabs, s32) -VABS_IMPL(float32x2_t, float32x2_t, vabs, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VABS_IMPL(float16x4_t, float16x4_t, vabs, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VABS_IMPL(int8x16_t, int8x16_t, vabsq, s8) -VABS_IMPL(int16x8_t, int16x8_t, vabsq, s16) -VABS_IMPL(int32x4_t, int32x4_t, vabsq, s32) -VABS_IMPL(float32x4_t, float32x4_t, vabsq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VABS_IMPL(float16x8_t, float16x8_t, vabsq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -// Saturating absolute: vqabs{q}_<type>. Vd[i] = sat(|Va[i]|) -VQABS_IMPL(int8x8_t, int8x8_t, vqabs, s8) -VQABS_IMPL(int16x4_t, int16x4_t, vqabs, s16) -VQABS_IMPL(int32x2_t, int32x2_t, vqabs, s32) - -VQABS_IMPL(int8x16_t, int8x16_t, vqabsq, s8) -VQABS_IMPL(int16x8_t, int16x8_t, vqabsq, s16) -VQABS_IMPL(int32x4_t, int32x4_t, vqabsq, s32) - -#undef VABS_IMPL -#undef VQABS_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_ABS_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/add.h b/arm_compute/core/NEON/wrapper/intrinsics/add.h deleted file mode 100644 index 776e136a56..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/add.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_ADD_H -#define ARM_COMPUTE_WRAPPER_ADD_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VADD_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vadd(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VADD_IMPL(uint8x8_t, uint8x8_t, vadd, u8) -VADD_IMPL(int8x8_t, int8x8_t, vadd, s8) -VADD_IMPL(uint16x4_t, uint16x4_t, vadd, u16) -VADD_IMPL(int16x4_t, int16x4_t, vadd, s16) -VADD_IMPL(uint32x2_t, uint32x2_t, vadd, u32) -VADD_IMPL(int32x2_t, int32x2_t, vadd, s32) -VADD_IMPL(uint64x1_t, uint64x1_t, vadd, u64) -VADD_IMPL(int64x1_t, int64x1_t, vadd, s64) -VADD_IMPL(float32x2_t, float32x2_t, vadd, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VADD_IMPL(float16x4_t, float16x4_t, vadd, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VADD_IMPL(uint8x16_t, uint8x16_t, vaddq, u8) -VADD_IMPL(int8x16_t, int8x16_t, vaddq, s8) -VADD_IMPL(uint16x8_t, uint16x8_t, vaddq, u16) -VADD_IMPL(int16x8_t, int16x8_t, vaddq, s16) -VADD_IMPL(uint32x4_t, uint32x4_t, vaddq, u32) -VADD_IMPL(int32x4_t, int32x4_t, vaddq, s32) -VADD_IMPL(uint64x2_t, uint64x2_t, vaddq, u64) -VADD_IMPL(int64x2_t, int64x2_t, vaddq, s64) -VADD_IMPL(float32x4_t, float32x4_t, vaddq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VADD_IMPL(float16x8_t, float16x8_t, vaddq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#undef VADD_IMPL - -// VQADD: Vector saturating add (No notion of saturation for floating point) -#define VQADD_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vqadd(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VQADD_IMPL(uint8x8_t, uint8x8_t, vqadd, u8) -VQADD_IMPL(int8x8_t, int8x8_t, vqadd, s8) -VQADD_IMPL(uint16x4_t, uint16x4_t, vqadd, u16) -VQADD_IMPL(int16x4_t, int16x4_t, vqadd, s16) -VQADD_IMPL(uint32x2_t, uint32x2_t, vqadd, u32) -VQADD_IMPL(int32x2_t, int32x2_t, vqadd, s32) -VQADD_IMPL(uint64x1_t, uint64x1_t, vqadd, u64) 
-VQADD_IMPL(int64x1_t, int64x1_t, vqadd, s64) -VQADD_IMPL(float32x2_t, float32x2_t, vadd, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VQADD_IMPL(float16x4_t, float16x4_t, vadd, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VQADD_IMPL(uint8x16_t, uint8x16_t, vqaddq, u8) -VQADD_IMPL(int8x16_t, int8x16_t, vqaddq, s8) -VQADD_IMPL(uint16x8_t, uint16x8_t, vqaddq, u16) -VQADD_IMPL(int16x8_t, int16x8_t, vqaddq, s16) -VQADD_IMPL(uint32x4_t, uint32x4_t, vqaddq, u32) -VQADD_IMPL(int32x4_t, int32x4_t, vqaddq, s32) -VQADD_IMPL(uint64x2_t, uint64x2_t, vqaddq, u64) -VQADD_IMPL(int64x2_t, int64x2_t, vqaddq, s64) -VQADD_IMPL(float32x4_t, float32x4_t, vaddq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VQADD_IMPL(float16x8_t, float16x8_t, vaddq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#undef VQADD_IMPL - -// VADDW: Vector widening add -#define VADDW_IMPL(wtype, vtype, prefix, postfix) \ - inline wtype vaddw(const wtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VADDW_IMPL(uint16x8_t, uint8x8_t, vaddw, u8) -VADDW_IMPL(int16x8_t, int8x8_t, vaddw, s8) -VADDW_IMPL(uint32x4_t, uint16x4_t, vaddw, u16) -VADDW_IMPL(int32x4_t, int16x4_t, vaddw, s16) -VADDW_IMPL(uint64x2_t, uint32x2_t, vaddw, u32) -VADDW_IMPL(int64x2_t, int32x2_t, vaddw, s32) -#undef VADDW_IMPL - -// VADDL: Vector long add -#define VADDL_IMPL(wtype, vtype, prefix, postfix) \ - inline wtype vaddl(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VADDL_IMPL(uint16x8_t, uint8x8_t, vaddl, u8) -VADDL_IMPL(int16x8_t, int8x8_t, vaddl, s8) -VADDL_IMPL(uint32x4_t, uint16x4_t, vaddl, u16) -VADDL_IMPL(int32x4_t, int16x4_t, vaddl, s16) -VADDL_IMPL(uint64x2_t, uint32x2_t, vaddl, u32) -VADDL_IMPL(int64x2_t, int32x2_t, vaddl, s32) -#undef VADDL_IMPL - -#if defined(__aarch64__) -// VADDV: Across vector add -#define VADDV_IMPL(stype, vtype, prefix, postfix) \ - inline stype vaddv(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - 
-VADDV_IMPL(uint8_t, uint8x8_t, vaddv, u8) -VADDV_IMPL(int8_t, int8x8_t, vaddv, s8) -VADDV_IMPL(uint16_t, uint16x4_t, vaddv, u16) -VADDV_IMPL(int16_t, int16x4_t, vaddv, s16) -VADDV_IMPL(uint32_t, uint32x2_t, vaddv, u32) -VADDV_IMPL(int32_t, int32x2_t, vaddv, s32) -VADDV_IMPL(float, float32x2_t, vaddv, f32) - -VADDV_IMPL(uint8_t, uint8x16_t, vaddvq, u8) -VADDV_IMPL(int8_t, int8x16_t, vaddvq, s8) -VADDV_IMPL(uint16_t, uint16x8_t, vaddvq, u16) -VADDV_IMPL(int16_t, int16x8_t, vaddvq, s16) -VADDV_IMPL(uint32_t, uint32x4_t, vaddvq, u32) -VADDV_IMPL(int32_t, int32x4_t, vaddvq, s32) -VADDV_IMPL(uint64_t, uint64x2_t, vaddvq, u64) -VADDV_IMPL(int64_t, int64x2_t, vaddvq, s64) -VADDV_IMPL(float, float32x4_t, vaddvq, f32) -#undef VADDV_IMPL -#endif // defined(__aarch64__) - -// VPADDL: Signed add long pairwise -#define VPADDL_IMPL(ltype, vtype, prefix, postfix) \ - inline ltype vpaddl(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VPADDL_IMPL(uint16x4_t, uint8x8_t, vpaddl, u8) -VPADDL_IMPL(int16x4_t, int8x8_t, vpaddl, s8) -VPADDL_IMPL(uint32x2_t, uint16x4_t, vpaddl, u16) -VPADDL_IMPL(int32x2_t, int16x4_t, vpaddl, s16) -VPADDL_IMPL(uint64x1_t, uint32x2_t, vpaddl, u32) -VPADDL_IMPL(int64x1_t, int32x2_t, vpaddl, s32) - -VPADDL_IMPL(uint16x8_t, uint8x16_t, vpaddlq, u8) -VPADDL_IMPL(int16x8_t, int8x16_t, vpaddlq, s8) -VPADDL_IMPL(uint32x4_t, uint16x8_t, vpaddlq, u16) -VPADDL_IMPL(int32x4_t, int16x8_t, vpaddlq, s16) -VPADDL_IMPL(uint64x2_t, uint32x4_t, vpaddlq, u32) -VPADDL_IMPL(int64x2_t, int32x4_t, vpaddlq, s32) -#undef VPADDL_IMPL - -// VPADD: Add pairwise -#define VPADD_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vpadd(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VPADD_IMPL(uint8x8_t, uint8x8_t, vpadd, u8) -VPADD_IMPL(int8x8_t, int8x8_t, vpadd, s8) -VPADD_IMPL(uint16x4_t, uint16x4_t, vpadd, u16) -VPADD_IMPL(int16x4_t, int16x4_t, vpadd, s16) -VPADD_IMPL(uint32x2_t, uint32x2_t, vpadd, u32) -VPADD_IMPL(int32x2_t, 
int32x2_t, vpadd, s32) -VPADD_IMPL(float32x2_t, float32x2_t, vpadd, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VPADD_IMPL(float16x4_t, float16x4_t, vpadd, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VPADD_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_ADD_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/and.h b/arm_compute/core/NEON/wrapper/intrinsics/and.h deleted file mode 100644 index 1973c5593d..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/and.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_AND_H -#define ARM_COMPUTE_WRAPPER_AND_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VAND_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vand(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VAND_IMPL(uint8_t, uint8x8_t, vand, u8) -VAND_IMPL(int8_t, int8x8_t, vand, s8) -VAND_IMPL(uint16_t, uint16x4_t, vand, u16) -VAND_IMPL(int16_t, int16x4_t, vand, s16) -VAND_IMPL(uint32_t, uint32x2_t, vand, u32) -VAND_IMPL(int32_t, int32x2_t, vand, s32) -VAND_IMPL(uint64_t, uint64x1_t, vand, u64) -VAND_IMPL(int64_t, int64x1_t, vand, s64) - -VAND_IMPL(uint8_t, uint8x16_t, vandq, u8) -VAND_IMPL(int8_t, int8x16_t, vandq, s8) -VAND_IMPL(uint16_t, uint16x8_t, vandq, u16) -VAND_IMPL(int16_t, int16x8_t, vandq, s16) -VAND_IMPL(uint32_t, uint32x4_t, vandq, u32) -VAND_IMPL(int32_t, int32x4_t, vandq, s32) -VAND_IMPL(uint64_t, uint64x2_t, vandq, u64) -VAND_IMPL(int64_t, int64x2_t, vandq, s64) - -#undef VAND_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_AND_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/bsl.h b/arm_compute/core/NEON/wrapper/intrinsics/bsl.h deleted file mode 100644 index 3c26a9c786..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/bsl.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_BSL_H -#define ARM_COMPUTE_WRAPPER_BSL_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VBSL_IMPL(stype, vtype, ctype, prefix, postfix) \ - inline vtype vbsl(const ctype &a, const vtype &b, const vtype &c) \ - { \ - return prefix##_##postfix(a, b, c); \ - } - -VBSL_IMPL(uint8_t, uint8x8_t, uint8x8_t, vbsl, u8) -VBSL_IMPL(int8_t, int8x8_t, uint8x8_t, vbsl, s8) -VBSL_IMPL(uint16_t, uint16x4_t, uint16x4_t, vbsl, u16) -VBSL_IMPL(int16_t, int16x4_t, uint16x4_t, vbsl, s16) -VBSL_IMPL(uint32_t, uint32x2_t, uint32x2_t, vbsl, u32) -VBSL_IMPL(int32_t, int32x2_t, uint32x2_t, vbsl, s32) -VBSL_IMPL(float32x2_t, float32x2_t, uint32x2_t, vbsl, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VBSL_IMPL(float16x4_t, float16x4_t, uint16x4_t, vbsl, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VBSL_IMPL(uint8_t, uint8x16_t, uint8x16_t, vbslq, u8) -VBSL_IMPL(int8_t, int8x16_t, uint8x16_t, vbslq, s8) -VBSL_IMPL(uint16_t, uint16x8_t, uint16x8_t, vbslq, u16) -VBSL_IMPL(int16_t, int16x8_t, uint16x8_t, vbslq, s16) -VBSL_IMPL(uint32_t, uint32x4_t, uint32x4_t, vbslq, u32) -VBSL_IMPL(int32_t, int32x4_t, uint32x4_t, vbslq, s32) -VBSL_IMPL(float32x4_t, float32x4_t, uint32x4_t, vbslq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VBSL_IMPL(float16x8_t, float16x8_t, uint16x8_t, vbslq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VBSL_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_BSL_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/ceq.h b/arm_compute/core/NEON/wrapper/intrinsics/ceq.h deleted file mode 100644 index f8a8f91f73..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/ceq.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_CEQ_H -#define ARM_COMPUTE_WRAPPER_CEQ_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VCEQ_IMPL(votype, vtype, prefix, postfix) \ - inline votype vceq(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VCEQ_IMPL(uint8x8_t, uint8x8_t, vceq, u8) -VCEQ_IMPL(uint8x8_t, int8x8_t, vceq, s8) -VCEQ_IMPL(uint16x4_t, uint16x4_t, vceq, u16) -VCEQ_IMPL(uint16x4_t, int16x4_t, vceq, s16) -VCEQ_IMPL(uint32x2_t, uint32x2_t, vceq, u32) -VCEQ_IMPL(uint32x2_t, int32x2_t, vceq, s32) -VCEQ_IMPL(uint32x2_t, float32x2_t, vceq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCEQ_IMPL(uint16x4_t, float16x4_t, vceq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VCEQ_IMPL(uint8x16_t, uint8x16_t, vceqq, u8) -VCEQ_IMPL(uint8x16_t, int8x16_t, vceqq, s8) -VCEQ_IMPL(uint16x8_t, uint16x8_t, vceqq, u16) -VCEQ_IMPL(uint16x8_t, int16x8_t, vceqq, s16) -VCEQ_IMPL(uint32x4_t, uint32x4_t, vceqq, u32) -VCEQ_IMPL(uint32x4_t, int32x4_t, vceqq, s32) -VCEQ_IMPL(uint32x4_t, float32x4_t, vceqq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCEQ_IMPL(uint16x8_t, float16x8_t, vceqq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VCEQ_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_CEQ_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cge.h b/arm_compute/core/NEON/wrapper/intrinsics/cge.h deleted file mode 100644 index bf231b8b46..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/cge.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_CGE_H -#define ARM_COMPUTE_WRAPPER_CGE_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VCGE_IMPL(stype, vtype, rtype, prefix, postfix) \ - inline rtype vcge(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VCGE_IMPL(uint8_t, uint8x8_t, uint8x8_t, vcge, u8) -VCGE_IMPL(int8_t, int8x8_t, uint8x8_t, vcge, s8) -VCGE_IMPL(uint16_t, uint16x4_t, uint16x4_t, vcge, u16) -VCGE_IMPL(int16_t, int16x4_t, uint16x4_t, vcge, s16) -VCGE_IMPL(uint32_t, uint32x2_t, uint32x2_t, vcge, u32) -VCGE_IMPL(int32_t, int32x2_t, uint32x2_t, vcge, s32) -VCGE_IMPL(float32x2_t, float32x2_t, uint32x2_t, vcge, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCGE_IMPL(float16x4_t, float16x4_t, uint16x4_t, vcge, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VCGE_IMPL(uint8_t, uint8x16_t, uint8x16_t, vcgeq, u8) -VCGE_IMPL(int8_t, int8x16_t, uint8x16_t, vcgeq, s8) -VCGE_IMPL(uint16_t, uint16x8_t, uint16x8_t, vcgeq, u16) -VCGE_IMPL(int16_t, int16x8_t, uint16x8_t, vcgeq, s16) -VCGE_IMPL(uint32_t, uint32x4_t, uint32x4_t, vcgeq, u32) -VCGE_IMPL(int32_t, int32x4_t, uint32x4_t, vcgeq, s32) -VCGE_IMPL(float32x4_t, float32x4_t, uint32x4_t, vcgeq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCGE_IMPL(float16x8_t, float16x8_t, uint16x8_t, vcgeq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VCGE_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_CGE_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cgt.h b/arm_compute/core/NEON/wrapper/intrinsics/cgt.h deleted file mode 100644 index 5202a5b21d..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/cgt.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_CGT_H -#define ARM_COMPUTE_WRAPPER_CGT_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VCGT_IMPL(rtype, vtype, prefix, postfix) \ - inline rtype vcgt(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VCGT_IMPL(uint8x8_t, uint8x8_t, vcgt, u8) -VCGT_IMPL(uint8x8_t, int8x8_t, vcgt, s8) -VCGT_IMPL(uint16x4_t, uint16x4_t, vcgt, u16) -VCGT_IMPL(uint16x4_t, int16x4_t, vcgt, s16) -VCGT_IMPL(uint32x2_t, uint32x2_t, vcgt, u32) -VCGT_IMPL(uint32x2_t, int32x2_t, vcgt, s32) -VCGT_IMPL(uint32x2_t, float32x2_t, vcgt, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCGT_IMPL(uint16x4_t, float16x4_t, vcgt, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VCGT_IMPL(uint8x16_t, uint8x16_t, vcgtq, u8) -VCGT_IMPL(uint8x16_t, int8x16_t, vcgtq, s8) -VCGT_IMPL(uint16x8_t, uint16x8_t, vcgtq, u16) -VCGT_IMPL(uint16x8_t, int16x8_t, vcgtq, s16) -VCGT_IMPL(uint32x4_t, uint32x4_t, vcgtq, u32) -VCGT_IMPL(uint32x4_t, int32x4_t, vcgtq, s32) -VCGT_IMPL(uint32x4_t, float32x4_t, vcgtq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCGT_IMPL(uint16x8_t, float16x8_t, vcgtq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VCGT_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_CGT_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/clt.h b/arm_compute/core/NEON/wrapper/intrinsics/clt.h deleted file mode 100644 index 4701ab7026..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/clt.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_CLT_H -#define ARM_COMPUTE_WRAPPER_CLT_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VCLT_IMPL(votype, vtype, prefix, postfix) \ - inline votype vclt(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VCLT_IMPL(uint8x8_t, uint8x8_t, vclt, u8) -VCLT_IMPL(uint8x8_t, int8x8_t, vclt, s8) -VCLT_IMPL(uint16x4_t, uint16x4_t, vclt, u16) -VCLT_IMPL(uint16x4_t, int16x4_t, vclt, s16) -VCLT_IMPL(uint32x2_t, uint32x2_t, vclt, u32) -VCLT_IMPL(uint32x2_t, int32x2_t, vclt, s32) -VCLT_IMPL(uint32x2_t, float32x2_t, vclt, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCLT_IMPL(uint16x4_t, float16x4_t, vclt, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VCLT_IMPL(uint8x16_t, uint8x16_t, vcltq, u8) -VCLT_IMPL(uint8x16_t, int8x16_t, vcltq, s8) -VCLT_IMPL(uint16x8_t, uint16x8_t, vcltq, u16) -VCLT_IMPL(uint16x8_t, int16x8_t, vcltq, s16) -VCLT_IMPL(uint32x4_t, uint32x4_t, vcltq, u32) -VCLT_IMPL(uint32x4_t, int32x4_t, vcltq, s32) -VCLT_IMPL(uint32x4_t, float32x4_t, vcltq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCLT_IMPL(uint16x8_t, float16x8_t, vcltq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VCLT_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_CLT_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/combine.h b/arm_compute/core/NEON/wrapper/intrinsics/combine.h deleted file mode 100644 index 9099e28fc4..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/combine.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_COMBINE_H -#define ARM_COMPUTE_WRAPPER_COMBINE_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VCOMBINE_IMPL(rtype, vtype, prefix, postfix) \ - inline rtype vcombine(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VCOMBINE_IMPL(uint8x16_t, uint8x8_t, vcombine, u8) -VCOMBINE_IMPL(int8x16_t, int8x8_t, vcombine, s8) -VCOMBINE_IMPL(uint16x8_t, uint16x4_t, vcombine, u16) -VCOMBINE_IMPL(int16x8_t, int16x4_t, vcombine, s16) -VCOMBINE_IMPL(uint32x4_t, uint32x2_t, vcombine, u32) -VCOMBINE_IMPL(int32x4_t, int32x2_t, vcombine, s32) -VCOMBINE_IMPL(float32x4_t, float32x2_t, vcombine, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VCOMBINE_IMPL(float16x8_t, float16x4_t, vcombine, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VCOMBINE_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_COMBINE_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h deleted file mode 100644 index 5ea9a5dedd..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_CVT_H -#define ARM_COMPUTE_WRAPPER_CVT_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VCVT_TO_F32_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ - template <typename T> \ - inline typename std::enable_if<std::is_same<T, float>::value, float32x4_t>::type \ - vcvt(const vtype &a) \ - { \ - return prefix##_##postfix1##_##postfix2(a); \ - } - -VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32) -VCVT_TO_F32_IMPL(float32x4_t, int32x4_t, vcvtq, f32, s32) -#undef VCVT_TO_F32_IMPL - -template <typename T> -inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type -vcvt(const float32x4_t &a) -{ - return vcvtq_u32_f32(a); -} - -template <typename T> -inline typename std::enable_if<std::is_same<T, int8_t>::value, int32x4_t>::type -vcvt(const float32x4_t &a) -{ - return vcvtq_s32_f32(a); -} - -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) -/** Convert 2x128-bit floating point vectors into 1x128-bit bfloat16 vector - * - * @param[in] inptr Pointer to the input memory to load values from - * @param[in,out] outptr Pointer to the output memory to store values to - */ -inline void vcvt_bf16_f32(const float *inptr, uint16_t *outptr) -{ - __asm __volatile( - "ldp q0, q1, [%[inptr]]\n" - ".inst 0xea16800\n" // BFCVTN v0, v0 - ".inst 0x4ea16820\n" // BFCVTN2 v0, v1 - "str q0, [%[outptr]]\n" - : [inptr] "+r"(inptr) - : [outptr] "r"(outptr) - : "v0", "v1", 
"memory"); -} -#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ - -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_CVT_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/div.h b/arm_compute/core/NEON/wrapper/intrinsics/div.h deleted file mode 100644 index d49a9113b0..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/div.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_DIV_H -#define ARM_COMPUTE_WRAPPER_DIV_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#ifdef __aarch64__ - -#define VDIV_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vdiv(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } -VDIV_IMPL(float32x2_t, float32x2_t, vdiv, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VDIV_IMPL(float16x4_t, float16x4_t, vdiv, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VDIV_IMPL(float32x4_t, float32x4_t, vdivq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VDIV_IMPL(float16x8_t, float16x8_t, vdivq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#else // __aarch64__ - -#define VDIV_IMPL(stype, vtype, mul_prefix, inv_prefix, postfix) \ - inline vtype vdiv(const vtype &a, const vtype &b) \ - { \ - return mul_prefix##_##postfix(a, inv_prefix##_##postfix(b)); \ - } -VDIV_IMPL(float32x2_t, float32x2_t, vmul, vinv, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VDIV_IMPL(float16x4_t, float16x4_t, vmul, vinv, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VDIV_IMPL(float32x4_t, float32x4_t, vmulq, vinvq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VDIV_IMPL(float16x8_t, float16x8_t, vmulq, vinvq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#endif // __aarch64__ - -#undef VDIV_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_DIV_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h b/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h deleted file mode 100644 index ffbfde72c5..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/dup_n.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_DUP_N_H -#define ARM_COMPUTE_WRAPPER_DUP_N_H - -#include "arm_compute/core/NEON/wrapper/traits.h" - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VDUP_N_IMPL(stype, vtype, prefix, postfix, tag) \ - inline vtype vdup_n(stype value, tag) \ - { \ - return prefix##_##postfix(value); \ - } - -VDUP_N_IMPL(uint8_t, uint8x8_t, vdup_n, u8, traits::vector_64_tag) -VDUP_N_IMPL(int8_t, int8x8_t, vdup_n, s8, traits::vector_64_tag) -VDUP_N_IMPL(uint16_t, uint16x4_t, vdup_n, u16, traits::vector_64_tag) -VDUP_N_IMPL(int16_t, int16x4_t, vdup_n, s16, traits::vector_64_tag) -VDUP_N_IMPL(uint32_t, uint32x2_t, vdup_n, u32, traits::vector_64_tag) -VDUP_N_IMPL(int32_t, int32x2_t, vdup_n, s32, traits::vector_64_tag) -VDUP_N_IMPL(float, float32x2_t, vdup_n, f32, traits::vector_64_tag) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VDUP_N_IMPL(float16_t, float16x4_t, vdup_n, f16, traits::vector_64_tag) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VDUP_N_IMPL(uint8_t, uint8x16_t, vdupq_n, u8, traits::vector_128_tag) -VDUP_N_IMPL(int8_t, int8x16_t, vdupq_n, s8, traits::vector_128_tag) -VDUP_N_IMPL(uint16_t, uint16x8_t, vdupq_n, u16, traits::vector_128_tag) -VDUP_N_IMPL(int16_t, int16x8_t, vdupq_n, s16, traits::vector_128_tag) -VDUP_N_IMPL(uint32_t, uint32x4_t, vdupq_n, u32, traits::vector_128_tag) -VDUP_N_IMPL(int32_t, int32x4_t, vdupq_n, s32, traits::vector_128_tag) -VDUP_N_IMPL(float, float32x4_t, vdupq_n, f32, traits::vector_128_tag) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VDUP_N_IMPL(float16_t, float16x8_t, vdupq_n, f16, traits::vector_128_tag) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VDUP_N_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_DUP_N_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/eor.h b/arm_compute/core/NEON/wrapper/intrinsics/eor.h deleted file mode 100644 index a0e7b681ab..0000000000 --- 
a/arm_compute/core/NEON/wrapper/intrinsics/eor.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_EOR_H -#define ARM_COMPUTE_WRAPPER_EOR_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VEOR_IMPL(vtype, prefix, postfix) \ - inline vtype veor(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VEOR_IMPL(uint8x8_t, veor, u8) -VEOR_IMPL(int8x8_t, veor, s8) -VEOR_IMPL(uint16x4_t, veor, u16) -VEOR_IMPL(int16x4_t, veor, s16) -VEOR_IMPL(uint32x2_t, veor, u32) -VEOR_IMPL(int32x2_t, veor, s32) - -VEOR_IMPL(uint8x16_t, veorq, u8) -VEOR_IMPL(int8x16_t, veorq, s8) -VEOR_IMPL(uint16x8_t, veorq, u16) -VEOR_IMPL(int16x8_t, veorq, s16) -VEOR_IMPL(uint32x4_t, veorq, u32) -VEOR_IMPL(int32x4_t, veorq, s32) - -#undef VEOR_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_EOR_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/exp.h b/arm_compute/core/NEON/wrapper/intrinsics/exp.h deleted file mode 100644 index 4b17ebd93f..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/exp.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_EXP_H -#define ARM_COMPUTE_WRAPPER_EXP_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VEXPQ_IMPL(vtype, postfix) \ - inline vtype vexpq(const vtype &a) \ - { \ - return vexpq_##postfix(a); \ - } - -#define VEXPQ_IMPL_INT(vtype, postfix) \ - inline vtype vexpq(const vtype &a) \ - { \ - ARM_COMPUTE_UNUSED(a); \ - ARM_COMPUTE_ERROR("Not supported"); \ - } - -VEXPQ_IMPL(float32x4_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VEXPQ_IMPL(float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VEXPQ_IMPL_INT(int32x4_t, s32) -#undef VEXPQ_IMPL - -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_EXP_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/ext.h b/arm_compute/core/NEON/wrapper/intrinsics/ext.h deleted file mode 100644 index f2c3dcc901..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/ext.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_EXT_H -#define ARM_COMPUTE_WRAPPER_EXT_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VEXT_IMPL(vtype, prefix, postfix, size) \ - inline vtype vext_##size(vtype value_a, vtype value_b) \ - { \ - return prefix##_##postfix(value_a, value_b, size); \ - } - -VEXT_IMPL(uint8x8_t, vext, u8, 1) -VEXT_IMPL(uint8x8_t, vext, u8, 2) -VEXT_IMPL(int8x8_t, vext, s8, 1) -VEXT_IMPL(int8x8_t, vext, s8, 2) -VEXT_IMPL(uint16x4_t, vext, u16, 1) -VEXT_IMPL(uint16x4_t, vext, u16, 2) -VEXT_IMPL(int16x4_t, vext, s16, 1) -VEXT_IMPL(int16x4_t, vext, s16, 2) - -VEXT_IMPL(uint8x16_t, vextq, u8, 1) -VEXT_IMPL(uint8x16_t, vextq, u8, 2) -VEXT_IMPL(int8x16_t, vextq, s8, 1) -VEXT_IMPL(int8x16_t, vextq, s8, 2) -VEXT_IMPL(uint16x8_t, vextq, u16, 1) -VEXT_IMPL(uint16x8_t, vextq, u16, 2) -VEXT_IMPL(int16x8_t, vextq, s16, 1) -VEXT_IMPL(int16x8_t, vextq, s16, 2) -VEXT_IMPL(int32x4_t, vextq, s32, 1) -VEXT_IMPL(int32x4_t, vextq, s32, 2) - -#undef VEXT_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_EXT_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h b/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h deleted file mode 100644 index 13d29677a6..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/gethigh.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_GET_HIGH_H -#define ARM_COMPUTE_WRAPPER_GET_HIGH_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VGETHIGH_IMPL(half_vtype, vtype, postfix) \ - inline half_vtype vgethigh(const vtype val) \ - { \ - return vget_high_##postfix(val); \ - } - -VGETHIGH_IMPL(uint8x8_t, uint8x16_t, u8) -VGETHIGH_IMPL(int8x8_t, int8x16_t, s8) -VGETHIGH_IMPL(uint16x4_t, uint16x8_t, u16) -VGETHIGH_IMPL(int16x4_t, int16x8_t, s16) -VGETHIGH_IMPL(uint32x2_t, uint32x4_t, u32) -VGETHIGH_IMPL(int32x2_t, int32x4_t, s32) -VGETHIGH_IMPL(float32x2_t, float32x4_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VGETHIGH_IMPL(float16x4_t, float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VGETHIGH_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_GET_HIGH_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h b/arm_compute/core/NEON/wrapper/intrinsics/getlane.h deleted file mode 100644 index 533bf63603..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/getlane.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_GET_LANE_H -#define ARM_COMPUTE_WRAPPER_GET_LANE_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VGETLANE_IMPL_8(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vget_lane_##postfix(vector, 0); \ - case 1: \ - return vget_lane_##postfix(vector, 1); \ - case 2: \ - return vget_lane_##postfix(vector, 2); \ - case 3: \ - return vget_lane_##postfix(vector, 3); \ - case 4: \ - return vget_lane_##postfix(vector, 4); \ - case 5: \ - return vget_lane_##postfix(vector, 5); \ - case 6: \ - return vget_lane_##postfix(vector, 6); \ - case 7: \ - return vget_lane_##postfix(vector, 7); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VGETLANE_IMPL_4(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vget_lane_##postfix(vector, 0); \ - case 1: \ - return vget_lane_##postfix(vector, 1); \ - case 2: \ - return vget_lane_##postfix(vector, 2); \ - case 3: \ - return vget_lane_##postfix(vector, 3); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VGETLANE_IMPL_2(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vget_lane_##postfix(vector, 0); \ - case 1: \ - return vget_lane_##postfix(vector, 1); 
\ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -VGETLANE_IMPL_8(uint8_t, uint8x8_t, u8) -VGETLANE_IMPL_8(int8_t, int8x8_t, s8) -VGETLANE_IMPL_4(uint16_t, uint16x4_t, u16) -VGETLANE_IMPL_4(int16_t, int16x4_t, s16) -VGETLANE_IMPL_2(uint32_t, uint32x2_t, u32) -VGETLANE_IMPL_2(int32_t, int32x2_t, s32) -VGETLANE_IMPL_2(float, float32x2_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VGETLANE_IMPL_4(float16_t, float16x4_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#define VGETQLANE_IMPL_16(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vgetq_lane_##postfix(vector, 0); \ - case 1: \ - return vgetq_lane_##postfix(vector, 1); \ - case 2: \ - return vgetq_lane_##postfix(vector, 2); \ - case 3: \ - return vgetq_lane_##postfix(vector, 3); \ - case 4: \ - return vgetq_lane_##postfix(vector, 4); \ - case 5: \ - return vgetq_lane_##postfix(vector, 5); \ - case 6: \ - return vgetq_lane_##postfix(vector, 6); \ - case 7: \ - return vgetq_lane_##postfix(vector, 7); \ - case 8: \ - return vgetq_lane_##postfix(vector, 8); \ - case 9: \ - return vgetq_lane_##postfix(vector, 9); \ - case 10: \ - return vgetq_lane_##postfix(vector, 10); \ - case 11: \ - return vgetq_lane_##postfix(vector, 11); \ - case 12: \ - return vgetq_lane_##postfix(vector, 12); \ - case 13: \ - return vgetq_lane_##postfix(vector, 13); \ - case 14: \ - return vgetq_lane_##postfix(vector, 14); \ - case 15: \ - return vgetq_lane_##postfix(vector, 15); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VGETQLANE_IMPL_8(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vgetq_lane_##postfix(vector, 0); \ - case 1: \ - return vgetq_lane_##postfix(vector, 1); \ - case 2: \ - return vgetq_lane_##postfix(vector, 2); \ - case 3: \ - return vgetq_lane_##postfix(vector, 3); \ 
- case 4: \ - return vgetq_lane_##postfix(vector, 4); \ - case 5: \ - return vgetq_lane_##postfix(vector, 5); \ - case 6: \ - return vgetq_lane_##postfix(vector, 6); \ - case 7: \ - return vgetq_lane_##postfix(vector, 7); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VGETQLANE_IMPL_4(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vgetq_lane_##postfix(vector, 0); \ - case 1: \ - return vgetq_lane_##postfix(vector, 1); \ - case 2: \ - return vgetq_lane_##postfix(vector, 2); \ - case 3: \ - return vgetq_lane_##postfix(vector, 3); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VGETQLANE_IMPL_2(stype, vtype, postfix) \ - inline stype vgetlane(const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vgetq_lane_##postfix(vector, 0); \ - case 1: \ - return vgetq_lane_##postfix(vector, 1); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -VGETQLANE_IMPL_16(uint8_t, uint8x16_t, u8) -VGETQLANE_IMPL_16(int8_t, int8x16_t, s8) -VGETQLANE_IMPL_8(uint16_t, uint16x8_t, u16) -VGETQLANE_IMPL_8(int16_t, int16x8_t, s16) -VGETQLANE_IMPL_4(uint32_t, uint32x4_t, u32) -VGETQLANE_IMPL_4(int32_t, int32x4_t, s32) -VGETQLANE_IMPL_4(float, float32x4_t, f32) -VGETQLANE_IMPL_2(int64_t, int64x2_t, s64) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VGETQLANE_IMPL_8(float16_t, float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VGETLANE_IMPL_8 -#undef VGETLANE_IMPL_4 -#undef VGETLANE_IMPL_2 - -#undef VGETQLANE_IMPL_16 -#undef VGETQLANE_IMPL_8 -#undef VGETQLANE_IMPL_4 -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_GET_LANE_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/getlow.h b/arm_compute/core/NEON/wrapper/intrinsics/getlow.h deleted file mode 100644 index dbc3d869e1..0000000000 --- 
a/arm_compute/core/NEON/wrapper/intrinsics/getlow.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_GET_LOW_H -#define ARM_COMPUTE_WRAPPER_GET_LOW_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VGETLOW_IMPL(half_vtype, vtype, postfix) \ - inline half_vtype vgetlow(const vtype val) \ - { \ - return vget_low_##postfix(val); \ - } - -VGETLOW_IMPL(uint8x8_t, uint8x16_t, u8) -VGETLOW_IMPL(int8x8_t, int8x16_t, s8) -VGETLOW_IMPL(uint16x4_t, uint16x8_t, u16) -VGETLOW_IMPL(int16x4_t, int16x8_t, s16) -VGETLOW_IMPL(uint32x2_t, uint32x4_t, u32) -VGETLOW_IMPL(int32x2_t, int32x4_t, s32) -VGETLOW_IMPL(float32x2_t, float32x4_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VGETLOW_IMPL(float16x4_t, float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VGETLOW_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_GET_LOW_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h deleted file mode 100644 index 1150daa073..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_INTRINSICS_H -#define ARM_COMPUTE_WRAPPER_INTRINSICS_H - -#include "arm_compute/core/NEON/wrapper/intrinsics/abs.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/add.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/and.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/bsl.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/ceq.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/cge.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/clt.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/combine.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/cvt.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/div.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/eor.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/exp.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/ext.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/gethigh.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/inv.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/load.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/log.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/max.h" -#include 
"arm_compute/core/NEON/wrapper/intrinsics/min.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/mla.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/movl.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/movn.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/mul.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/neg.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/not.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/orr.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/pmax.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/pmin.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/pow.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/qmov.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/qmovun.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/rev64.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/round.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/setlane.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/sin.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/store.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/sub.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/tanh.h" -#include "arm_compute/core/NEON/wrapper/intrinsics/tbl.h" - -#endif /* ARM_COMPUTE_WRAPPER_INTRINSICS_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/inv.h b/arm_compute/core/NEON/wrapper/intrinsics/inv.h deleted file mode 100644 index 9da66baffa..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/inv.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_INV_H -#define ARM_COMPUTE_WRAPPER_INV_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VINV_IMPL(vtype, prefix, postfix) \ - inline vtype vinv(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -#define VINV_IMPL_INT(vtype, prefix, postfix) \ - inline vtype vinv(const vtype &a) \ - { \ - ARM_COMPUTE_UNUSED(a); \ - ARM_COMPUTE_ERROR("Not supported"); \ - } - -VINV_IMPL(float32x2_t, vinv, f32) -VINV_IMPL_INT(int32x2_t, vinv, s32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VINV_IMPL(float16x4_t, vinv, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VINV_IMPL(float32x4_t, vinvq, f32) -VINV_IMPL_INT(int32x4_t, vinvq, s32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VINV_IMPL(float16x8_t, vinvq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VINV_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_INV_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h b/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h deleted file mode 100644 index 77adcf7b8c..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_INVSQRT_H -#define ARM_COMPUTE_WRAPPER_INVSQRT_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VINVSQRT_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vinvsqrt(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -#define VINVSQRT_IMPL_INT(stype, vtype, prefix, postfix) \ - inline vtype vinvsqrt(const vtype &a) \ - { \ - ARM_COMPUTE_UNUSED(a); \ - ARM_COMPUTE_ERROR("Not supported"); \ - } - -VINVSQRT_IMPL(float, float32x2_t, vinvsqrt, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VINVSQRT_IMPL(float16_t, float16x4_t, vinvsqrt, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VINVSQRT_IMPL_INT(int, int32x4_t, vinvsqrt, s32) - -VINVSQRT_IMPL(float, float32x4_t, vinvsqrtq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VINVSQRT_IMPL(float16_t, float16x8_t, vinvsqrtq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VINVSQRT_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_INVSQRT_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/load.h b/arm_compute/core/NEON/wrapper/intrinsics/load.h deleted file mode 100644 index d38350f05b..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/load.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_LOAD_H -#define ARM_COMPUTE_WRAPPER_LOAD_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VLOAD_IMPL(stype, vtype, postfix) \ - inline vtype vload(const stype *ptr) \ - { \ - return vld1_##postfix(ptr); \ - } - -VLOAD_IMPL(uint8_t, uint8x8_t, u8) -VLOAD_IMPL(int8_t, int8x8_t, s8) -VLOAD_IMPL(uint16_t, uint16x4_t, u16) -VLOAD_IMPL(int16_t, int16x4_t, s16) -VLOAD_IMPL(uint32_t, uint32x2_t, u32) -VLOAD_IMPL(int32_t, int32x2_t, s32) -//VLOAD_IMPL(uint64_t, uint64x1_t, u64) -//VLOAD_IMPL(int64_t, int64x1_t, s64) -VLOAD_IMPL(float, float32x2_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VLOAD_IMPL(float16_t, float16x4_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#define VLOADQ_IMPL(stype, vtype, postfix) \ - inline vtype vloadq(const stype *ptr) \ - { \ - return vld1q_##postfix(ptr); \ - } - -VLOADQ_IMPL(uint8_t, uint8x16_t, u8) -VLOADQ_IMPL(int8_t, int8x16_t, s8) -VLOADQ_IMPL(uint16_t, uint16x8_t, u16) -VLOADQ_IMPL(int16_t, int16x8_t, s16) -VLOADQ_IMPL(uint32_t, uint32x4_t, u32) -VLOADQ_IMPL(int32_t, int32x4_t, s32) -//VLOAD_IMPL(uint64_t, uint64x1_t, u64) -//VLOAD_IMPL(int64_t, int64x1_t, s64) -VLOADQ_IMPL(float, float32x4_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VLOADQ_IMPL(float16_t, float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#undef VLOAD_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_LOAD_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/log.h b/arm_compute/core/NEON/wrapper/intrinsics/log.h deleted file mode 100644 index 682830c122..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/log.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_LOG_H -#define ARM_COMPUTE_WRAPPER_LOG_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VLOG_IMPL(vtype, prefix, postfix) \ - inline vtype vlog(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -#define VLOG_IMPL_INT(vtype, prefix, postfix) \ - inline vtype vlog(const vtype &a) \ - { \ - ARM_COMPUTE_UNUSED(a); \ - ARM_COMPUTE_ERROR("Not supported"); \ - } - -VLOG_IMPL(float32x4_t, vlogq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VLOG_IMPL(float16x8_t, vlogq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VLOG_IMPL_INT(int32x4_t, vlogq, s32) - -#undef VLOG_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_LOG_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/max.h b/arm_compute/core/NEON/wrapper/intrinsics/max.h deleted file mode 100644 index a87b7a32b5..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/max.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_MAX_H -#define ARM_COMPUTE_WRAPPER_MAX_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VMAX_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vmax(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VMAX_IMPL(uint8_t, uint8x8_t, vmax, u8) -VMAX_IMPL(int8_t, int8x8_t, vmax, s8) -VMAX_IMPL(uint16_t, uint16x4_t, vmax, u16) -VMAX_IMPL(int16_t, int16x4_t, vmax, s16) -VMAX_IMPL(uint32_t, uint32x2_t, vmax, u32) -VMAX_IMPL(int32_t, int32x2_t, vmax, s32) -VMAX_IMPL(float, float32x2_t, vmax, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMAX_IMPL(float16_t, float16x4_t, vmax, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VMAX_IMPL(uint8_t, uint8x16_t, vmaxq, u8) -VMAX_IMPL(int8_t, int8x16_t, vmaxq, s8) -VMAX_IMPL(uint16_t, uint16x8_t, vmaxq, u16) -VMAX_IMPL(int16_t, int16x8_t, vmaxq, s16) -VMAX_IMPL(uint32_t, uint32x4_t, vmaxq, u32) -VMAX_IMPL(int32_t, int32x4_t, vmaxq, s32) -VMAX_IMPL(float, float32x4_t, vmaxq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMAX_IMPL(float16_t, float16x8_t, vmaxq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VMAX_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_MAX_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/min.h b/arm_compute/core/NEON/wrapper/intrinsics/min.h deleted file mode 100644 index dc8a127e82..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/min.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_MIN_H -#define ARM_COMPUTE_WRAPPER_MIN_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VMIN_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vmin(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VMIN_IMPL(uint8_t, uint8x8_t, vmin, u8) -VMIN_IMPL(int8_t, int8x8_t, vmin, s8) -VMIN_IMPL(uint16_t, uint16x4_t, vmin, u16) -VMIN_IMPL(int16_t, int16x4_t, vmin, s16) -VMIN_IMPL(uint32_t, uint32x2_t, vmin, u32) -VMIN_IMPL(int32_t, int32x2_t, vmin, s32) -VMIN_IMPL(float, float32x2_t, vmin, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMIN_IMPL(float16_t, float16x4_t, vmin, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VMIN_IMPL(uint8_t, uint8x16_t, vminq, u8) -VMIN_IMPL(int8_t, int8x16_t, vminq, s8) -VMIN_IMPL(uint16_t, uint16x8_t, vminq, u16) -VMIN_IMPL(int16_t, int16x8_t, vminq, s16) -VMIN_IMPL(uint32_t, uint32x4_t, vminq, u32) -VMIN_IMPL(int32_t, int32x4_t, vminq, s32) -VMIN_IMPL(float, float32x4_t, vminq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMIN_IMPL(float16_t, float16x8_t, vminq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VMIN_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_MIN_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mla.h b/arm_compute/core/NEON/wrapper/intrinsics/mla.h deleted file mode 100644 index dd2f0c0d9d..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/mla.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_MLA_H -#define ARM_COMPUTE_WRAPPER_MLA_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VMLA_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \ - { \ - return prefix##_##postfix(a, b, c); \ - } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#define VMLA_IMPL2(stype, vtype, prefix1, prefix2, postfix) \ - inline vtype vmla(const vtype &a, const vtype &b, const vtype &c) \ - { \ - return prefix1##_##postfix(a, prefix2##_##postfix(b, c)); \ - } -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VMLA_IMPL(uint8x8_t, uint8x8_t, vmla, u8) -VMLA_IMPL(int8x8_t, int8x8_t, vmla, s8) -VMLA_IMPL(uint16x4_t, uint16x4_t, vmla, u16) -VMLA_IMPL(int16x4_t, int16x4_t, vmla, s16) -VMLA_IMPL(uint32x2_t, uint32x2_t, vmla, u32) -VMLA_IMPL(int32x2_t, int32x2_t, vmla, s32) -VMLA_IMPL(float32x2_t, float32x2_t, vmla, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMLA_IMPL2(float16x4_t, float16x4_t, vadd, vmul, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VMLA_IMPL(uint8x16_t, uint8x16_t, vmlaq, u8) -VMLA_IMPL(int8x16_t, int8x16_t, vmlaq, s8) -VMLA_IMPL(uint16x8_t, uint16x8_t, vmlaq, u16) -VMLA_IMPL(int16x8_t, int16x8_t, vmlaq, s16) -VMLA_IMPL(uint32x4_t, uint32x4_t, vmlaq, u32) -VMLA_IMPL(int32x4_t, int32x4_t, vmlaq, s32) -VMLA_IMPL(float32x4_t, float32x4_t, vmlaq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMLA_IMPL2(float16x8_t, float16x8_t, vaddq, vmulq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VMLA_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_MLA_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/movl.h b/arm_compute/core/NEON/wrapper/intrinsics/movl.h deleted file mode 100644 index 982a795924..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/movl.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_MOVL_H -#define ARM_COMPUTE_WRAPPER_MOVL_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VMOVL_IMPL(ptype, vtype, prefix, postfix) \ - inline ptype vmovl(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VMOVL_IMPL(uint16x8_t, uint8x8_t, vmovl, u8) -VMOVL_IMPL(int16x8_t, int8x8_t, vmovl, s8) -VMOVL_IMPL(uint32x4_t, uint16x4_t, vmovl, u16) -VMOVL_IMPL(int32x4_t, int16x4_t, vmovl, s16) -VMOVL_IMPL(uint64x2_t, uint32x2_t, vmovl, u32) -VMOVL_IMPL(int64x2_t, int32x2_t, vmovl, s32) - -#undef VMOVL_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_MOVL_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/movn.h b/arm_compute/core/NEON/wrapper/intrinsics/movn.h deleted file mode 100644 index 23360e2597..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/movn.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_MOVN_H -#define ARM_COMPUTE_WRAPPER_MOVN_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VMOVN_IMPL(dtype, vtype, prefix, postfix) \ - inline dtype vmovn(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VMOVN_IMPL(uint32x2_t, uint64x2_t, vmovn, u64) -VMOVN_IMPL(int32x2_t, int64x2_t, vmovn, s64) -VMOVN_IMPL(uint16x4_t, uint32x4_t, vmovn, u32) -VMOVN_IMPL(int16x4_t, int32x4_t, vmovn, s32) -VMOVN_IMPL(uint8x8_t, uint16x8_t, vmovn, u16) -VMOVN_IMPL(int8x8_t, int16x8_t, vmovn, s16) - -#define VQMOVN_IMPL(dtype, vtype, prefix, postfix) \ - inline dtype vqmovn(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VQMOVN_IMPL(uint32x2_t, uint64x2_t, vqmovn, u64) -VQMOVN_IMPL(int32x2_t, int64x2_t, vqmovn, s64) -VQMOVN_IMPL(uint16x4_t, uint32x4_t, vqmovn, u32) -VQMOVN_IMPL(int16x4_t, int32x4_t, vqmovn, s32) -VQMOVN_IMPL(uint8x8_t, uint16x8_t, vqmovn, u16) -VQMOVN_IMPL(int8x8_t, int16x8_t, vqmovn, s16) - -#undef VMOVN_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_MOVN_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/mul.h b/arm_compute/core/NEON/wrapper/intrinsics/mul.h deleted file mode 100644 index bbf70abac9..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/mul.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_MUL_H -#define ARM_COMPUTE_WRAPPER_MUL_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VMUL_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vmul(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VMUL_IMPL(uint8x8_t, uint8x8_t, vmul, u8) -VMUL_IMPL(int8x8_t, int8x8_t, vmul, s8) -VMUL_IMPL(uint16x4_t, uint16x4_t, vmul, u16) -VMUL_IMPL(int16x4_t, int16x4_t, vmul, s16) -VMUL_IMPL(uint32x2_t, uint32x2_t, vmul, u32) -VMUL_IMPL(int32x2_t, int32x2_t, vmul, s32) -VMUL_IMPL(float32x2_t, float32x2_t, vmul, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMUL_IMPL(float16_t, float16x4_t, vmul, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VMUL_IMPL(uint8_t, uint8x16_t, vmulq, u8) -VMUL_IMPL(int8_t, int8x16_t, vmulq, s8) -VMUL_IMPL(uint16_t, uint16x8_t, vmulq, u16) -VMUL_IMPL(int16_t, int16x8_t, vmulq, s16) -VMUL_IMPL(uint32_t, uint32x4_t, vmulq, u32) -VMUL_IMPL(int32_t, int32x4_t, vmulq, s32) -VMUL_IMPL(float32x4_t, float32x4_t, vmulq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VMUL_IMPL(float16_t, float16x8_t, vmulq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VMUL_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_MUL_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/neg.h b/arm_compute/core/NEON/wrapper/intrinsics/neg.h deleted file mode 100644 index da2f285eca..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/neg.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_NEG_H -#define ARM_COMPUTE_WRAPPER_NEG_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VNEG_IMPL(vtype, prefix, postfix) \ - inline vtype vneg(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VNEG_IMPL(int8x8_t, vneg, s8) -VNEG_IMPL(int16x4_t, vneg, s16) -VNEG_IMPL(int32x2_t, vneg, s32) -VNEG_IMPL(float32x2_t, vneg, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VNEG_IMPL(float16x4_t, vneg, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VNEG_IMPL(int8x16_t, vnegq, s8) -VNEG_IMPL(int16x8_t, vnegq, s16) -VNEG_IMPL(int32x4_t, vnegq, s32) -VNEG_IMPL(float32x4_t, vnegq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VNEG_IMPL(float16x8_t, vnegq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VNEG_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_NEG_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/not.h b/arm_compute/core/NEON/wrapper/intrinsics/not.h deleted file mode 100644 index 5b1e4056ca..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/not.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_NOT_H -#define ARM_COMPUTE_WRAPPER_NOT_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VNOT_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vnot(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VNOT_IMPL(uint8_t, uint8x8_t, vmvn, u8) -VNOT_IMPL(int8_t, int8x8_t, vmvn, s8) -VNOT_IMPL(uint16_t, uint16x4_t, vmvn, u16) -VNOT_IMPL(int16_t, int16x4_t, vmvn, s16) -VNOT_IMPL(uint32_t, uint32x2_t, vmvn, u32) -VNOT_IMPL(int32_t, int32x2_t, vmvn, s32) -VNOT_IMPL(float32x2_t, float32x2_t, vinv, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VNOT_IMPL(float16x4_t, float16x4_t, vinv, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VNOT_IMPL(uint8_t, uint8x16_t, vmvnq, u8) -VNOT_IMPL(int8_t, int8x16_t, vmvnq, s8) -VNOT_IMPL(uint16_t, uint16x8_t, vmvnq, u16) -VNOT_IMPL(int16_t, int16x8_t, vmvnq, s16) -VNOT_IMPL(uint32_t, uint32x4_t, vmvnq, u32) -VNOT_IMPL(int32_t, int32x4_t, vmvnq, s32) -VNOT_IMPL(float32x4_t, float32x4_t, vinvq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VNOT_IMPL(float16x8_t, float16x8_t, vinvq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VNOT_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_NOT_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/orr.h b/arm_compute/core/NEON/wrapper/intrinsics/orr.h deleted file mode 100644 index 0fbdd44c76..0000000000 --- 
a/arm_compute/core/NEON/wrapper/intrinsics/orr.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_ORR_H -#define ARM_COMPUTE_WRAPPER_ORR_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VORR_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vorr(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VORR_IMPL(uint8_t, uint8x8_t, vorr, u8) -VORR_IMPL(int8_t, int8x8_t, vorr, s8) -VORR_IMPL(uint16_t, uint16x4_t, vorr, u16) -VORR_IMPL(int16_t, int16x4_t, vorr, s16) -VORR_IMPL(uint32_t, uint32x2_t, vorr, u32) -VORR_IMPL(int32_t, int32x2_t, vorr, s32) -VORR_IMPL(uint64_t, uint64x1_t, vorr, u64) -VORR_IMPL(int64_t, int64x1_t, vorr, s64) - -VORR_IMPL(uint8_t, uint8x16_t, vorrq, u8) -VORR_IMPL(int8_t, int8x16_t, vorrq, s8) -VORR_IMPL(uint16_t, uint16x8_t, vorrq, u16) -VORR_IMPL(int16_t, int16x8_t, vorrq, s16) -VORR_IMPL(uint32_t, uint32x4_t, vorrq, u32) -VORR_IMPL(int32_t, int32x4_t, vorrq, s32) -VORR_IMPL(uint64_t, uint64x2_t, vorrq, u64) -VORR_IMPL(int64_t, int64x2_t, vorrq, s64) - -#undef VORR_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_ORR_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmax.h b/arm_compute/core/NEON/wrapper/intrinsics/pmax.h deleted file mode 100644 index afad27f1e4..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/pmax.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_PMAX_H -#define ARM_COMPUTE_WRAPPER_PMAX_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VPMAX_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vpmax(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VPMAX_IMPL(uint8_t, uint8x8_t, vpmax, u8) -VPMAX_IMPL(int8_t, int8x8_t, vpmax, s8) -VPMAX_IMPL(uint16_t, uint16x4_t, vpmax, u16) -VPMAX_IMPL(int16_t, int16x4_t, vpmax, s16) -VPMAX_IMPL(uint32_t, uint32x2_t, vpmax, u32) -VPMAX_IMPL(int32_t, int32x2_t, vpmax, s32) -VPMAX_IMPL(float, float32x2_t, vpmax, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VPMAX_IMPL(float16_t, float16x4_t, vpmax, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VPMAX_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_PMAX_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pmin.h b/arm_compute/core/NEON/wrapper/intrinsics/pmin.h deleted file mode 100644 index 77c5cf61ba..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/pmin.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_PMIN_H -#define ARM_COMPUTE_WRAPPER_PMIN_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VPMIN_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vpmin(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VPMIN_IMPL(uint8_t, uint8x8_t, vpmin, u8) -VPMIN_IMPL(int8_t, int8x8_t, vpmin, s8) -VPMIN_IMPL(uint16_t, uint16x4_t, vpmin, u16) -VPMIN_IMPL(int16_t, int16x4_t, vpmin, s16) -VPMIN_IMPL(uint32_t, uint32x2_t, vpmin, u32) -VPMIN_IMPL(int32_t, int32x2_t, vpmin, s32) -VPMIN_IMPL(float, float32x2_t, vpmin, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VPMIN_IMPL(float16_t, float16x4_t, vpmin, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VPMIN_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_PMIN_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/pow.h b/arm_compute/core/NEON/wrapper/intrinsics/pow.h deleted file mode 100644 index 1b5d62df5e..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/pow.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_POW_H -#define ARM_COMPUTE_WRAPPER_POW_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VPOW_IMPL(vtype, prefix, postfix) \ - inline vtype vpow(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VPOW_IMPL(float32x4_t, vpowq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VPOW_IMPL(float16x8_t, vpowq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VPOW_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_POW_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/qmovun.h b/arm_compute/core/NEON/wrapper/intrinsics/qmovun.h deleted file mode 100644 index a0347020db..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/qmovun.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_QMOVUN_H -#define ARM_COMPUTE_WRAPPER_QMOVUN_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VQMOVUN_IMPL(dtype, vtype, prefix, postfix) \ - inline dtype vqmovun(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VQMOVUN_IMPL(uint32x2_t, int64x2_t, vqmovun, s64) -VQMOVUN_IMPL(uint16x4_t, int32x4_t, vqmovun, s32) -VQMOVUN_IMPL(uint8x8_t, int16x8_t, vqmovun, s16) - -#undef VQMOVUN_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_QMOVUN_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h b/arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h deleted file mode 100644 index 579da344a7..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/reinterpret.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_REINTERPRET_H -#define ARM_COMPUTE_WRAPPER_REINTERPRET_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VREINTERPRET_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ - inline ptype vreinterpret(const vtype &a) \ - { \ - return prefix##_##postfix1##_##postfix2(a); \ - } \ - \ - inline ptype vreinterpret(const ptype &a) \ - { \ - return a; \ - } - -VREINTERPRET_IMPL(int16x4_t, uint16x4_t, vreinterpret, s16, u16) - -VREINTERPRET_IMPL(int32x4_t, uint32x4_t, vreinterpretq, s32, u32) -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_REINTERPRET_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/rev64.h b/arm_compute/core/NEON/wrapper/intrinsics/rev64.h deleted file mode 100644 index 0385704f3f..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/rev64.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_REV64_H -#define ARM_COMPUTE_WRAPPER_REV64_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VREV64_IMPL(vtype, prefix, postfix) \ - inline vtype vrev64(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VREV64_IMPL(uint8x8_t, vrev64, u8) -VREV64_IMPL(int8x8_t, vrev64, s8) -VREV64_IMPL(uint16x4_t, vrev64, u16) -VREV64_IMPL(int16x4_t, vrev64, s16) -VREV64_IMPL(uint32x2_t, vrev64, u32) -VREV64_IMPL(int32x2_t, vrev64, s32) -VREV64_IMPL(float32x2_t, vrev64, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VREV64_IMPL(float16x4_t, vrev64, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VREV64_IMPL(uint8x16_t, vrev64q, u8) -VREV64_IMPL(int8x16_t, vrev64q, s8) -VREV64_IMPL(uint16x8_t, vrev64q, u16) -VREV64_IMPL(int16x8_t, vrev64q, s16) -VREV64_IMPL(uint32x4_t, vrev64q, u32) -VREV64_IMPL(int32x4_t, vrev64q, s32) -VREV64_IMPL(float32x4_t, vrev64q, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VREV64_IMPL(float16x8_t, vrev64q, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VREV64_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_REV64_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/round.h b/arm_compute/core/NEON/wrapper/intrinsics/round.h deleted file mode 100644 index d6f5a88689..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/round.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_ROUND_H -#define ARM_COMPUTE_WRAPPER_ROUND_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VROUNDQ_IMPL(vtype, postfix) \ - inline vtype vround(const vtype &a) \ - { \ - return vroundq_rte_##postfix(a); \ - } - -#define VROUNDQ_IMPL_INT(vtype, postfix) \ - inline vtype vround(const vtype &a) \ - { \ - ARM_COMPUTE_UNUSED(a); \ - ARM_COMPUTE_ERROR("Not supported"); \ - } - -VROUNDQ_IMPL(float32x4_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VROUNDQ_IMPL(float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VROUNDQ_IMPL_INT(int32x4_t, s32) -#undef VROUNDQ_IMPL - -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_ROUND_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/setlane.h b/arm_compute/core/NEON/wrapper/intrinsics/setlane.h deleted file mode 100644 index 6332f3025e..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/setlane.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_SET_LANE_H -#define ARM_COMPUTE_WRAPPER_SET_LANE_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VSETLANE_IMPL_8(stype, atype, vtype, postfix) \ - inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vset_lane_##postfix(value, vector, 0); \ - case 1: \ - return vset_lane_##postfix(value, vector, 1); \ - case 2: \ - return vset_lane_##postfix(value, vector, 2); \ - case 3: \ - return vset_lane_##postfix(value, vector, 3); \ - case 4: \ - return vset_lane_##postfix(value, vector, 4); \ - case 5: \ - return vset_lane_##postfix(value, vector, 5); \ - case 6: \ - return vset_lane_##postfix(value, vector, 6); \ - case 7: \ - return vset_lane_##postfix(value, vector, 7); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VSETLANE_IMPL_4(stype, atype, vtype, postfix) \ - inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vset_lane_##postfix(value, vector, 0); \ - case 1: \ - return vset_lane_##postfix(value, vector, 1); \ - case 2: \ - return vset_lane_##postfix(value, vector, 2); \ - case 3: \ - return vset_lane_##postfix(value, vector, 3); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VSETLANE_IMPL_2(stype, atype, vtype, postfix) \ - inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vset_lane_##postfix(value, vector, 0); \ - case 1: \ - return vset_lane_##postfix(value, vector, 1); \ - default: \ - 
ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -VSETLANE_IMPL_8(uint8x8_t, uint8_t, uint8x8_t, u8) -VSETLANE_IMPL_8(int8x8_t, int8_t, int8x8_t, s8) -VSETLANE_IMPL_4(uint16x4_t, uint16_t, uint16x4_t, u16) -VSETLANE_IMPL_4(int16x4_t, int16_t, int16x4_t, s16) -VSETLANE_IMPL_2(uint32x2_t, uint32_t, uint32x2_t, u32) -VSETLANE_IMPL_2(int32x2_t, int32_t, int32x2_t, s32) -VSETLANE_IMPL_2(float32x2_t, float, float32x2_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSETLANE_IMPL_4(float16x4_t, float16_t, float16x4_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#define VSETQLANE_IMPL_16(stype, atype, vtype, postfix) \ - inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vsetq_lane_##postfix(value, vector, 0); \ - case 1: \ - return vsetq_lane_##postfix(value, vector, 1); \ - case 2: \ - return vsetq_lane_##postfix(value, vector, 2); \ - case 3: \ - return vsetq_lane_##postfix(value, vector, 3); \ - case 4: \ - return vsetq_lane_##postfix(value, vector, 4); \ - case 5: \ - return vsetq_lane_##postfix(value, vector, 5); \ - case 6: \ - return vsetq_lane_##postfix(value, vector, 6); \ - case 7: \ - return vsetq_lane_##postfix(value, vector, 7); \ - case 8: \ - return vsetq_lane_##postfix(value, vector, 8); \ - case 9: \ - return vsetq_lane_##postfix(value, vector, 9); \ - case 10: \ - return vsetq_lane_##postfix(value, vector, 10); \ - case 11: \ - return vsetq_lane_##postfix(value, vector, 11); \ - case 12: \ - return vsetq_lane_##postfix(value, vector, 12); \ - case 13: \ - return vsetq_lane_##postfix(value, vector, 13); \ - case 14: \ - return vsetq_lane_##postfix(value, vector, 14); \ - case 15: \ - return vsetq_lane_##postfix(value, vector, 15); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VSETQLANE_IMPL_8(stype, atype, vtype, postfix) \ - inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \ - { \ - 
switch(lane) \ - { \ - case 0: \ - return vsetq_lane_##postfix(value, vector, 0); \ - case 1: \ - return vsetq_lane_##postfix(value, vector, 1); \ - case 2: \ - return vsetq_lane_##postfix(value, vector, 2); \ - case 3: \ - return vsetq_lane_##postfix(value, vector, 3); \ - case 4: \ - return vsetq_lane_##postfix(value, vector, 4); \ - case 5: \ - return vsetq_lane_##postfix(value, vector, 5); \ - case 6: \ - return vsetq_lane_##postfix(value, vector, 6); \ - case 7: \ - return vsetq_lane_##postfix(value, vector, 7); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -#define VSETQLANE_IMPL_4(stype, atype, vtype, postfix) \ - inline stype vsetlane(const atype value, const vtype vector, const unsigned int lane) \ - { \ - switch(lane) \ - { \ - case 0: \ - return vsetq_lane_##postfix(value, vector, 0); \ - case 1: \ - return vsetq_lane_##postfix(value, vector, 1); \ - case 2: \ - return vsetq_lane_##postfix(value, vector, 2); \ - case 3: \ - return vsetq_lane_##postfix(value, vector, 3); \ - default: \ - ARM_COMPUTE_ERROR("Invalid lane"); \ - } \ - } - -VSETQLANE_IMPL_16(uint8x16_t, uint8_t, uint8x16_t, u8) -VSETQLANE_IMPL_16(int8x16_t, int8_t, int8x16_t, s8) -VSETQLANE_IMPL_8(uint16x8_t, uint16_t, uint16x8_t, u16) -VSETQLANE_IMPL_8(int16x8_t, int16_t, int16x8_t, s16) -VSETQLANE_IMPL_4(uint32x4_t, uint32_t, uint32x4_t, u32) -VSETQLANE_IMPL_4(int32x4_t, int32_t, int32x4_t, s32) -VSETQLANE_IMPL_4(float32x4_t, float, float32x4_t, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSETQLANE_IMPL_8(float16x8_t, float16_t, float16x8_t, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VSETLANE_IMPL_8 -#undef VSETLANE_IMPL_4 -#undef VSETLANE_IMPL_2 - -#undef VSETQLANE_IMPL_16 -#undef VSETQLANE_IMPL_8 -#undef VSETQLANE_IMPL_4 -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_SET_LANE_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sin.h b/arm_compute/core/NEON/wrapper/intrinsics/sin.h deleted file mode 
100644 index bca72db38a..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/sin.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_SIN_H -#define ARM_COMPUTE_WRAPPER_SIN_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VSIN_IMPL(vtype, prefix, postfix) \ - inline vtype vsin(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -#define VSIN_IMPL_INT(vtype, prefix, postfix) \ - inline vtype vsin(const vtype &a) \ - { \ - ARM_COMPUTE_UNUSED(a); \ - ARM_COMPUTE_ERROR("Not supported"); \ - } - -VSIN_IMPL(float32x4_t, vsinq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSIN_IMPL(float16x8_t, vsinq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VSIN_IMPL_INT(int32x4_t, vsinq, s32) - -#undef vsub_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_SUB_H */
\ No newline at end of file diff --git a/arm_compute/core/NEON/wrapper/intrinsics/store.h b/arm_compute/core/NEON/wrapper/intrinsics/store.h deleted file mode 100644 index eb2ae6a5e1..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/store.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_STORE_H -#define ARM_COMPUTE_WRAPPER_STORE_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VSTORE_IMPL(stype, vtype, prefix, postfix) \ - inline void vstore(stype *ptr, vtype val) \ - { \ - prefix##_##postfix(ptr, val); \ - } - -VSTORE_IMPL(uint8_t, uint8x8_t, vst1, u8) -VSTORE_IMPL(uint8_t, uint8x8x2_t, vst2, u8) -VSTORE_IMPL(int8_t, int8x8_t, vst1, s8) -VSTORE_IMPL(int8_t, int8x8x2_t, vst2, s8) -VSTORE_IMPL(uint16_t, uint16x4_t, vst1, u16) -VSTORE_IMPL(int16_t, int16x4_t, vst1, s16) -VSTORE_IMPL(uint32_t, uint32x2_t, vst1, u32) -VSTORE_IMPL(int32_t, int32x2_t, vst1, s32) -//VSTORE_IMPL(uint64_t, 1, vst1, u64) -//VSTORE_IMPL(int64_t, 1, vst1, s64) -VSTORE_IMPL(float, float32x2_t, vst1, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSTORE_IMPL(float16_t, float16x4_t, vst1, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VSTORE_IMPL(uint8_t, uint8x16_t, vst1q, u8) -VSTORE_IMPL(int8_t, int8x16_t, vst1q, s8) -VSTORE_IMPL(uint16_t, uint16x8_t, vst1q, u16) -VSTORE_IMPL(int16_t, int16x8_t, vst1q, s16) -VSTORE_IMPL(uint32_t, uint32x4_t, vst1q, u32) -VSTORE_IMPL(int32_t, int32x4_t, vst1q, s32) -//VSTORE_IMPL(uint64_t, 2, vst1q, u64) -//VSTORE_IMPL(int64_t, 2, vst1q, s64) -VSTORE_IMPL(float, float32x4_t, vst1q, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSTORE_IMPL(float16_t, float16x8_t, vst1q, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VSTORE_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_STORE_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/sub.h b/arm_compute/core/NEON/wrapper/intrinsics/sub.h deleted file mode 100644 index f46b57c815..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/sub.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_SUB_H -#define ARM_COMPUTE_WRAPPER_SUB_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VSUB_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vsub(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VSUB_IMPL(uint8x8_t, uint8x8_t, vsub, u8) -VSUB_IMPL(int8x8_t, int8x8_t, vsub, s8) -VSUB_IMPL(uint16x4_t, uint16x4_t, vsub, u16) -VSUB_IMPL(int16x4_t, int16x4_t, vsub, s16) -VSUB_IMPL(uint32x2_t, uint32x2_t, vsub, u32) -VSUB_IMPL(int32x2_t, int32x2_t, vsub, s32) -VSUB_IMPL(uint64x1_t, uint64x1_t, vsub, u64) -VSUB_IMPL(int64x1_t, int64x1_t, vsub, s64) -VSUB_IMPL(float32x2_t, float32x2_t, vsub, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSUB_IMPL(float16x4_t, float16x4_t, vsub, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VSUB_IMPL(uint8x16_t, uint8x16_t, vsubq, u8) -VSUB_IMPL(int8x16_t, int8x16_t, vsubq, s8) -VSUB_IMPL(uint16x8_t, uint16x8_t, vsubq, u16) -VSUB_IMPL(int16x8_t, int16x8_t, vsubq, s16) -VSUB_IMPL(uint32x4_t, uint32x4_t, vsubq, u32) -VSUB_IMPL(int32x4_t, int32x4_t, vsubq, s32) -VSUB_IMPL(uint64x2_t, uint64x2_t, vsubq, u64) -VSUB_IMPL(int64x2_t, int64x2_t, vsubq, s64) -VSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -#undef VSUB_IMPL - -// VQSUB: Vector saturating sub (No notion of saturation for floating point) -#define VQSUB_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vqsub(const vtype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VQSUB_IMPL(uint8x8_t, uint8x8_t, vqsub, u8) -VQSUB_IMPL(int8x8_t, int8x8_t, vqsub, s8) -VQSUB_IMPL(uint16x4_t, uint16x4_t, vqsub, u16) -VQSUB_IMPL(int16x4_t, int16x4_t, vqsub, s16) -VQSUB_IMPL(uint32x2_t, uint32x2_t, vqsub, u32) -VQSUB_IMPL(int32x2_t, int32x2_t, vqsub, s32) -VQSUB_IMPL(uint64x1_t, uint64x1_t, vqsub, u64) 
-VQSUB_IMPL(int64x1_t, int64x1_t, vqsub, s64) -VQSUB_IMPL(float32x2_t, float32x2_t, vsub, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VQSUB_IMPL(float16x4_t, float16x4_t, vsub, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - -VQSUB_IMPL(uint8x16_t, uint8x16_t, vqsubq, u8) -VQSUB_IMPL(int8x16_t, int8x16_t, vqsubq, s8) -VQSUB_IMPL(uint16x8_t, uint16x8_t, vqsubq, u16) -VQSUB_IMPL(int16x8_t, int16x8_t, vqsubq, s16) -VQSUB_IMPL(uint32x4_t, uint32x4_t, vqsubq, u32) -VQSUB_IMPL(int32x4_t, int32x4_t, vqsubq, s32) -VQSUB_IMPL(uint64x2_t, uint64x2_t, vqsubq, u64) -VQSUB_IMPL(int64x2_t, int64x2_t, vqsubq, s64) -VQSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VQSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#undef VQSUB_IMPL - -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_SUB_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/tanh.h b/arm_compute/core/NEON/wrapper/intrinsics/tanh.h deleted file mode 100644 index 648a001ca7..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/tanh.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_TANH_H -#define ARM_COMPUTE_WRAPPER_TANH_H - -#include "arm_compute/core/NEON/NEMath.h" -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VTANH_IMPL(vtype, prefix, postfix) \ - inline vtype vtanh(const vtype &a) \ - { \ - return prefix##_##postfix(a); \ - } - -VTANH_IMPL(float32x4_t, vtanhq, f32) -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -VTANH_IMPL(float16x8_t, vtanhq, f16) -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#undef VTANH_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_TANH_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/tbl.h b/arm_compute/core/NEON/wrapper/intrinsics/tbl.h deleted file mode 100644 index d3d6b72e6a..0000000000 --- a/arm_compute/core/NEON/wrapper/intrinsics/tbl.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_TBL_H -#define ARM_COMPUTE_WRAPPER_TBL_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -#define VTBL_IMPL(stype, vtype, prefix, postfix) \ - inline vtype vtbl(const stype &a, const vtype &b) \ - { \ - return prefix##_##postfix(a, b); \ - } - -VTBL_IMPL(uint8x8x2_t, uint8x8_t, vtbl2, u8) -VTBL_IMPL(int8x8x2_t, int8x8_t, vtbl2, s8) - -#undef VTBL_IMPL -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_TBL_H */ diff --git a/arm_compute/core/NEON/wrapper/scalar/add.h b/arm_compute/core/NEON/wrapper/scalar/add.h deleted file mode 100644 index 5a04fe20fa..0000000000 --- a/arm_compute/core/NEON/wrapper/scalar/add.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_SCALAR_ADD_H -#define ARM_COMPUTE_WRAPPER_SCALAR_ADD_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -inline uint8_t add_sat(const uint8_t &a, const uint8_t &b) -{ - const uint8x8_t va = { a, 0, 0, 0, 0, 0, 0, 0 }; - const uint8x8_t vb = { b, 0, 0, 0, 0, 0, 0, 0 }; - return vget_lane_u8(vqadd_u8(va, vb), 0); -} - -inline int16_t add_sat(const int16_t &a, const int16_t &b) -{ - const int16x4_t va = { a, 0, 0, 0 }; - const int16x4_t vb = { b, 0, 0, 0 }; - return vget_lane_s16(vqadd_s16(va, vb), 0); -} - -inline float add_sat(const float &a, const float &b) -{ - // No notion of saturation exists in floating point - return a + b; -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -inline float16_t add_sat(const float16_t &a, const float16_t &b) -{ - // No notion of saturation exists in floating point - return a + b; -} -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_SCALAR_ADD_H */ diff --git a/arm_compute/core/NEON/wrapper/scalar/scalar.h b/arm_compute/core/NEON/wrapper/scalar/scalar.h deleted file mode 100644 index ff2d807c0e..0000000000 --- a/arm_compute/core/NEON/wrapper/scalar/scalar.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_SCALAR_H -#define ARM_COMPUTE_WRAPPER_SCALAR_H - -#include "arm_compute/core/NEON/wrapper/scalar/add.h" -#include "arm_compute/core/NEON/wrapper/scalar/sub.h" - -#endif /* ARM_COMPUTE_WRAPPER_SCALAR_H */ diff --git a/arm_compute/core/NEON/wrapper/scalar/sub.h b/arm_compute/core/NEON/wrapper/scalar/sub.h deleted file mode 100644 index 5b4cab93d3..0000000000 --- a/arm_compute/core/NEON/wrapper/scalar/sub.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_SCALAR_SUB_H -#define ARM_COMPUTE_WRAPPER_SCALAR_SUB_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -inline uint8_t sub_sat(const uint8_t &a, const uint8_t &b) -{ - const uint8x8_t va = { a, 0, 0, 0, 0, 0, 0, 0 }; - const uint8x8_t vb = { b, 0, 0, 0, 0, 0, 0, 0 }; - return vget_lane_u8(vqsub_u8(va, vb), 0); -} - -inline int16_t sub_sat(const int16_t &a, const int16_t &b) -{ - const int16x4_t va = { a, 0, 0, 0 }; - const int16x4_t vb = { b, 0, 0, 0 }; - return vget_lane_s16(vqsub_s16(va, vb), 0); -} - -inline float sub_sat(const float &a, const float &b) -{ - // No notion of saturation exists in floating point - return a - b; -} - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -inline float16_t sub_sat(const float16_t &a, const float16_t &b) -{ - // No notion of saturation exists in floating point - return a - b; -} -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_SCALAR_SUB_H */ diff --git a/arm_compute/core/NEON/wrapper/traits.h b/arm_compute/core/NEON/wrapper/traits.h deleted file mode 100644 index ae77d2778c..0000000000 --- a/arm_compute/core/NEON/wrapper/traits.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2018-2020 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_WRAPPER_TRAITS_H -#define ARM_COMPUTE_WRAPPER_TRAITS_H - -#include <arm_neon.h> - -namespace arm_compute -{ -namespace wrapper -{ -namespace traits -{ -// *INDENT-OFF* -// clang-format off - -/** 64-bit vector tag */ -struct vector_64_tag {}; -/** 128-bit vector tag */ -struct vector_128_tag {}; - -/** Create the appropriate NEON vector given its type and size in terms of elements */ -template <typename T, int S> struct neon_vector; - -// Specializations -#ifndef DOXYGEN_SKIP_THIS -template <> struct neon_vector<uint8_t, 8>{ using scalar_type = uint8_t; using type = uint8x8_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<int8_t, 8>{ using scalar_type = int8_t; using type = int8x8_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<uint8_t, 16>{ using scalar_type = uint8_t; using type = uint8x16_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<int8_t, 16>{ using scalar_type = int8_t; using type = int8x16_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<uint16_t, 4>{ using scalar_type = uint16_t; using type = uint16x4_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<int16_t, 4>{ using scalar_type = int16_t; using type = int16x4_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<uint16_t, 8>{ using scalar_type = uint16_t; using type = uint16x8_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<uint16_t, 16>{ using scalar_type = uint16_t; using type = uint16x8x2_t; }; -template <> struct neon_vector<int16_t, 8>{ using scalar_type = int16_t; using type = int16x8_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<int16_t, 16>{ using scalar_type = int16_t; using type = int16x8x2_t; }; -template <> struct neon_vector<uint32_t, 2>{ using scalar_type = uint32_t; using type = uint32x2_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<int32_t, 2>{ using scalar_type = 
int32_t; using type = int32x2_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<uint32_t, 4>{ using scalar_type = uint32_t; using type = uint32x4_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<int32_t, 4>{ using scalar_type = int32_t; using type = int32x4_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<uint64_t, 1>{ using scalar_type = uint64_t;using type = uint64x1_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<int64_t, 1>{ using scalar_type = int64_t; using type = int64x1_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<uint64_t, 2>{ using scalar_type = uint64_t; using type = uint64x2_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<int64_t, 2>{ using scalar_type = int64_t; using type = int64x2_t; using tag_type = vector_128_tag; }; -template <> struct neon_vector<float_t, 2>{ using scalar_type = float_t; using type = float32x2_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<float_t, 4>{ using scalar_type = float_t; using type = float32x4_t; using tag_type = vector_128_tag; }; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template <> struct neon_vector<float16_t, 4>{ using scalar_type = float16_t; using type = float16x4_t; using tag_type = vector_64_tag; }; -template <> struct neon_vector<float16_t, 8>{ using scalar_type = float16_t; using type = float16x8_t; using tag_type = vector_128_tag; }; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#endif /* DOXYGEN_SKIP_THIS */ - -/** Helper type template to get the type of a neon vector */ -template <typename T, int S> using neon_vector_t = typename neon_vector<T, S>::type; -/** Helper type template to get the tag type of a neon vector */ -template <typename T, int S> using neon_vector_tag_t = typename neon_vector<T, S>::tag_type; - -/** Vector bit-width enum class */ -enum class BitWidth -{ - W64, /**< 64-bit width */ - W128, /**< 128-bit width */ -}; - -/** 
Create the appropriate NEON vector given its type and size in terms of bits */ -template <typename T, BitWidth BW> struct neon_bitvector; -// Specializations -#ifndef DOXYGEN_SKIP_THIS -template <> struct neon_bitvector<uint8_t, BitWidth::W64>{ using type = uint8x8_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<int8_t, BitWidth::W64>{ using type = int8x8_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<uint8_t, BitWidth::W128>{ using type = uint8x16_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<int8_t, BitWidth::W128>{ using type = int8x16_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<uint16_t, BitWidth::W64>{ using type = uint16x4_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<int16_t, BitWidth::W64>{ using type = int16x4_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<uint16_t, BitWidth::W128>{ using type = uint16x8_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<int16_t, BitWidth::W128>{ using type = int16x8_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<uint32_t, BitWidth::W64>{ using type = uint32x2_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<int32_t, BitWidth::W64>{ using type = int32x2_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<uint32_t, BitWidth::W128>{ using type = uint32x4_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<int32_t, BitWidth::W128>{ using type = int32x4_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<uint64_t, BitWidth::W64>{ using type = uint64x1_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<int64_t, BitWidth::W64>{ using type = int64x1_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<uint64_t, BitWidth::W128>{ using type = uint64x2_t; using tag_type = vector_128_tag; }; 
-template <> struct neon_bitvector<int64_t, BitWidth::W128>{ using type = int64x2_t; using tag_type = vector_128_tag; }; -template <> struct neon_bitvector<float_t, BitWidth::W64>{ using type = float32x2_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<float_t, BitWidth::W128>{ using type = float32x4_t; using tag_type = vector_128_tag; }; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -template <> struct neon_bitvector<float16_t, BitWidth::W64>{ using type = float16x4_t; using tag_type = vector_64_tag; }; -template <> struct neon_bitvector<float16_t, BitWidth::W128>{ using type = float16x8_t; using tag_type = vector_128_tag; }; -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#endif /* DOXYGEN_SKIP_THIS */ - -/** Helper type template to get the type of a neon vector */ -template <typename T, BitWidth BW> using neon_bitvector_t = typename neon_bitvector<T, BW>::type; -/** Helper type template to get the tag type of a neon vector */ -template <typename T, BitWidth BW> using neon_bitvector_tag_t = typename neon_bitvector<T, BW>::tag_type; - -/** Promote a type */ -template <typename T> struct promote { }; -template <> struct promote<uint8_t> { using type = uint16_t; }; -template <> struct promote<int8_t> { using type = int16_t; }; -template <> struct promote<uint16_t> { using type = uint32_t; }; -template <> struct promote<int16_t> { using type = int32_t; }; -template <> struct promote<uint32_t> { using type = uint64_t; }; -template <> struct promote<int32_t> { using type = int64_t; }; -template <> struct promote<float> { using type = float; }; -template <> struct promote<half> { using type = half; }; - -/** Get promoted type */ -template <typename T> -using promote_t = typename promote<T>::type; - -// clang-format on -// *INDENT-ON* -} // namespace traits -} // namespace wrapper -} // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_TRAITS_H */ diff --git a/arm_compute/core/NEON/wrapper/wrapper.h b/arm_compute/core/NEON/wrapper/wrapper.h 
deleted file mode 100644 index 99a5909e8b..0000000000 --- a/arm_compute/core/NEON/wrapper/wrapper.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_WRAPPER_H -#define ARM_COMPUTE_WRAPPER_H - -// Traits -#include "arm_compute/core/NEON/wrapper/traits.h" - -// Intrinsics Overloads -#include "arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h" -#include "arm_compute/core/NEON/wrapper/scalar/scalar.h" - -#endif /* ARM_COMPUTE_WRAPPER_H */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 337ccbc3f7..0b4df4f2e2 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_PIXELVALUE_H #define ARM_COMPUTE_PIXELVALUE_H +#include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/Types.h" #include <cstdint> @@ -35,8 +36,7 @@ class PixelValue { public: /** Default constructor: value initialized to 0 */ - PixelValue() - : value{ int64_t(0) } + PixelValue() noexcept : value{int64_t(0)} { } /** Initialize the union with a pixel value of chosen datatype @@ -45,10 +45,9 @@ public: * @param[in] datatype DataType that @p v have to be stored * @param[in] qinfo (Optional) QuantizationInfo to apply in case of quantized data types to @p v */ - PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) - : PixelValue() + PixelValue(double v, DataType datatype, QuantizationInfo qinfo = QuantizationInfo()) : PixelValue() { - switch(datatype) + switch (datatype) { case DataType::U8: value.u8 = static_cast<uint8_t>(v); @@ -108,8 +107,7 @@ public: * * @param[in] v S8 value. */ - PixelValue(int8_t v) - : PixelValue() + PixelValue(int8_t v) : PixelValue() { value.s8 = v; } @@ -117,8 +115,7 @@ public: * * @param[in] v U8 value. */ - PixelValue(uint8_t v) - : PixelValue() + PixelValue(uint8_t v) : PixelValue() { value.u8 = v; } @@ -126,8 +123,7 @@ public: * * @param[in] v U16 value. */ - PixelValue(uint16_t v) - : PixelValue() + PixelValue(uint16_t v) : PixelValue() { value.u16 = v; } @@ -135,8 +131,7 @@ public: * * @param[in] v S16 value. */ - PixelValue(int16_t v) - : PixelValue() + PixelValue(int16_t v) : PixelValue() { value.s16 = v; } @@ -144,8 +139,7 @@ public: * * @param[in] v U32 value. */ - PixelValue(uint32_t v) - : PixelValue() + PixelValue(uint32_t v) : PixelValue() { value.u32 = v; } @@ -153,8 +147,7 @@ public: * * @param[in] v S32 value. */ - PixelValue(int32_t v) - : PixelValue() + PixelValue(int32_t v) : PixelValue() { value.s32 = v; } @@ -163,8 +156,7 @@ public: * * @param[in] v U64 value. 
*/ - PixelValue(uint64_t v) - : PixelValue() + PixelValue(uint64_t v) : PixelValue() { value.u64 = v; } @@ -172,8 +164,7 @@ public: * * @param[in] v S64 value. */ - PixelValue(int64_t v) - : PixelValue() + PixelValue(int64_t v) : PixelValue() { value.s64 = v; } @@ -181,8 +172,7 @@ public: * * @param[in] v F16 value. */ - PixelValue(bfloat16 v) - : PixelValue() + PixelValue(bfloat16 v) : PixelValue() { value.bf16 = v; } @@ -190,8 +180,7 @@ public: * * @param[in] v F16 value. */ - PixelValue(half v) - : PixelValue() + PixelValue(half v) : PixelValue() { value.f16 = v; } @@ -199,8 +188,7 @@ public: * * @param[in] v F32 value. */ - PixelValue(float v) - : PixelValue() + PixelValue(float v) : PixelValue() { value.f32 = v; } @@ -208,8 +196,7 @@ public: * * @param[in] v F64 value. */ - PixelValue(double v) - : PixelValue() + PixelValue(double v) : PixelValue() { value.f64 = v; } @@ -217,23 +204,23 @@ public: * Use the field corresponding to the image format */ union - { - uint64_t u64; /**< Single channel U64 */ - int64_t s64; /**< Single channel S64 */ - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - double f64; /**< Single channel double */ - float f32; /**< Single channel float 32 */ - half f16; /**< Single channel F16 */ - bfloat16 bf16; /**< Single channel brain floating-point number */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ - } value; + { + uint64_t u64; /**< Single channel U64 */ + int64_t s64; /**< Single channel S64 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + double f64; /**< Single channel double */ + float f32; /**< Single channel float 32 */ + half 
f16; /**< Single channel F16 */ + bfloat16 bf16; /**< Single channel brain floating-point number */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ + } value; /** Interpret the pixel value as a U8 * * @param[out] v Returned value diff --git a/arm_compute/core/PyramidInfo.h b/arm_compute/core/PyramidInfo.h deleted file mode 100644 index e8cbe3488a..0000000000 --- a/arm_compute/core/PyramidInfo.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_PYRAMIDINFO_H -#define ARM_COMPUTE_PYRAMIDINFO_H - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -#include <cstddef> - -namespace arm_compute -{ -/** Store the Pyramid's metadata */ -class PyramidInfo -{ -public: - /** Default constructor */ - PyramidInfo(); - /** Default destructor */ - virtual ~PyramidInfo() = default; - /** Allow instances of this class to be copy constructed */ - PyramidInfo(const PyramidInfo &) = default; - /** Allow instances of this class to be copied */ - PyramidInfo &operator=(const PyramidInfo &) = default; - /** Allow instances of this class to be move constructed */ - PyramidInfo(PyramidInfo &&) = default; - /** Allow instances of this class to be moved */ - PyramidInfo &operator=(PyramidInfo &&) = default; - - /** Create pyramid info for 2D tensors - * - * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value - * @param[in] scale Used to indicate the scale between the pyramid levels. - * This is required to be a non-zero positive value. - * @param[in] width The width of the 2D tensor at 0th pyramid level - * @param[in] height The height of the 2D tensor at 0th pyramid level - * @param[in] format The format of all 2D tensors in the pyramid - * NV12, NV21, IYUV, UYVY and YUYV formats are not supported. - */ - PyramidInfo(size_t num_levels, float scale, size_t width, size_t height, Format format); - - /** Create pyramid info using TensorShape - * - * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value - * @param[in] scale Used to indicate the scale between the pyramid levels. - * This is required to be a non-zero positive value. 
- * @param[in] tensor_shape It specifies the size for each dimension of the tensor 0th pyramid level in number of elements - * @param[in] format The format of all tensors in the pyramid - */ - PyramidInfo(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format); - - /** Initialize pyramid's metadata for 2D tensors - * - * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value - * @param[in] scale Used to indicate the scale between the pyramid levels. - * This is required to be a non-zero positive value. - * @param[in] width The width of the 2D tensor at 0th pyramid level - * @param[in] height The height of the 2D tensor at 0th pyramid level - * @param[in] format The format of all 2D tensors in the pyramid - * NV12, NV21, IYUV, UYVY and YUYV formats are not supported. - */ - void init(size_t num_levels, float scale, size_t width, size_t height, Format format); - /** Initialize pyramid's metadata using TensorShape - * - * @param[in] num_levels The number of pyramid levels. This is required to be a non-zero value - * @param[in] scale Used to indicate the scale between the pyramid levels. - * This is required to be a non-zero positive value. 
- * @param[in] tensor_shape It specifies the size for each dimension of the tensor 0th pyramid level in number of elements - * @param[in] format The format of all tensors in the pyramid - */ - void init(size_t num_levels, float scale, const TensorShape &tensor_shape, Format format); - /** Return the number of the pyramid levels - * - * @return The number of the pyramid levels - */ - size_t num_levels() const; - /** Return the width of the 0th level tensor - * - * @return The width of the 0th level tensor - */ - size_t width() const; - /** Return the height of the 0th level tensor - * - * @return The height of the 0th level tensor - */ - size_t height() const; - /** Return the TensorShape of the o-th level tensor - * - * @return - */ - const TensorShape &tensor_shape() const; - /** Return the image format of all tensor in the pyramid - * - * @return The image format - */ - Format format() const; - /** Return the scale factor of the pyramid - * - * @return Return the scale factor - */ - float scale() const; - -private: - size_t _num_levels; - TensorShape _tensor_shape; - Format _format; - float _scale; -}; -} -#endif /*ARM_COMPUTE_PYRAMIDINFO_H */ diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h index 52ef149e9b..aecba3712e 100644 --- a/arm_compute/core/QuantizationInfo.h +++ b/arm_compute/core/QuantizationInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H -#define ARM_COMPUTE_QUANTIZATION_INFO_H +#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H +#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H #include "arm_compute/core/Rounding.h" -#include "utils/misc/Utility.h" -#include "arm_compute/core/Error.h" +#include "arm_compute/core/utils/misc/Utility.h" + +#include "support/ToolchainSupport.h" -#include <cstddef> -#include <type_traits> #include <vector> namespace arm_compute @@ -43,8 +42,7 @@ using qasymm16_t = uint16_t; /**< 16 bit quantized asymmetric scalar value struct UniformQuantizationInfo { /** Default constructor */ - UniformQuantizationInfo() - : scale(0.f), offset(0) + UniformQuantizationInfo() : scale(0.f), offset(0) { } /** Constructor @@ -52,8 +50,7 @@ struct UniformQuantizationInfo * @param[in] scale Quantization scale * @param[in] offset Quantization offset */ - UniformQuantizationInfo(float scale, int32_t offset) - : scale(scale), offset(offset) + UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset) { } /** Checks if the scale and offset are both zero */ @@ -71,9 +68,7 @@ class QuantizationInfo { public: /** Default constructor */ - QuantizationInfo() noexcept - : _scale(), - _offset() + QuantizationInfo() noexcept : _scale(), _offset() { } /** Construct quantization info. @@ -82,19 +77,19 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(float scale) - : _scale(1, scale), _offset() + QuantizationInfo(float scale) : _scale(1, scale), _offset() { } /** Construct quantization info. * * @note Used for asymmetric quantization * - * @param[in] scale Scale. - * @param[in] offset Offset. + * @param[in] scale Scale. + * @param[in] offset Offset. + * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change. 
*/ - QuantizationInfo(float scale, int offset) - : _scale(1, scale), _offset(1, offset) + QuantizationInfo(float scale, int offset, bool is_dynamic = false) + : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic) { } /** Construct quantization info. @@ -103,19 +98,19 @@ public: * * @param[in] scale Scale. */ - QuantizationInfo(std::vector<float> scale) - : _scale(scale), _offset() + QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset() { } /** Construct quantization info. * * @note Used for asymmetric per channel quantization * - * @param[in] scale Scale. - * @param[in] offset Offset. + * @param[in] scale Scale. + * @param[in] offset Offset. + * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change. */ - QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) - : _scale(scale), _offset(offset) + QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false) + : _scale(scale), _offset(offset), _is_dynamic(is_dynamic) { } /** Scale vector accessor @@ -134,6 +129,14 @@ public: { return _offset; } + /** is_dynamic accessor + * + * @return If true, the scale and offset may change, so operators will need to read on every run + */ + bool is_dynamic() const + { + return _is_dynamic; + } /** Indicates whether this QuantizationInfo has valid settings or not * * @return True if the this has invalid settings. @@ -158,6 +161,8 @@ public: private: std::vector<float> _scale; /**< Vector containing scaling factors */ std::vector<int32_t> _offset; /**< Vector containing zero offsets */ + bool _is_dynamic = + false; /**< If true, the scale and offset may change, so operators will need to read on every run */ }; /** Check whether two quantization info are equal. 
@@ -210,20 +215,39 @@ inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantiza template <typename QUANTIZED_TYPE = uint8_t> struct Qasymm8QuantizationHelper { - static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value - || std::is_same<QUANTIZED_TYPE, int8_t>::value, + static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value, "quantized type should be either uint8_t or int8_t."); /** Quantize a value given a 8-bit asymmetric quantization scheme * + * @param[in] value Value to quantize + * @param[in] qinfo Quantization information to use for quantizing + * + * @return Quantized value + */ + static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo) + { + ARM_COMPUTE_ERROR_ON(qinfo.scale == 0); + const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset; + return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized)); + } + + /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy + * * @param[in] value Value to quantize * @param[in] qinfo Quantization information to use for quantizing - * @param[in] rounding_policy (Optional) Rounding policy to use. 
Default: nearest up + * @param[in] rounding_policy Rounding policy to use * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE + quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy) { + if (rounding_policy == RoundingPolicy::TO_NEAREST_UP) + { + return quantize(value, qinfo); + } + ARM_COMPUTE_ERROR_ON(qinfo.scale == 0); const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized)); @@ -237,7 +261,8 @@ struct Qasymm8QuantizationHelper * * @return Quantized value */ - static inline QUANTIZED_TYPE quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) + static inline QUANTIZED_TYPE + quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { const UniformQuantizationInfo uqinfo = qinfo.uniform(); ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0); @@ -280,7 +305,8 @@ struct Qasymm8QuantizationHelper * @return Quantized value */ template <typename INFO_TYPE> -inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint8_t +quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy); } @@ -294,7 +320,9 @@ inline uint8_t quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPol * @return Quantized value */ template <typename INFO_TYPE> -inline int8_t quantize_qasymm8_signed(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int8_t 
quantize_qasymm8_signed(float value, + const INFO_TYPE &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy); } @@ -416,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) return (static_cast<int>(value) - offset) * scale; } +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] scale Scale to use for dequantization + * @param[in] offset Zero-offset to use for dequantization + * + * @return Dequantized value + */ +inline float dequantize(int32_t value, float scale, int32_t offset) +{ + return (static_cast<int>(value) - offset) * scale; +} + /** Quantize a value given a 16-bit symmetric quantization scheme * * @param[in] value Value to quantize @@ -424,7 +465,9 @@ inline float dequantize(uint16_t value, float scale, int32_t offset) * * @return Quantized value */ -inline int16_t quantize_qsymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline int16_t quantize_qsymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy); quantized = arm_compute::utility::clamp<int, int16_t>(quantized); @@ -475,7 +518,9 @@ inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo) * * @return Quantized value */ -inline uint16_t quantize_qasymm16(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) +inline uint16_t quantize_qasymm16(float value, + const UniformQuantizationInfo &qinfo, + RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP) { int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset; quantized = arm_compute::utility::clamp<int, 
uint16_t>(quantized); @@ -518,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) return dequantize_qasymm16(value, qinfo.uniform()); } +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ +inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo) +{ + return (static_cast<int>(value) - qinfo.offset) * qinfo.scale; +} + +/** Dequantize a value given a 32-bit asymmetric quantization scheme + * + * @param[in] value Value to dequantize + * @param[in] qinfo Quantization information to use for dequantizing + * + * @return Dequantized value + */ + +inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo) +{ + return dequantize_s32(value, qinfo.uniform()); +} + /* * In case of requantization of a quantized input tensor to an output tensor with another quantization * instead of applying dequantization and then a quantization functions, we just compute new scale and @@ -548,7 +618,8 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo) * z_n = - z_i * s_i / s_o + z_o * */ -inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, const UniformQuantizationInfo &uqinfo_out) +inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in, + const UniformQuantizationInfo &uqinfo_out) { float scale_to_apply = uqinfo_out.scale; int32_t offset_to_apply = uqinfo_out.offset; @@ -562,4 +633,4 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform } } // namespace arm_compute -#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */ +#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H diff --git a/arm_compute/core/Rounding.h b/arm_compute/core/Rounding.h index 68d742907b..30a5a0fe9d 100644 --- 
a/arm_compute/core/Rounding.h +++ b/arm_compute/core/Rounding.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,5 +42,5 @@ enum class RoundingPolicy * @return Rounded value of the argument x. */ int round(float x, RoundingPolicy rounding_policy); -} +} // namespace arm_compute #endif /*ARM_COMPUTE_ROUNDING_H */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h index 722d7450f6..672b392050 100644 --- a/arm_compute/core/Size2D.h +++ b/arm_compute/core/Size2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,8 +41,7 @@ public: * @param[in] w Width of the image or rectangle * @param[in] h Height of the image or rectangle */ - Size2D(size_t w, size_t h) - : width(w), height(h) + Size2D(size_t w, size_t h) noexcept : width(w), height(h) { } /** The area of the image or rectangle calculated as (width * height) @@ -89,5 +88,5 @@ public: size_t width = {}; /**< Width of the image region or rectangle */ size_t height = {}; /**< Height of the image region or rectangle */ }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_SIZE2D_H */ diff --git a/arm_compute/core/Size3D.h b/arm_compute/core/Size3D.h new file mode 100644 index 0000000000..e2dc6fe012 --- /dev/null +++ b/arm_compute/core/Size3D.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_SIZE3D_H +#define ARM_COMPUTE_SIZE3D_H + +#include <string> + +namespace arm_compute +{ +/** Class for specifying the size of a 3D shape or object */ +class Size3D +{ +public: + /** Default constructor */ + Size3D() = default; + /** Constructor. Initializes "width", "height" and "depth" respectively with "w", "h" and "d" + * + * @param[in] w Width of the 3D shape or object + * @param[in] h Height of the 3D shape or object + * @param[in] d Depth of the 3D shape or object + */ + Size3D(size_t w, size_t h, size_t d) noexcept : width(w), height(h), depth(d) + { + } + + /** Convert the values stored to string + * + * @return string of (width x height x depth). + */ + std::string to_string() const; + + /** Semantic accessor for width as x. + * + * @return x. 
+ */ + size_t x() const + { + return width; + } + + /** Semantic accessor for height as y. + * + * @return y. + */ + size_t y() const + { + return height; + } + + /** Semantic accessor for depth as z. + * + * @return z. + */ + size_t z() const + { + return depth; + } + + bool operator!=(const Size3D &other) const + { + return !(*this == other); + } + + bool operator==(const Size3D &other) const + { + return (width == other.width) && (height == other.height) && (depth == other.depth); + } + +public: + size_t width = {}; /**< Width of the 3D shape or object */ + size_t height = {}; /**< Height of the 3D shape or object */ + size_t depth = {}; /**< Depth of the 3D shape or object */ +}; + +} // namespace arm_compute +#endif /* ARM_COMPUTE_SIZE3D_H */ diff --git a/arm_compute/core/Steps.h b/arm_compute/core/Steps.h index 6c89185a1f..6b261becc0 100644 --- a/arm_compute/core/Steps.h +++ b/arm_compute/core/Steps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,8 +45,7 @@ public: * @param[in] steps Values to initialize the steps. */ template <typename... Ts> - Steps(Ts... steps) - : Dimensions{ steps... } + Steps(Ts... steps) : Dimensions{steps...} { // Initialize empty dimensions to 1 std::fill(_id.begin() + _num_dimensions, _id.end(), 1); @@ -62,5 +61,5 @@ public: /** Default destructor */ ~Steps() = default; }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_STEPS_H*/ diff --git a/arm_compute/core/Strides.h b/arm_compute/core/Strides.h index a2a73377ea..627b219987 100644 --- a/arm_compute/core/Strides.h +++ b/arm_compute/core/Strides.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include <algorithm> #include <array> #include <cstddef> +#include <cstdint> namespace arm_compute { @@ -42,8 +43,7 @@ public: * @param[in] strides Values to initialize the strides. 
*/ template <typename... Ts> - constexpr Strides(Ts... strides) - : Dimensions{ strides... } + constexpr Strides(Ts... strides) : Dimensions{strides...} { } /** Allow instances of this class to be copy constructed */ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index bcb570ae7f..7a3ee2cfd0 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,12 @@ #ifndef ARM_COMPUTE_SUBTENSORINFO_H #define ARM_COMPUTE_SUBTENSORINFO_H -#include "arm_compute/core/ITensorInfo.h" - #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Validate.h" #include <cstddef> #include <memory> @@ -74,7 +72,7 @@ public: // Inherited methods overridden: std::unique_ptr<ITensorInfo> clone() const override; - ITensorInfo &set_data_type(DataType data_type) override + ITensorInfo &set_data_type(DataType data_type) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); _parent->set_data_type(data_type); @@ -99,6 +97,7 @@ public: return *this; }; ITensorInfo &set_tensor_shape(const TensorShape &shape) override; + ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override; ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); @@ -116,7 +115,13 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->auto_padding(); }; + + ITensorInfo &set_lock_paddings(bool flag) override; + + bool lock_paddings() const override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override { return _tensor_shape[index]; @@ -137,7 +142,7 @@ public: 
return _parent->offset_element_in_bytes(_coords); } int32_t offset_element_in_bytes(const Coordinates &pos) const override; - size_t element_size() const override + size_t element_size() const override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->element_size(); @@ -156,6 +161,11 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _tensor_shape; } + const TensorDimsState &tensor_dims_state() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _dims_state; + } DataType data_type() const override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); @@ -191,16 +201,21 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->is_dynamic(); } + bool are_values_constant() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->are_values_constant(); + } ITensorInfo &set_is_resizable(bool is_resizable) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); _parent->set_is_resizable(is_resizable); return *this; } - ITensorInfo &set_is_dynamic(bool is_dynamic) override + ITensorInfo &set_are_values_constant(bool are_values_constant) override { ARM_COMPUTE_ERROR_ON(_parent == nullptr); - _parent->set_is_dynamic(is_dynamic); + _parent->set_are_values_constant(are_values_constant); return *this; } ValidRegion valid_region() const override @@ -211,7 +226,7 @@ public: { ARM_COMPUTE_ERROR_ON(_parent == nullptr); // Check if subtensor is valid if parent is configured - if(_parent->tensor_shape().total_size() != 0) + if (_parent->tensor_shape().total_size() != 0) { ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(_parent->valid_region(), valid_region); } @@ -227,13 +242,26 @@ public: ARM_COMPUTE_ERROR_ON(_parent == nullptr); return _parent->data_layout(); } + ITensorInfo::Id id() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->id(); + } + ITensorInfo &set_id(ITensorInfo::Id id) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_id(id); + return *this; + } 
private: - ITensorInfo *_parent; - TensorShape _tensor_shape; - Coordinates _coords; - ValidRegion _valid_region; - bool _extend_parent; + ITensorInfo *_parent; + TensorShape _tensor_shape; + TensorDimsState _dims_state; + Coordinates _coords; + ValidRegion _valid_region; + bool _extend_parent; + bool _lock_paddings; }; } // namespace arm_compute #endif /*ARM_COMPUTE_SUBTENSORINFO_H */ diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h index 68570d58db..b18f750427 100644 --- a/arm_compute/core/TensorInfo.h +++ b/arm_compute/core/TensorInfo.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,23 +24,19 @@ #ifndef ARM_COMPUTE_TENSORINFO_H #define ARM_COMPUTE_TENSORINFO_H -#include "arm_compute/core/ITensorInfo.h" - -#include "ITensorInfo.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" +#include "ITensorInfo.h" #include <cstddef> #include <memory> namespace arm_compute { -class HOGInfo; - /** Store the tensor's metadata */ class TensorInfo final : public ITensorInfo { @@ -52,7 +48,7 @@ public: /** Allow instances of this class to be copy constructed */ TensorInfo(const ITensorInfo &info); /** Allow instances of this class to be copy constructed */ - TensorInfo(const TensorInfo &) = default; + TensorInfo(const TensorInfo &); /** Allow instances of this class to be copied */ TensorInfo &operator=(const TensorInfo &) = default; /** Allow instances of this class to be move constructed */ @@ -115,15 +111,10 @@ public: * @param[in] data_type Data type to use for each tensor element * @param[in] quantization_info The quantization settings for the tensor data. 
*/ - TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info); - - /** Constructor - * - * @param[in] hog_info HOG's metadata used to allocate normalized HOG space - * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on - * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on - */ - TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height); + TensorInfo(const TensorShape &tensor_shape, + size_t num_channels, + DataType data_type, + QuantizationInfo quantization_info); /** Initialize the tensor info with just a format. * @@ -147,7 +138,11 @@ public: * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). */ - void init(const TensorShape &tensor_shape, Format format, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, size_t total_size_in_bytes); + void init(const TensorShape &tensor_shape, + Format format, + const Strides &strides_in_bytes, + size_t offset_first_element_in_bytes, + size_t total_size_in_bytes); /** Initialize the tensor info with just a format. * @@ -175,15 +170,12 @@ public: * @param[in] offset_first_element_in_bytes Offset in bytes from the beginning of memory allocation to access the first element. * @param[in] total_size_in_bytes Size in bytes of the memory allocation (including the offset to the first element). 
*/ - void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, const Strides &strides_in_bytes, size_t offset_first_element_in_bytes, - size_t total_size_in_bytes); - /** Initialize the metadata structure for the given HOG's metadata - * - * @param[in] hog_info HOG's metadata used to allocate normalized HOG space - * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on - * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on - */ - void init(const HOGInfo &hog_info, unsigned int width, unsigned int height); + void init(const TensorShape &tensor_shape, + size_t num_channels, + DataType data_type, + const Strides &strides_in_bytes, + size_t offset_first_element_in_bytes, + size_t total_size_in_bytes); /** Initialize the metadata structure for the given tensor shape and single-plane format, (Padding is automatically calculated) * * @note The padding used by this method is really conservative so that the tensor can be used for most functions. @@ -206,30 +198,22 @@ public: * @return Total allocation size including padding in bytes. */ size_t init_auto_padding(const TensorShape &tensor_shape, size_t num_channels, DataType data_type); - /** Initialize the metadata structure for the given HOG's metadata - * - * @note init_auto_padding will be used for the tensor initialization. - * - * @param[in] hog_info HOG's metadata used to allocate normalized HOG space - * @param[in] width Width of the 2D tensor where the HOG descriptor will be computed on - * @param[in] height Height of the 2D tensor where the HOG descriptor will be computed on - * - * @return Total allocation size including padding in bytes. 
- */ - size_t init_auto_padding(const HOGInfo &hog_info, unsigned int width, unsigned int height); // Inherited methods overridden: std::unique_ptr<ITensorInfo> clone() const override; - ITensorInfo &set_data_type(DataType data_type) override; - ITensorInfo &set_num_channels(int num_channels) override; - ITensorInfo &set_format(Format format) override; - ITensorInfo &set_tensor_shape(const TensorShape &shape) override; - ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override; - ITensorInfo &set_data_layout(const DataLayout &data_layout) override; - ITensorInfo &reset_padding() override; - bool auto_padding() override; - bool extend_padding(const PaddingSize &padding) override; - size_t dimension(size_t index) const override + ITensorInfo &set_data_type(DataType data_type) override; + ITensorInfo &set_num_channels(int num_channels) override; + ITensorInfo &set_format(Format format) override; + ITensorInfo &set_tensor_shape(const TensorShape &shape) override; + ITensorInfo &set_tensor_dims_state(const TensorDimsState &state) override; + ITensorInfo &set_quantization_info(const QuantizationInfo &quantization_info) override; + ITensorInfo &set_data_layout(const DataLayout &data_layout) override; + ITensorInfo &reset_padding() override; + bool auto_padding() override; + ITensorInfo &set_lock_paddings(bool flag) override; + bool lock_paddings() const override; + bool extend_padding(const PaddingSize &padding) override; + size_t dimension(size_t index) const override { return _tensor_shape[index]; } @@ -246,7 +230,7 @@ public: return _offset_first_element_in_bytes; } int32_t offset_element_in_bytes(const Coordinates &pos) const override; - size_t element_size() const override + size_t element_size() const override { return data_size_from_type(_data_type) * _num_channels; } @@ -262,6 +246,10 @@ public: { return _tensor_shape; } + const TensorDimsState &tensor_dims_state() const override + { + return _dims_state; + } DataType data_type() 
const override { return _data_type; @@ -288,16 +276,16 @@ public: } bool is_dynamic() const override { - return _is_dynamic; + return std::find(std::cbegin(_dims_state), std::cend(_dims_state), get_dynamic_state_value()) != + std::cend(_dims_state); } - ITensorInfo &set_is_resizable(bool is_resizable) override + bool are_values_constant() const override { - _is_resizable = is_resizable; - return *this; + return _are_values_constant; } - ITensorInfo &set_is_dynamic(bool is_dynamic) override + ITensorInfo &set_is_resizable(bool is_resizable) override { - _is_dynamic = is_dynamic; + _is_resizable = is_resizable; return *this; } ValidRegion valid_region() const override @@ -316,6 +304,21 @@ public: { return _data_layout; } + ITensorInfo &set_are_values_constant(bool are_values_constant) override + { + _are_values_constant = are_values_constant; + return *this; + } + ITensorInfo::Id id() const override + { + return _id; + } + ITensorInfo &set_id(ITensorInfo::Id id) override + { + _id = id; + return *this; + } + inline friend bool operator==(const TensorInfo &lhs, const TensorInfo &rhs); private: /** Calculates strides, offset and total size resulting from the specified padding around the XY plane. @@ -329,14 +332,37 @@ private: Strides _strides_in_bytes; size_t _num_channels; TensorShape _tensor_shape; + TensorDimsState _dims_state; DataType _data_type; Format _format; bool _is_resizable; - bool _is_dynamic; ValidRegion _valid_region; PaddingSize _padding; QuantizationInfo _quantization_info; DataLayout _data_layout; + bool _are_values_constant; + ITensorInfo::Id _id; + bool _lock_paddings; }; + +/** Check whether two tensor info are equal. + * + * @param[in] lhs LHS tensor info. + * @param[in] rhs RHS tensor info. + * + * @return True if the given tensor infos are the same. 
+ */ +inline bool operator==(const TensorInfo &lhs, const TensorInfo &rhs) +{ + return (lhs._total_size == rhs._total_size) && + (lhs._offset_first_element_in_bytes == rhs._offset_first_element_in_bytes) && + (lhs._strides_in_bytes == rhs._strides_in_bytes) && (lhs._num_channels == rhs._num_channels) && + (lhs._tensor_shape == rhs._tensor_shape) && (lhs._dims_state == rhs._dims_state) && + (lhs._data_type == rhs._data_type) && (lhs._format == rhs._format) && + (lhs._is_resizable == rhs._is_resizable) && (lhs._valid_region == rhs._valid_region) && + (lhs._padding == rhs._padding) && (lhs._quantization_info == rhs._quantization_info) && + (lhs._data_layout == rhs._data_layout) && (lhs._are_values_constant == rhs._are_values_constant) && + (lhs._id == rhs._id); +} } // namespace arm_compute #endif /*ARM_COMPUTE_TENSORINFO_H */ diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h index 57d8f6cf63..c1707e262f 100644 --- a/arm_compute/core/TensorShape.h +++ b/arm_compute/core/TensorShape.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,7 @@ namespace arm_compute { /** Shape of a tensor */ -class TensorShape : public Dimensions<uint32_t> +class TensorShape : public Dimensions<size_t> { public: /** Constructor to initialize the tensor shape. @@ -44,11 +44,10 @@ public: * @param[in] dims Values to initialize the dimensions. */ template <typename... Ts> - TensorShape(Ts... dims) - : Dimensions{ dims... } + TensorShape(Ts... dims) : Dimensions{dims...} { // Initialize unspecified dimensions to 1 - if(_num_dimensions > 0) + if (_num_dimensions > 0) { std::fill(_id.begin() + _num_dimensions, _id.end(), 1); } @@ -71,14 +70,15 @@ public: * * @param[in] dimension Dimension for which the value is set. * @param[in] value Value to be set for the dimension. 
- * @param[in] apply_dim_correction Flag to state whether apply dimension correction after setting one dimension. E.g. when permuting NCHW -> NHWC, 1x1x2 would become 2x1x1, but _num_dimensions should be 3 rather than 1. + * @param[in] apply_dim_correction (Optional) Flag to state whether apply dimension correction after setting one dimension. E.g. when permuting NCHW -> NHWC, 1x1x2 would become 2x1x1, but _num_dimensions should be 3 rather than 1. + * @param[in] increase_dim_unit (Optional) Set to true if new unit dimensions increase the number of dimensions of the shape. * * @return *this. */ - TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true) + TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true) { // Clear entire shape if one dimension is zero - if(value == 0) + if (value == 0) { _num_dimensions = 0; std::fill(_id.begin(), _id.end(), 0); @@ -90,10 +90,10 @@ public: // Set the specified dimension and increase the number of dimensions if // necessary - Dimensions::set(dimension, value); + Dimensions::set(dimension, value, increase_dim_unit); // Correct number dimensions to ignore trailing dimensions of size 1 - if(apply_dim_correction) + if (apply_dim_correction) { apply_dimension_correction(); } @@ -105,9 +105,10 @@ public: * * @note The upper dimensions of the tensor shape will be shifted down by 1 * - * @param[in] n Dimension to remove + * @param[in] n Dimension to remove + * @param[in] apply_dim_correction (Optional) Flag to state whether apply dimension correction (removing trailing dimensions with size of 1) after removing a dimension. 
*/ - void remove_dimension(size_t n) + void remove_dimension(size_t n, bool apply_dim_correction = true) { ARM_COMPUTE_ERROR_ON(_num_dimensions < 1); ARM_COMPUTE_ERROR_ON(n >= _num_dimensions); @@ -121,7 +122,10 @@ public: std::fill(_id.begin() + _num_dimensions, _id.end(), 1); // Correct number dimensions to ignore trailing dimensions of size 1 - apply_dimension_correction(); + if (apply_dim_correction) + { + apply_dimension_correction(); + } } /** Collapse the first n dimensions. @@ -207,26 +211,26 @@ public: * @return The broadcasted shape or an empty shape if the shapes are not broadcast compatible. */ template <typename... Shapes> - static TensorShape broadcast_shape(const Shapes &... shapes) + static TensorShape broadcast_shape(const Shapes &...shapes) { TensorShape bc_shape; - auto broadcast = [&bc_shape](const TensorShape & other) + auto broadcast = [&bc_shape](const TensorShape &other) { - if(bc_shape.num_dimensions() == 0) + if (bc_shape.num_dimensions() == 0) { bc_shape = other; } - else if(other.num_dimensions() != 0) + else if (other.num_dimensions() != 0) { - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { const size_t dim_min = std::min(bc_shape[d], other[d]); const size_t dim_max = std::max(bc_shape[d], other[d]); - if((dim_min != 1) && (dim_min != dim_max)) + if ((dim_min != 1) && (dim_min != dim_max)) { - bc_shape = TensorShape{ 0U }; + bc_shape = TensorShape{0U}; break; } @@ -244,9 +248,9 @@ private: /** Remove trailing dimensions of size 1 from the reported number of dimensions. 
*/ void apply_dimension_correction() { - for(int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i) + for (int i = static_cast<int>(_num_dimensions) - 1; i > 0; --i) { - if(_id[i] == 1) + if (_id[i] == 1) { --_num_dimensions; } @@ -257,5 +261,5 @@ private: } } }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_TENSORSHAPE_H*/ diff --git a/arm_compute/core/TracePoint.h b/arm_compute/core/TracePoint.h deleted file mode 100644 index 6951d6d5ef..0000000000 --- a/arm_compute/core/TracePoint.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TRACEPOINT_H -#define ARM_COMPUTE_TRACEPOINT_H - -#include <string> -#include <type_traits> -#include <vector> - -namespace arm_compute -{ -#ifdef ARM_COMPUTE_TRACING_ENABLED -#define CREATE_TRACEPOINT(...) 
TracePoint __tp(__VA_ARGS__) - -/** Class used to dump configuration values in functions and kernels */ -class TracePoint final -{ -public: - /** Layer types */ - enum class Layer - { - CORE, - RUNTIME - }; - /** struct describing the arguments for a tracepoint */ - struct Args final - { - std::vector<std::string> args{}; - }; - /** Constructor - * - * @param[in] source type of layer for the tracepoint - * @param[in] class_name the name of the class creating the tracepoint - * @param[in] object a pointer to the actual object owning the tracepoint - * @param[in] args a struct describing all the arguments used in the call to the configure() method - * - */ - TracePoint(Layer source, const std::string &class_name, void *object, Args &&args); - /** Destructor */ - ~TracePoint(); - -private: - static int g_depth; /**< current depth */ - int _depth; /**< tracepoint depth */ -}; - -/** Operator to write an argument to a @ref TracePoint - * - * @param[in] tp Tracepoint to be used for writing - * @param[in] arg Argument to be written in the tracepoint - * - * @return A referece to the updated tracepoint - */ -template <typename T> -TracePoint::Args &&operator<<(typename std::enable_if < !std::is_pointer<T>::value, TracePoint::Args >::type &&tp, const T &arg); -template <typename T> -TracePoint::Args &&operator<<(TracePoint::Args &&tp, const T *arg); - -#define CONST_REF_CLASS(type) \ - template <> \ - TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type &arg) \ - { \ - ARM_COMPUTE_UNUSED(tp); \ - tp.args.push_back(#type "(" + to_string(arg) + ")"); \ - return std::move(tp); \ - } - -#define CONST_PTR_ADDRESS(type) \ - template <> \ - TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type *arg) \ - { \ - ARM_COMPUTE_UNUSED(tp); \ - tp.args.push_back(#type "*(" + to_ptr_string(arg) + ")"); \ - return std::move(tp); \ - } -#define CONST_PTR_CLASS(type) \ - template <> \ - TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type *arg) \ - { \ - 
ARM_COMPUTE_UNUSED(tp); \ - if(arg) \ - tp.args.push_back(#type "(" + to_string(*arg) + ")"); \ - else \ - tp.args.push_back(#type "( nullptr )"); \ - return std::move(tp); \ - } - -#define CONST_REF_SIMPLE(type) \ - template <> \ - TracePoint::Args &&operator<<(TracePoint::Args &&tp, const type &arg) \ - { \ - ARM_COMPUTE_UNUSED(tp); \ - tp.args.push_back(#type "(" + support::cpp11::to_string(arg) + ")"); \ - return std::move(tp); \ - } - -#define TRACE_TO_STRING(type) \ - std::string to_string(const type &arg) \ - { \ - ARM_COMPUTE_UNUSED(arg); \ - return ""; \ - } -#else /* ARM_COMPUTE_TRACING_ENABLED */ -#define CREATE_TRACEPOINT(...) -#define CONST_REF_CLASS(type) -#define CONST_PTR_ADDRESS(type) -#define CONST_PTR_CLASS(type) -#define CONST_REF_SIMPLE(type) -#define TRACE_TO_STRING(type) -#endif /* ARM_COMPUTE_TRACING_ENABLED */ -} //namespace arm_compute - -#endif /* ARM_COMPUTE_TRACEPOINT_H */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 4e73edba4b..f2f60c150e 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,17 +21,52 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TYPES_H -#define ARM_COMPUTE_TYPES_H - +#ifndef ACL_ARM_COMPUTE_CORE_TYPES_H +#define ACL_ARM_COMPUTE_CORE_TYPES_H + +/** The following symbols have been moved to: + * half + * PermutationVector + * Format + * DataType + * DataLayout + * DataLayoutDimension + * PadStrideInfo + * WeightFormat + * Channel + * DimensionRoundingType + */ +#include "arm_compute/core/CoreTypes.h" +/** The following symbols have been moved to: + * ActivationFunction + * ActivationLayerInfo + */ +#include "arm_compute/function_info/ActivationLayerInfo.h" +/** The following symbols have been moved to: + * ConvolutionInfo + */ +#include "arm_compute/function_info/ConvolutionInfo.h" +/** The following symbols have been moved to: + * FullyConnectedLayerInfo + */ +#include "arm_compute/function_info/FullyConnectedLayerInfo.h" +/** The following symbols have been moved to: + * GEMMLowpOutputStageType + * GEMMLowpOutputStageInfo + * GEMMInfo + */ +#include "arm_compute/function_info/GEMMInfo.h" +/** The following symbols have been moved to: + * MatMulInfo + */ #include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/QuantizationInfo.h" #include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Strides.h" +#include "arm_compute/core/Size3D.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/utils/misc/Macros.h" +#include "arm_compute/function_info/MatMulInfo.h" + #include "support/Bfloat16.h" -#include "support/Half.h" #include <cmath> #include <cstddef> @@ -42,62 +77,9 @@ namespace arm_compute { -/** 16-bit floating point type */ -using half = half_float::half; - -/** Permutation vector */ -using PermutationVector = Strides; /** Bidirectional strides */ using BiStrides = Coordinates; -/** Image colour formats */ -enum class Format -{ - UNKNOWN, /**< Unknown image format */ - U8, /**< 1 channel, 1 U8 per channel */ - S16, /**< 1 channel, 1 S16 per channel */ - U16, /**< 1 channel, 1 U16 per channel */ - S32, /**< 1 channel, 1 S32 per 
channel */ - U32, /**< 1 channel, 1 U32 per channel */ - BFLOAT16, /**< 16-bit brain floating-point number */ - F16, /**< 1 channel, 1 F16 per channel */ - F32, /**< 1 channel, 1 F32 per channel */ - UV88, /**< 2 channel, 1 U8 per channel */ - RGB888, /**< 3 channels, 1 U8 per channel */ - RGBA8888, /**< 4 channels, 1 U8 per channel */ - YUV444, /**< A 3 plane of 8 bit 4:4:4 sampled Y, U, V planes */ - YUYV422, /**< A single plane of 32-bit macro pixel of Y0, U0, Y1, V0 bytes */ - NV12, /**< A 2 plane YUV format of Luma (Y) and interleaved UV data at 4:2:0 sampling */ - NV21, /**< A 2 plane YUV format of Luma (Y) and interleaved VU data at 4:2:0 sampling */ - IYUV, /**< A 3 plane of 8-bit 4:2:0 sampled Y, U, V planes */ - UYVY422 /**< A single plane of 32-bit macro pixel of U0, Y0, V0, Y1 byte */ -}; - -/** Available data types */ -enum class DataType -{ - UNKNOWN, /**< Unknown data type */ - U8, /**< unsigned 8-bit number */ - S8, /**< signed 8-bit number */ - QSYMM8, /**< quantized, symmetric fixed-point 8-bit number */ - QASYMM8, /**< quantized, asymmetric fixed-point 8-bit number unsigned */ - QASYMM8_SIGNED, /**< quantized, asymmetric fixed-point 8-bit number signed */ - QSYMM8_PER_CHANNEL, /**< quantized, symmetric per channel fixed-point 8-bit number */ - U16, /**< unsigned 16-bit number */ - S16, /**< signed 16-bit number */ - QSYMM16, /**< quantized, symmetric fixed-point 16-bit number */ - QASYMM16, /**< quantized, asymmetric fixed-point 16-bit number */ - U32, /**< unsigned 32-bit number */ - S32, /**< signed 32-bit number */ - U64, /**< unsigned 64-bit number */ - S64, /**< signed 64-bit number */ - BFLOAT16, /**< 16-bit brain floating-point number */ - F16, /**< 16-bit floating-point number */ - F32, /**< 32-bit floating-point number */ - F64, /**< 64-bit floating-point number */ - SIZET /**< size_t */ -}; - /** Available Sampling Policies */ enum class SamplingPolicy { @@ -105,42 +87,15 @@ enum class SamplingPolicy TOP_LEFT /**< Samples are taken at 
pixel top left corner */ }; -/** Constant value of the border pixels when using BorderMode::CONSTANT */ -constexpr uint8_t CONSTANT_BORDER_VALUE = 199; - -/** Constant value used to indicate a half-scale pyramid */ -constexpr float SCALE_PYRAMID_HALF = 0.5f; - -/** Constant value used to indicate a ORB scaled pyramid */ -constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01; - -/** [DataLayout enum definition] **/ - -/** Supported tensor data layouts */ -enum class DataLayout -{ - UNKNOWN, /**< Unknown data layout */ - NCHW, /**< Num samples, channels, height, width */ - NHWC /**< Num samples, height, width, channels */ -}; -/** [DataLayout enum definition] **/ - -/** Supported tensor data layout dimensions */ -enum class DataLayoutDimension -{ - CHANNEL, /**< channel */ - HEIGHT, /**< height */ - WIDTH, /**< width */ - BATCHES /**< batches */ -}; - /** Available ConvolutionMethod*/ enum class ConvolutionMethod { - GEMM, /**< Convolution using GEMM */ - DIRECT, /**< Direct convolution */ - WINOGRAD, /**< Convolution using Winograd */ - FFT /**< Convolution using FFT */ + GEMM, /**< Convolution using GEMM */ + GEMM_CONV2D, /**< Direct 2D GEMM convolution */ + DIRECT, /**< Direct convolution */ + INDIRECT, /**< Indirect convolution */ + WINOGRAD, /**< Convolution using Winograd */ + FFT /**< Convolution using FFT */ }; /** Available DepthwiseConvolutionFunction*/ @@ -153,8 +108,9 @@ enum class DepthwiseConvolutionFunction /** Available DeconvolutionMethod*/ enum class DeconvolutionMethod { - GEMM, /**< Deconvolution using GEMM */ - DIRECT, /**< Direct deconvolution */ + GEMM, /**< Deconvolution using GEMM */ + DIRECT, /**< Direct deconvolution */ + UPSCALE_CONV2D /**< Deconvolution with Upscaling */ }; /** Available FuseBatchNormalizationType*/ @@ -187,8 +143,7 @@ enum class ComparisonOperation struct ValidRegion { /** Default constructor */ - ValidRegion() - : anchor{}, shape{} + ValidRegion() : anchor{}, shape{} { } @@ -209,8 +164,7 @@ struct 
ValidRegion * @param[in] a_shape Shape of the valid region. * */ - ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) - : anchor{ an_anchor }, shape{ a_shape } + ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape) : anchor{an_anchor}, shape{a_shape} { anchor.set_num_dimensions(std::max(anchor.num_dimensions(), shape.num_dimensions())); } @@ -223,7 +177,7 @@ struct ValidRegion * */ ValidRegion(const Coordinates &an_anchor, const TensorShape &a_shape, size_t num_dimensions) - : anchor{ an_anchor }, shape{ a_shape } + : anchor{an_anchor}, shape{a_shape} { ARM_COMPUTE_ERROR_ON(num_dimensions < std::max(anchor.num_dimensions(), shape.num_dimensions())); anchor.set_num_dimensions(num_dimensions); @@ -256,9 +210,22 @@ struct ValidRegion return *this; } + /** Check whether two valid regions are equal. + * + * @param[in] lhs LHS valid region + * @param[in] rhs RHS valid region + * + * @return True if the valid regions are the same. + */ + inline friend bool operator==(const ValidRegion &lhs, const ValidRegion &rhs); + Coordinates anchor; /**< Anchor for the start of the valid region. */ TensorShape shape; /**< Shape of the valid region. */ }; +inline bool operator==(const ValidRegion &lhs, const ValidRegion &rhs) +{ + return (lhs.anchor == rhs.anchor) && (lhs.shape == rhs.shape); +} /** Methods available to handle borders */ enum class BorderMode @@ -272,26 +239,24 @@ enum class BorderMode struct BorderSize { /** Empty border, i.e. 
no border */ - constexpr BorderSize() - : top{ 0 }, right{ 0 }, bottom{ 0 }, left{ 0 } + constexpr BorderSize() noexcept : top{0}, right{0}, bottom{0}, left{0} { } /** Border with equal size around the 2D plane */ - explicit constexpr BorderSize(unsigned int size) - : top{ size }, right{ size }, bottom{ size }, left{ size } + explicit constexpr BorderSize(unsigned int size) noexcept : top{size}, right{size}, bottom{size}, left{size} { } /** Border with same size for top/bottom and left/right */ constexpr BorderSize(unsigned int top_bottom, unsigned int left_right) - : top{ top_bottom }, right{ left_right }, bottom{ top_bottom }, left{ left_right } + : top{top_bottom}, right{left_right}, bottom{top_bottom}, left{left_right} { } /** Border with different sizes */ constexpr BorderSize(unsigned int top, unsigned int right, unsigned int bottom, unsigned int left) - : top{ top }, right{ right }, bottom{ bottom }, left{ left } + : top{top}, right{right}, bottom{bottom}, left{left} { } @@ -337,6 +302,28 @@ struct BorderSize return size; } + /** Check equality with another BorderSize struct + * + * @param[in] rhs other struct to check against + * + * @return true if they are equal + */ + bool operator==(const BorderSize &rhs) const + { + return (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom) && (left == rhs.left); + } + + /** Check non-equality with another BorderSize struct + * + * @param[in] rhs other struct to check against + * + * @return true if they are different + */ + bool operator!=(const BorderSize &rhs) const + { + return !(*this == rhs); + } + /** Limit this border size. * * @param[in] limit Border size to limit this border size to. 
@@ -358,7 +345,11 @@ struct BorderSize /** Container for 2D padding size */ using PaddingSize = BorderSize; -/** Policy to handle overflow */ +/** Policy to handle integer overflow + * @note: This is ignored by floating point operations where the overflow behavior adheres to the IEEE-754 standard + * which states that in case of overflow ±infinity is returned for the round-to-nearest modes (and follows the + * rounding rules for the directed rounding modes) by default. + */ enum class ConvertPolicy { WRAP, /**< Wrap around */ @@ -370,7 +361,7 @@ enum class InterpolationPolicy { NEAREST_NEIGHBOR, /**< Output values are defined to match the source pixel whose center is nearest to the sample position */ BILINEAR, /**< Output values are defined by bilinear interpolation between the pixels */ - AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ + AREA, /**< Output values are determined by averaging the source pixels whose areas fall under the area of the destination pixel, projected onto the source image */ }; /** Bilinear Interpolation method used by LKTracker */ @@ -380,53 +371,6 @@ enum class BilinearInterpolation BILINEAR_SCHARR /**< Scharr method */ }; -/** Threshold mode */ -enum class ThresholdType -{ - BINARY, /**< Threshold with one value */ - RANGE /**< Threshold with two values*/ -}; - -/** Termination criteria */ -enum class Termination -{ - TERM_CRITERIA_EPSILON, /**< Terminate when within epsilon of a threshold */ - TERM_CRITERIA_ITERATIONS, /**< Terminate after a maximum number of iterations */ - TERM_CRITERIA_BOTH /**< Terminate on whichever of the other conditions occurs first */ -}; - -/** Magnitude calculation type. */ -enum class MagnitudeType -{ - L1NORM, /**< L1 normalization type */ - L2NORM /**< L2 normalization type */ -}; - -/** Phase calculation type. 
- * - * @note When PhaseType == SIGNED, each angle is mapped to the range 0 to 255 inclusive otherwise angles between 0 and 180 - */ -enum class PhaseType -{ - SIGNED, /**< Angle range: [0, 360] */ - UNSIGNED /**< Angle range: [0, 180] */ -}; - -/** Keypoint type */ -struct KeyPoint -{ - int32_t x{ 0 }; /**< X coordinates */ - int32_t y{ 0 }; /**< Y coordinates */ - float strength{ 0.f }; /**< Strength of the point */ - float scale{ 0.f }; /**< Scale initialized to 0 by the corner detector */ - float orientation{ 0.f }; /**< Orientation initialized to 0 by the corner detector */ - int32_t tracking_status{ 0 }; /**< Status initialized to 1 by the corner detector, set to 0 when the point is lost */ - float error{ 0.f }; /**< Tracking error initialized to 0 by the corner detector */ -}; - -/** Internal key point */ -using InternalKeypoint = std::tuple<float, float, float>; /* x,y,strength */ - /** Rectangle type */ struct Rectangle { @@ -460,40 +404,6 @@ using PaddingList = std::vector<PaddingInfo>; /** Information to produce a tiled version of a Tensor */ using Multiples = std::vector<uint32_t>; -/** Available channels */ -enum class Channel -{ - UNKNOWN, /** Unknown channel format */ - C0, /**< First channel (used by formats with unknown channel types). */ - C1, /**< Second channel (used by formats with unknown channel types). */ - C2, /**< Third channel (used by formats with unknown channel types). */ - C3, /**< Fourth channel (used by formats with unknown channel types). */ - R, /**< Red channel. */ - G, /**< Green channel. */ - B, /**< Blue channel. */ - A, /**< Alpha channel. */ - Y, /**< Luma channel. */ - U, /**< Cb/U channel. */ - V /**< Cr/V/Value channel. */ -}; - -/** Available matrix patterns */ -enum class MatrixPattern -{ - BOX, /**< Box pattern matrix. */ - CROSS, /**< Cross pattern matrix. */ - DISK, /**< Disk pattern matrix. */ - OTHER /**< Any other matrix pattern. */ -}; - -/** Available non linear functions. 
*/ -enum class NonLinearFilterFunction : unsigned -{ - MEDIAN = 0, /**< Non linear median filter. */ - MIN = 1, /**< Non linear erode. */ - MAX = 2, /**< Non linear dilate. */ -}; - /** Available reduction operations */ enum class ReductionOperation { @@ -523,13 +433,23 @@ enum class ArithmeticOperation /** Available element wise unary operations */ enum class ElementWiseUnary { - RSQRT, /**< Reverse square root */ - EXP, /**< Exponential */ - NEG, /**< Negate */ - LOG, /**< Natural Logarithm */ - ABS, /**< Absolute value */ - SIN, /**< Sine */ - ROUND, /**< Round */ + RSQRT, /**< Reverse square root */ + EXP, /**< Exponential */ + NEG, /**< Negate */ + LOG, /**< Natural Logarithm */ + ABS, /**< Absolute value */ + SIN, /**< Sine */ + ROUND, /**< Round */ + LOGICAL_NOT, /**< Logical Not */ +}; + +/** Available bitwise operations */ +enum class BitwiseOperation +{ + AND, /**< Bitwise AND operation */ + NOT, /**< Bitwise NOT operation */ + OR, /**< Bitwise OR operation */ + XOR, /**< Bitwise XOR operation */ }; /** The normalization type used for the normalization layer */ @@ -540,14 +460,6 @@ enum class NormType CROSS_MAP /**< Normalization applied cross maps */ }; -/** Normalization type for Histogram of Oriented Gradients (HOG) */ -enum class HOGNormType -{ - L2_NORM = 1, /**< L2-norm */ - L2HYS_NORM = 2, /**< L2-norm followed by clipping */ - L1_NORM = 3 /**< L1 norm */ -}; - /** Detection window used for the object detection. 
The detection window keeps the following information: * * -# Geometry of the rectangular window (x/y of top-left corner and width/height) @@ -556,21 +468,12 @@ enum class HOGNormType */ struct DetectionWindow { - uint16_t x{ 0 }; /**< Top-left x coordinate */ - uint16_t y{ 0 }; /**< Top-left y coordinate */ - uint16_t width{ 0 }; /**< Width of the detection window */ - uint16_t height{ 0 }; /**< Height of the detection window */ - uint16_t idx_class{ 0 }; /**< Index of the class */ - float score{ 0.f }; /**< Confidence value for the detection window */ -}; - -/** Dimension rounding type when down-scaling on CNNs - * @note Used in pooling and convolution layer - */ -enum class DimensionRoundingType -{ - FLOOR, /**< Floor rounding */ - CEIL /**< Ceil rounding */ + uint16_t x{0}; /**< Top-left x coordinate */ + uint16_t y{0}; /**< Top-left y coordinate */ + uint16_t width{0}; /**< Width of the detection window */ + uint16_t height{0}; /**< Height of the detection window */ + uint16_t idx_class{0}; /**< Index of the class */ + float score{0.f}; /**< Confidence value for the detection window */ }; /** Available pooling types */ @@ -607,12 +510,28 @@ public: * @param[in] im_width (Optional) Boxes whose centers (on the x axis) is beyond im_width will be filtered. Defaults to 1 * @param[in] im_height (Optional) Boxes whose centers (on the y axis) is beyond im_height will be filtered. 
Defaults to 1 */ - BoxNMSLimitInfo(float score_thresh = 0.05f, float nms = 0.3f, - int detections = 100, bool soft_nms_enabled = false, - NMSType soft_nms_method = NMSType::LINEAR, - float soft_nms_sigma = 0.5f, float soft_nms_min_score_thres = 0.001f, bool suppress_size = false, float min_size = 1.0f, float im_width = 1.0f, float im_height = 1.0f) - : _score_thresh(score_thresh), _nms(nms), _detections_per_im(detections), _soft_nms_enabled(soft_nms_enabled), _soft_nms_method(soft_nms_method), _soft_nms_sigma(soft_nms_sigma), - _soft_nms_min_score_thres(soft_nms_min_score_thres), _suppress_size(suppress_size), _min_size(min_size), _im_width(im_width), _im_height(im_height) + BoxNMSLimitInfo(float score_thresh = 0.05f, + float nms = 0.3f, + int detections = 100, + bool soft_nms_enabled = false, + NMSType soft_nms_method = NMSType::LINEAR, + float soft_nms_sigma = 0.5f, + float soft_nms_min_score_thres = 0.001f, + bool suppress_size = false, + float min_size = 1.0f, + float im_width = 1.0f, + float im_height = 1.0f) + : _score_thresh(score_thresh), + _nms(nms), + _detections_per_im(detections), + _soft_nms_enabled(soft_nms_enabled), + _soft_nms_method(soft_nms_method), + _soft_nms_sigma(soft_nms_sigma), + _soft_nms_min_score_thres(soft_nms_min_score_thres), + _suppress_size(suppress_size), + _min_size(min_size), + _im_width(im_width), + _im_height(im_height) { } /** Get the score threshold */ @@ -686,120 +605,42 @@ private: }; /** Padding and stride information class */ -class PadStrideInfo +/** Padding information for 2D operations like Conv2d */ +struct Padding2D { -public: - /** Constructor - * - * @param[in] stride_x (Optional) Stride, in elements, across x. Defaults to 1. - * @param[in] stride_y (Optional) Stride, in elements, across y. Defaults to 1. - * @param[in] pad_x (Optional) Padding, in elements, across x. Defaults to 0. - * @param[in] pad_y (Optional) Padding, in elements, across y. Defaults to 0. - * @param[in] round (Optional) Dimensions rounding. 
Defaults to @ref FLOOR. - */ - PadStrideInfo(unsigned int stride_x = 1, unsigned int stride_y = 1, - unsigned int pad_x = 0, unsigned int pad_y = 0, - DimensionRoundingType round = DimensionRoundingType::FLOOR) - : _stride(std::make_pair(stride_x, stride_y)), - _pad_left(pad_x), - _pad_top(pad_y), - _pad_right(pad_x), - _pad_bottom(pad_y), - _round_type(round) + Padding2D() = default; + Padding2D(size_t left, size_t right, size_t top, size_t bottom) : left(left), right(right), top(top), bottom(bottom) { } - /** Constructor - * - * @param[in] stride_x Stride, in elements, across x. - * @param[in] stride_y Stride, in elements, across y. - * @param[in] pad_left Padding across x on the left, in elements. - * @param[in] pad_top Padding across y on the top, in elements. - * @param[in] pad_right Padding across x on the right, in elements. - * @param[in] pad_bottom Padding across y on the bottom, in elements. - * @param[in] round Dimensions rounding. - */ - PadStrideInfo(unsigned int stride_x, unsigned int stride_y, - unsigned int pad_left, unsigned int pad_right, - unsigned int pad_top, unsigned int pad_bottom, - DimensionRoundingType round) - : _stride(std::make_pair(stride_x, stride_y)), - _pad_left(pad_left), - _pad_top(pad_top), - _pad_right(pad_right), - _pad_bottom(pad_bottom), - _round_type(round) - { - } - /** Get the stride. - * - * @return a pair: stride x, stride y. - */ - std::pair<unsigned int, unsigned int> stride() const - { - return _stride; - } - /** Check whether the padding is symmetric. - * - * @return True if the padding is symmetric. - */ - bool padding_is_symmetric() const - { - return (_pad_left == _pad_right) && (_pad_top == _pad_bottom); - } - /** Get the padding. - * - * @note This should only be used when the padding is symmetric. 
- * - * @return a pair: padding left/right, padding top/bottom - */ - std::pair<unsigned int, unsigned int> pad() const - { - //this accessor should be used only when padding is symmetric - ARM_COMPUTE_ERROR_ON(!padding_is_symmetric()); - return std::make_pair(_pad_left, _pad_top); - } + size_t left = {0}; /**< Padding across the width dimension on the left, in elements. */ + size_t right = {0}; /**< Padding across the width dimension on the right, in elements. */ + size_t top = {0}; /**< Padding across the height dimension on the top, in elements. */ + size_t bottom = {0}; /**< Padding across the height dimension on the bottom, in elements. */ +}; - /** Get the left padding */ - unsigned int pad_left() const - { - return _pad_left; - } - /** Get the right padding */ - unsigned int pad_right() const - { - return _pad_right; - } - /** Get the top padding */ - unsigned int pad_top() const - { - return _pad_top; - } - /** Get the bottom padding */ - unsigned int pad_bottom() const +/** Padding information for 3D operations like Conv3d */ +struct Padding3D +{ + Padding3D() noexcept { - return _pad_bottom; } - /** Get the rounding type */ - DimensionRoundingType round() const + Padding3D(size_t pad_x, size_t pad_y, size_t pad_z) + : left(pad_x), right(pad_x), top(pad_y), bottom(pad_y), front(pad_z), back(pad_z) { - return _round_type; } - /** Check whether this has any padding */ - bool has_padding() const + Padding3D(size_t left, size_t right, size_t top, size_t bottom, size_t front, size_t back) + : left(left), right(right), top(top), bottom(bottom), front(front), back(back) { - return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0); } -private: - std::pair<unsigned int, unsigned int> _stride; - unsigned int _pad_left; - unsigned int _pad_top; - unsigned int _pad_right; - unsigned int _pad_bottom; - - DimensionRoundingType _round_type; + size_t left = {0}; /**< Padding across the width dimenstion on the left, in elements. 
*/ + size_t right = {0}; /**< Padding across the width dimenstion on the right, in elements. */ + size_t top = {0}; /**< Padding across the height dimenstion on the top, in elements. */ + size_t bottom = {0}; /**< Padding across the height dimenstion on the bottom, in elements. */ + size_t front = {0}; /**< Padding across the depth dimenstion on the front, in elements. */ + size_t back = {0}; /**< Padding across the depth dimenstion on the back, in elements. */ }; /** PriorBox layer info */ @@ -831,9 +672,15 @@ public: * @param[in] img_size (Optional) Image size. * @param[in] steps (Optional) Step values. */ - PriorBoxLayerInfo(const std::vector<float> &min_sizes, const std::vector<float> &variances, float offset, bool flip = true, bool clip = false, - const std::vector<float> &max_sizes = {}, const std::vector<float> &aspect_ratios = {}, - const Coordinates2D &img_size = Coordinates2D{ 0, 0 }, const std::array<float, 2> &steps = { { 0.f, 0.f } }) + PriorBoxLayerInfo(const std::vector<float> &min_sizes, + const std::vector<float> &variances, + float offset, + bool flip = true, + bool clip = false, + const std::vector<float> &max_sizes = {}, + const std::vector<float> &aspect_ratios = {}, + const Coordinates2D &img_size = Coordinates2D{0, 0}, + const std::array<float, 2> &steps = {{0.f, 0.f}}) : _min_sizes(min_sizes), _variances(variances), _offset(offset), @@ -845,22 +692,22 @@ public: _steps(steps) { _aspect_ratios.push_back(1.); - for(unsigned int i = 0; i < aspect_ratios.size(); ++i) + for (unsigned int i = 0; i < aspect_ratios.size(); ++i) { float ar = aspect_ratios[i]; bool already_exist = false; - for(auto ar_new : _aspect_ratios) + for (auto ar_new : _aspect_ratios) { - if(fabs(ar - ar_new) < 1e-6) + if (fabs(ar - ar_new) < 1e-6) { already_exist = true; break; } } - if(!already_exist) + if (!already_exist) { _aspect_ratios.push_back(ar); - if(flip) + if (flip) { _aspect_ratios.push_back(1.f / ar); } @@ -914,14 +761,14 @@ public: } private: - 
std::vector<float> _min_sizes; - std::vector<float> _variances; - float _offset; - bool _flip; - bool _clip; - std::vector<float> _max_sizes; - std::vector<float> _aspect_ratios; - Coordinates2D _img_size; + std::vector<float> _min_sizes; + std::vector<float> _variances; + float _offset; + bool _flip; + bool _clip; + std::vector<float> _max_sizes; + std::vector<float> _aspect_ratios; + Coordinates2D _img_size; std::array<float, 2> _steps; }; @@ -972,8 +819,16 @@ public: * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly.Default set to false. * @param[in] eta (Optional) Eta. */ - DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1, - float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1) + DetectionOutputLayerInfo(int num_classes, + bool share_location, + DetectionOutputLayerCodeType code_type, + int keep_top_k, + float nms_threshold, + int top_k = -1, + int background_label_id = -1, + float confidence_threshold = std::numeric_limits<float>::lowest(), + bool variance_encoded_in_target = false, + float eta = 1) : _num_classes(num_classes), _share_location(share_location), _code_type(code_type), @@ -1087,8 +942,15 @@ public: * @param[in] detection_per_class (Optional) Number of detection per class. Used in the Regular Non-Max-Suppression. Defaults to 100. * @param[in] dequantize_scores (Optional) If the scores need to be dequantized. Defaults to true. 
*/ - DetectionPostProcessLayerInfo(unsigned int max_detections, unsigned int max_classes_per_detection, float nms_score_threshold, float iou_threshold, unsigned int num_classes, - std::array<float, 4> scales_values, bool use_regular_nms = false, unsigned int detection_per_class = 100, bool dequantize_scores = true) + DetectionPostProcessLayerInfo(unsigned int max_detections, + unsigned int max_classes_per_detection, + float nms_score_threshold, + float iou_threshold, + unsigned int num_classes, + std::array<float, 4> scales_values, + bool use_regular_nms = false, + unsigned int detection_per_class = 100, + bool dequantize_scores = true) : _max_detections(max_detections), _max_classes_per_detection(max_classes_per_detection), _nms_score_threshold(nms_score_threshold), @@ -1166,15 +1028,15 @@ public: } private: - unsigned int _max_detections; - unsigned int _max_classes_per_detection; - float _nms_score_threshold; - float _iou_threshold; - unsigned int _num_classes; + unsigned int _max_detections; + unsigned int _max_classes_per_detection; + float _nms_score_threshold; + float _iou_threshold; + unsigned int _num_classes; std::array<float, 4> _scales_values; - bool _use_regular_nms; - unsigned int _detection_per_class; - bool _dequantize_scores; + bool _use_regular_nms; + unsigned int _detection_per_class; + bool _dequantize_scores; }; /** Pooling Layer Information struct*/ @@ -1188,7 +1050,9 @@ struct PoolingLayerInfo pad_stride_info(PadStrideInfo()), exclude_padding(false), is_global_pooling(false), - fp_mixed_precision(false) + fp_mixed_precision(false), + use_inf_as_limit(true), + use_kernel_indices(false) { } /** Constructor @@ -1201,20 +1065,26 @@ struct PoolingLayerInfo * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). * Defaults to false; * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. 
+ * @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type. + * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor. */ explicit PoolingLayerInfo(PoolingType pool_type, unsigned int pool_size, DataLayout data_layout, PadStrideInfo pad_stride_info = PadStrideInfo(), bool exclude_padding = false, - bool fp_mixed_precision = false) + bool fp_mixed_precision = false, + bool use_inf_as_limit = true, + bool use_kernel_indices = false) : pool_type(pool_type), pool_size(Size2D(pool_size, pool_size)), data_layout(data_layout), pad_stride_info(pad_stride_info), exclude_padding(exclude_padding), is_global_pooling(false), - fp_mixed_precision(fp_mixed_precision) + fp_mixed_precision(fp_mixed_precision), + use_inf_as_limit(use_inf_as_limit), + use_kernel_indices(use_kernel_indices) { } @@ -1228,20 +1098,26 @@ struct PoolingLayerInfo * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). * Defaults to false; * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type. + * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor. 
*/ explicit PoolingLayerInfo(PoolingType pool_type, Size2D pool_size, DataLayout data_layout, PadStrideInfo pad_stride_info = PadStrideInfo(), bool exclude_padding = false, - bool fp_mixed_precision = false) + bool fp_mixed_precision = false, + bool use_inf_as_limit = true, + bool use_kernel_indices = false) : pool_type(pool_type), pool_size(pool_size), data_layout(data_layout), pad_stride_info(pad_stride_info), exclude_padding(exclude_padding), is_global_pooling(false), - fp_mixed_precision(fp_mixed_precision) + fp_mixed_precision(fp_mixed_precision), + use_inf_as_limit(use_inf_as_limit), + use_kernel_indices(use_kernel_indices) { } @@ -1259,7 +1135,9 @@ struct PoolingLayerInfo pad_stride_info(PadStrideInfo(1, 1, 0, 0)), exclude_padding(false), is_global_pooling(true), - fp_mixed_precision(false) + fp_mixed_precision(false), + use_inf_as_limit(true), + use_kernel_indices(false) { } @@ -1270,6 +1148,111 @@ struct PoolingLayerInfo bool exclude_padding; bool is_global_pooling; bool fp_mixed_precision; + bool use_inf_as_limit; + bool use_kernel_indices; +}; + +/** Pooling Layer Information struct*/ +struct Pooling3dLayerInfo +{ + /** Default Constructor */ + Pooling3dLayerInfo() noexcept + : pool_type(PoolingType::MAX), + pool_size(Size3D()), + stride(Size3D()), + padding(Padding3D()), + exclude_padding(false), + is_global_pooling(false), + fp_mixed_precision(false), + round_type(DimensionRoundingType::FLOOR) + { + } + /** Constructor + * + * @param[in] pool_type Pooling type @ref PoolingType. + * @param[in] pool_size Pooling size, in elements, across x, y and z. + * @param[in] stride (Optional) stride information @ref Size3D + * @param[in] padding (Optional) padding information @ref Padding3D + * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations. + * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). 
+ * Defaults to false; + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] round_type (Optional) Dimensions rounding. Defaults to @ref DimensionRoundingType::FLOOR + */ + explicit Pooling3dLayerInfo(PoolingType pool_type, + unsigned int pool_size, + Size3D stride = Size3D(1U, 1U, 1U), + Padding3D padding = Padding3D(), + bool exclude_padding = false, + bool fp_mixed_precision = false, + DimensionRoundingType round_type = DimensionRoundingType::FLOOR) + : pool_type(pool_type), + pool_size(Size3D(pool_size, pool_size, pool_size)), + stride(stride), + padding(padding), + exclude_padding(exclude_padding), + is_global_pooling(false), + fp_mixed_precision(fp_mixed_precision), + round_type(round_type) + { + } + + /** Constructor + * + * @param[in] pool_type Pooling type @ref PoolingType. + * @param[in] pool_size Pooling size, in elements, across x, y and z. + * @param[in] stride (Optional) stride information @ref Size3D + * @param[in] padding (Optional) padding information @ref Padding3D + * @param[in] exclude_padding (Optional) Strategy when accounting padding in calculations. + * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). + * Defaults to false; + * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] round_type (Optional) Dimensions rounding. 
Defaults to @ref DimensionRoundingType::FLOOR + */ + explicit Pooling3dLayerInfo(PoolingType pool_type, + Size3D pool_size, + Size3D stride = Size3D(1U, 1U, 1U), + Padding3D padding = Padding3D(), + bool exclude_padding = false, + bool fp_mixed_precision = false, + DimensionRoundingType round_type = DimensionRoundingType::FLOOR) + : pool_type(pool_type), + pool_size(pool_size), + stride(stride), + padding(padding), + exclude_padding(exclude_padding), + is_global_pooling(false), + fp_mixed_precision(fp_mixed_precision), + round_type(round_type) + { + } + + /** Constructor + * + * @note This constructor is used for global pooling + * + * @param[in] pool_type Pooling type @ref PoolingType. + */ + explicit Pooling3dLayerInfo(PoolingType pool_type) + : pool_type(pool_type), + pool_size(Size3D()), + stride(Size3D(1U, 1U, 1U)), + padding(Padding3D(0, 0, 0)), + exclude_padding(false), + is_global_pooling(true), + fp_mixed_precision(false), + round_type(DimensionRoundingType::FLOOR) + { + } + + PoolingType pool_type; + Size3D pool_size; + Size3D stride; + Padding3D padding; + bool exclude_padding; + bool is_global_pooling; + bool fp_mixed_precision; + DimensionRoundingType round_type; }; /** ROI Pooling Layer Information class */ @@ -1283,8 +1266,14 @@ public: * @param[in] spatial_scale Spatial scale to be applied to the ROI coordinates and dimensions. 
* @param[in] sampling_ratio Number of samples to include in each pooling region (if set to zero, a ceil(roi_dims/pooling_dims)) */ - ROIPoolingLayerInfo(unsigned int pooled_width, unsigned int pooled_height, float spatial_scale, unsigned int sampling_ratio = 0) - : _pooled_width(pooled_width), _pooled_height(pooled_height), _spatial_scale(spatial_scale), _sampling_ratio(sampling_ratio) + ROIPoolingLayerInfo(unsigned int pooled_width, + unsigned int pooled_height, + float spatial_scale, + unsigned int sampling_ratio = 0) + : _pooled_width(pooled_width), + _pooled_height(pooled_height), + _spatial_scale(spatial_scale), + _sampling_ratio(sampling_ratio) { } /** Get the pooled width of the layer */ @@ -1331,10 +1320,24 @@ public: * @param[in] min_size (Optional)Size used to validate the anchors produced. Defaults to 16. * @param[in] values_per_roi (Optional)Values used to represent a ROI(Region of interest). Defaults to 4. */ - GenerateProposalsInfo(float im_width, float im_height, float im_scale, float spatial_scale = 1.0, int pre_nms_topN = 6000, int post_nms_topN = 300, float nms_thres = 0.7, float min_size = 16.0, + GenerateProposalsInfo(float im_width, + float im_height, + float im_scale, + float spatial_scale = 1.0, + int pre_nms_topN = 6000, + int post_nms_topN = 300, + float nms_thres = 0.7, + float min_size = 16.0, size_t values_per_roi = 4) - : _im_height(im_height), _im_width(im_width), _im_scale(im_scale), _spatial_scale(spatial_scale), _pre_nms_topN(pre_nms_topN), _post_nms_topN(post_nms_topN), _nms_thres(nms_thres), - _min_size(min_size), _values_per_roi(values_per_roi) + : _im_height(im_height), + _im_width(im_width), + _im_scale(im_scale), + _spatial_scale(spatial_scale), + _pre_nms_topN(pre_nms_topN), + _post_nms_topN(post_nms_topN), + _nms_thres(nms_thres), + _min_size(min_size), + _values_per_roi(values_per_roi) { } @@ -1460,11 +1463,20 @@ public: * @param[in] correct_transform_coords (Optional)Correct bounding box transform coordinates. 
Defaults to false * @param[in] bbox_xform_clip (Optional)Minimum bounding box width and height after bounding box transformation in log-space. Defaults to log(1000/16) */ - BoundingBoxTransformInfo(float img_width, float img_height, float scale, bool apply_scale = false, const std::array<float, 4> weights = { { 1.f, 1.f, 1.f, 1.f } }, bool correct_transform_coords = - false, - float bbox_xform_clip = - 4.135166556742356f) - : _img_width(img_width), _img_height(img_height), _scale(scale), _apply_scale(apply_scale), _correct_transform_coords(correct_transform_coords), _weights(weights), _bbox_xform_clip(bbox_xform_clip) + BoundingBoxTransformInfo(float img_width, + float img_height, + float scale, + bool apply_scale = false, + const std::array<float, 4> weights = {{1.f, 1.f, 1.f, 1.f}}, + bool correct_transform_coords = false, + float bbox_xform_clip = 4.135166556742356f) + : _img_width(img_width), + _img_height(img_height), + _scale(scale), + _apply_scale(apply_scale), + _correct_transform_coords(correct_transform_coords), + _weights(weights), + _bbox_xform_clip(bbox_xform_clip) { } @@ -1504,110 +1516,13 @@ public: } private: - float _img_width; - float _img_height; - float _scale; - bool _apply_scale; - bool _correct_transform_coords; + float _img_width; + float _img_height; + float _scale; + bool _apply_scale; + bool _correct_transform_coords; std::array<float, 4> _weights; - float _bbox_xform_clip; -}; - -/** Activation Layer Information class */ -class ActivationLayerInfo -{ -public: - /** Available activation functions */ - enum class ActivationFunction - { - LOGISTIC, /**< Logistic ( \f$ f(x) = \frac{1}{1 + e^{-x}} \f$ ) */ - TANH, /**< Hyperbolic tangent ( \f$ f(x) = a \cdot tanh(b \cdot x) \f$ ) */ - RELU, /**< Rectifier ( \f$ f(x) = max(0,x) \f$ ) */ - BOUNDED_RELU, /**< Upper Bounded Rectifier ( \f$ f(x) = min(a, max(0,x)) \f$ ) */ - LU_BOUNDED_RELU, /**< Lower and Upper Bounded Rectifier ( \f$ f(x) = min(a, max(b,x)) \f$ ) */ - LEAKY_RELU, /**< Leaky 
Rectifier ( \f$ f(x) = \begin{cases} \alpha x & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */ - SOFT_RELU, /**< Soft Rectifier ( \f$ f(x)= log(1+e^x) \f$ ) */ - ELU, /**< Exponential Linear Unit ( \f$ f(x) = \begin{cases} \alpha (exp(x) - 1) & \quad \text{if } x \text{ < 0}\\ x & \quad \text{if } x \geq \text{ 0 } \end{cases} \f$ ) */ - ABS, /**< Absolute ( \f$ f(x)= |x| \f$ ) */ - SQUARE, /**< Square ( \f$ f(x)= x^2 \f$ )*/ - SQRT, /**< Square root ( \f$ f(x) = \sqrt{x} \f$ )*/ - LINEAR, /**< Linear ( \f$ f(x)= ax + b \f$ ) */ - IDENTITY, /**< Identity ( \f$ f(x)= x \f$ ) */ - HARD_SWISH /**< Hard-swish ( \f$ f(x) = (x * relu6(x+3))/6 \f$ ) */ - }; - - ActivationLayerInfo() = default; - /** Default Constructor - * - * @param[in] f The activation function to use. - * @param[in] a (Optional) The alpha parameter used by some activation functions - * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH). - * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH). 
- */ - ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f) - : _act(f), _a(a), _b(b), _enabled(true) - { - } - /** Get the type of activation function */ - ActivationFunction activation() const - { - return _act; - } - /** Get the alpha value */ - float a() const - { - return _a; - } - /** Get the beta value */ - float b() const - { - return _b; - } - /** Check if initialised */ - bool enabled() const - { - return _enabled; - } - -private: - ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::IDENTITY }; - float _a = {}; - float _b = {}; - bool _enabled = { false }; -}; - -/** Fully connected layer info */ -struct FullyConnectedLayerInfo -{ - DataLayout weights_trained_layout{ DataLayout::NCHW }; /**< Layout that the weights have been trained with. */ - bool transpose_weights{ true }; /**< Transpose weights if true. */ - bool are_weights_reshaped{ false }; /**< Reshape the weights tensor if false. */ - bool retain_internal_weights{ false }; /**< Retain internal reshaped weights. */ - bool fp_mixed_precision{ false }; /**< Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. */ - ActivationLayerInfo activation_info{}; /**< Fused activation to apply after the matrix multiplication. 
*/ - - /** Sets the weights trained data layout - * - * @param[in] layout Data layout that the weights were trained with - * - * @return Updated object - */ - FullyConnectedLayerInfo &set_weights_trained_layout(DataLayout layout) - { - weights_trained_layout = layout; - return *this; - } - /** Sets the transpose weights flag - * - * @param[in] should_transpose_weights Boolean flag indicating if weights should be transposed - * - * @return Updated object - */ - FullyConnectedLayerInfo &set_transpose_weights(bool should_transpose_weights) - { - transpose_weights = should_transpose_weights; - return *this; - } + float _bbox_xform_clip; }; /** Normalization Layer Information class */ @@ -1624,7 +1539,12 @@ public: * @param[in] is_scaled (Optional) Boolean that specifies if alpha will be scaled by the normalization size or not. * Should be false to follow [Krichevksy 2012]. */ - NormalizationLayerInfo(NormType type, uint32_t norm_size = 5, float alpha = 0.0001f, float beta = 0.5f, float kappa = 1.f, bool is_scaled = true) + NormalizationLayerInfo(NormType type, + uint32_t norm_size = 5, + float alpha = 0.0001f, + float beta = 0.5f, + float kappa = 1.f, + bool is_scaled = true) : _type(type), _norm_size(norm_size), _alpha(alpha), _beta(beta), _kappa(kappa), _is_scaled(is_scaled) { } @@ -1690,13 +1610,74 @@ private: bool _is_scaled; }; +class StridedSliceLayerInfo +{ +public: + /** Default Constructor + * + * @param[in] begin_mask (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] end_mask (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. + * @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. 
+ */ + StridedSliceLayerInfo(int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0) + : _begin_mask(begin_mask), _end_mask(end_mask), _shrink_axis_mask(shrink_axis_mask) + { + } + + /* Get the begin mask value */ + int32_t begin_mask() const + { + return _begin_mask; + } + + /* Get the end mask value */ + int32_t end_mask() const + { + return _end_mask; + } + + /* Get the shrink axis mask value */ + int32_t shrink_axis_mask() const + { + return _shrink_axis_mask; + } + +private: + int32_t _begin_mask; + int32_t _end_mask; + int32_t _shrink_axis_mask; +}; + +// OHWIo<interleave_by>i<block_by> +inline int interleave_by(const WeightFormat wf) +{ + return (static_cast<int>(wf) >> 8) & 0xFFF; +} +inline int block_by(const WeightFormat wf) +{ + return (static_cast<int>(wf) >> 20) & 0xF; +} +inline bool is_fixed_format(const WeightFormat &wf) +{ + return wf != WeightFormat::UNSPECIFIED && wf != WeightFormat::ANY; +} +inline bool is_fixed_format_fast_math(const WeightFormat &wf) +{ + return (static_cast<int>(wf) >> 4) & 0x1; +} + /** Convolution Layer Weights Information class. This class stores the necessary information to compute convolution layer when the weights are already reshaped */ class WeightsInfo { public: /** Default constructor */ WeightsInfo() - : _are_reshaped(false), _kernel_width(0), _kernel_height(0), _num_kernels(0), _retain_internal_weights(false) + : _are_reshaped(false), + _kernel_width(0), + _kernel_height(0), + _num_kernels(0), + _retain_internal_weights(false), + _weight_format(arm_compute::WeightFormat::UNSPECIFIED) { } /** Constructor @@ -1706,9 +1687,20 @@ public: * @param[in] kernel_height Kernel height. * @param[in] num_kernels Number of convolution kernels. * @param[in] retain_internal_weights (Optional) True if internal reshaped weights must be retained. Used for reconfiguration purposes. Default is false. + * @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. 
Default is arm_compute::WeightFormat::UNSPECIFIED. */ - WeightsInfo(bool are_reshaped, unsigned int kernel_width, unsigned int kernel_height, unsigned int num_kernels, bool retain_internal_weights = false) - : _are_reshaped(are_reshaped), _kernel_width(kernel_width), _kernel_height(kernel_height), _num_kernels(num_kernels), _retain_internal_weights(retain_internal_weights) + WeightsInfo(bool are_reshaped, + unsigned int kernel_width, + unsigned int kernel_height, + unsigned int num_kernels, + bool retain_internal_weights = false, + arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED) + : _are_reshaped(are_reshaped), + _kernel_width(kernel_width), + _kernel_height(kernel_height), + _num_kernels(num_kernels), + _retain_internal_weights(retain_internal_weights), + _weight_format(weight_format) { } /** Flag which specifies if the weights tensor has been reshaped. @@ -1739,22 +1731,40 @@ public: { return _retain_internal_weights; } + arm_compute::WeightFormat weight_format() const + { + return _weight_format; + } + void set_weight_format(arm_compute::WeightFormat weight_format) + { + _weight_format = weight_format; + } + + unsigned int kernel_width() const + { + return _kernel_width; + } + unsigned int kernel_height() const + { + return _kernel_height; + } private: - const bool _are_reshaped; - const unsigned int _kernel_width; - const unsigned int _kernel_height; - const unsigned int _num_kernels; - const bool _retain_internal_weights; + bool _are_reshaped; + unsigned int _kernel_width; + unsigned int _kernel_height; + unsigned int _num_kernels; + bool _retain_internal_weights; + arm_compute::WeightFormat _weight_format; }; /** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape. 
* - * The matrix A can only be reshaped through @ref CLGEMMReshapeLHSMatrixKernel or @ref NEGEMMInterleave4x4Kernel or @ref GCGEMMInterleave4x4Kernel - * Note: Optionally just for @ref CLGEMMReshapeLHSMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block + * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref cpu::kernels::CpuGemmInterleave4x4Kernel + * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block * - * The matrix B can only be reshaped through @ref CLGEMMReshapeRHSMatrixKernel or @ref NEGEMMTranspose1xWKernel or @ref GCGEMMTranspose1xWKernel - * Note: Optionally just for @ref CLGEMMReshapeRHSMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block + * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref cpu::kernels::CpuGemmTranspose1xWKernel + * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block * */ class GEMMReshapeInfo final @@ -1762,7 +1772,14 @@ class GEMMReshapeInfo final public: /** Default constructor */ GEMMReshapeInfo() - : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1), _depth_output_gemm3d(0), _reinterpret_input_as_3d(false), _broadcast_bias(false) + : _m(1), + _n(1), + _k(1), + _mult_transpose1xW_width(1), + _mult_interleave4x4_height(1), + _depth_output_gemm3d(0), + _reinterpret_input_as_3d(false), + _broadcast_bias(false) { } /** Constructor @@ -1778,9 +1795,22 @@ public: * to perform 1x1 convolutions with the NHWC data layout) * @param[in] broadcast_bias (Optional) Broadcast the shape of 
the bias tensor from a vector to a matrix. */ - GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool broadcast_bias = false) - : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height), _depth_output_gemm3d(depth_output_gemm3d), - _reinterpret_input_as_3d(reinterpret_input_as_3d), _broadcast_bias(broadcast_bias) + GEMMReshapeInfo(int m, + int n, + int k, + int mult_transpose1xW_width = 1, + int mult_interleave4x4_height = 1, + int depth_output_gemm3d = 0, + bool reinterpret_input_as_3d = false, + bool broadcast_bias = false) + : _m(m), + _n(n), + _k(k), + _mult_transpose1xW_width(mult_transpose1xW_width), + _mult_interleave4x4_height(mult_interleave4x4_height), + _depth_output_gemm3d(depth_output_gemm3d), + _reinterpret_input_as_3d(reinterpret_input_as_3d), + _broadcast_bias(broadcast_bias) { } /** Number of matrix A rows @@ -1852,45 +1882,14 @@ public: }; private: - const int _m; - const int _n; - const int _k; - const int _mult_transpose1xW_width; - const int _mult_interleave4x4_height; - const int _depth_output_gemm3d; - const bool _reinterpret_input_as_3d; - const bool _broadcast_bias; -}; - -struct DepthwiseConvolutionReshapeInfo -{ - unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */ - bool transpose{ false }; /**< True if the block MxC0 (where M is the area of the filter i.e. 
KwxKh) has to be transposed */ -}; - -/** GEMMLowp output stage type */ -enum class GEMMLowpOutputStageType -{ - NONE, /**< No quantization */ - QUANTIZE_DOWN, /**< Quantize using an integer multiplication */ - QUANTIZE_DOWN_FIXEDPOINT, /**< Quantize using a fixed point multiplication */ - QUANTIZE_DOWN_FLOAT /**< Quantize using a floating point multiplication */ -}; - -/** GEMMLowp output stage info */ -struct GEMMLowpOutputStageInfo -{ - GEMMLowpOutputStageType type{ GEMMLowpOutputStageType::NONE }; /**< GEMMLowp output stage type */ - int32_t gemmlowp_offset{ 0 }; /**< GEMMLowp output stage offset used for quantizing to QASYMM8 */ - int32_t gemmlowp_multiplier{ 0 }; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - int32_t gemmlowp_shift{ 0 }; /**< GEMMLowp output stage shift used for quantizing to uint8 */ - int32_t gemmlowp_min_bound{ std::numeric_limits<int32_t>::lowest() }; /**< GEMMLowp min value used to saturate down the output result before converting back to QASYMM8 */ - int32_t gemmlowp_max_bound{ std::numeric_limits<int32_t>::max() }; /**< GEMMLowp max value used to saturate down the output result before converting back to QASYMM8 */ - std::vector<int32_t> gemmlowp_multipliers{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - std::vector<int32_t> gemmlowp_shifts{}; /**< GEMMLowp output stage multiplier used for quantizing to QASYMM8 */ - float gemmlowp_real_multiplier{ 0 }; /**< GEMMLowp output stage real multiplier used for quantizing to QASYMM8 */ - bool is_quantized_per_channel{ false }; /**< GEMMLowp quantized per-channel flag */ - DataType output_data_type{ DataType::UNKNOWN }; /**< Output tensor data type to use if the output is not initialized */ + int _m; + int _n; + int _k; + int _mult_transpose1xW_width; + int _mult_interleave4x4_height; + int _depth_output_gemm3d; + bool _reinterpret_input_as_3d; + bool _broadcast_bias; }; /** GEMM LHS (Left Hand Side) matrix information */ @@ -1901,211 
+1900,31 @@ struct GEMMLHSMatrixInfo : m0(m), k0(k), v0(v), transpose(trans), interleave(inter) { } - unsigned int m0{ 1 }; /**< Number of rows processed by the matrix multiplication */ - unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ - unsigned int v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ - bool transpose{ true }; /**< True if the (m0xk0) block has to be transposed before been stored */ - bool interleave{ true }; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ + unsigned int m0{1}; /**< Number of rows processed by the matrix multiplication */ + unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */ + unsigned int v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + bool transpose{true}; /**< True if the (m0xk0) block has to be transposed before been stored */ + bool interleave{true}; /**< True if the v0 (m0xk0) blocks have to be interleaved in the output row */ }; /** GEMM RHS (Right Hand Side) matrix information */ struct GEMMRHSMatrixInfo { GEMMRHSMatrixInfo() = default; - GEMMRHSMatrixInfo(unsigned int n, unsigned int k, unsigned int h, bool trans, bool inter) - : n0(n), k0(k), h0(h), transpose(trans), interleave(inter) + GEMMRHSMatrixInfo(unsigned int n, unsigned int k, unsigned int h, bool trans, bool inter, bool export_to_cl_img) + : n0(n), k0(k), h0(h), transpose(trans), interleave(inter), export_to_cl_image(export_to_cl_img) { } - unsigned int n0{ 1 }; /**< Number of columns processed by the matrix multiplication */ - unsigned int k0{ 1 }; /**< Number of partial accumulations performed by the matrix multiplication */ - unsigned int h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool transpose{ true }; /**< True if the (k0xn0) block has to be transposed before been stored */ - bool interleave{ true }; 
/**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ - bool export_to_cl_image{ false }; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */ + unsigned int n0{1}; /**< Number of columns processed by the matrix multiplication */ + unsigned int k0{1}; /**< Number of partial accumulations performed by the matrix multiplication */ + unsigned int h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool transpose{true}; /**< True if the (k0xn0) block has to be transposed before been stored */ + bool interleave{true}; /**< True if the h0 (k0xn0) blocks have to be interleaved in the output row */ + bool export_to_cl_image{ + false}; /**< True if the reshaped rhs has to be exported to cl_image. n0 must be equal to 4 */ }; -/** GEMM information class. This class stores the necessary information to compute GEMM functions - * - * This object also contains the information about how matrix A and matrix B have been reshaped - * - */ -class GEMMInfo -{ -public: - /** Default constructor */ - GEMMInfo() noexcept - : _is_a_reshaped(false), - _is_b_reshaped(false), - _reshape_b_only_on_first_run(true), - _depth_output_gemm3d(0), - _reinterpret_input_as_3d(false), - _retain_internal_weights(false), - _gemmlowp_output_stage(), - _fp_mixed_precision(false), - _broadcast_bias(false), - _pretranpose_B(true), - _activation_info() - { - } - /** Constructor - * - * @param[in] is_a_reshaped True if the matrix A has been reshaped - * @param[in] is_b_reshaped True if the matrix B has been reshaped - * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run - * @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel - * If 0 the output will not be reinterpreted as 3D. Default 0 - * @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. 
this flag should be set to true when GEMM is used - * to perform 1x1 convolutions with the NHWC data layout) - * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run - * @param[in] gemmlowp_output_stage (Optional) GEMMLowp Output stage info - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. - * @param[in] broadcast_bias (Optional) Broadcast the shape of the bias tensor from a vector to a matrix. - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - */ - GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 0, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false, - GEMMLowpOutputStageInfo gemmlowp_output_stage = GEMMLowpOutputStageInfo(), bool fp_mixed_precision = false, bool broadcast_bias = false, - const ActivationLayerInfo &activation_info = ActivationLayerInfo()) noexcept - : _is_a_reshaped(is_a_reshaped), - _is_b_reshaped(is_b_reshaped), - _reshape_b_only_on_first_run(reshape_b_only_on_first_run), - _depth_output_gemm3d(depth_output_gemm3d), - _reinterpret_input_as_3d(reinterpret_input_as_3d), - _retain_internal_weights(retain_internal_weights), - _gemmlowp_output_stage(gemmlowp_output_stage), - _fp_mixed_precision(fp_mixed_precision), - _broadcast_bias(broadcast_bias), - _pretranpose_B(reshape_b_only_on_first_run), - _activation_info(activation_info) - { - } - /** Flag which specifies if the matrix A has been reshaped - * - * @return True if the matrix A has been reshaped - */ - bool is_a_reshaped() const - { - return _is_a_reshaped; - }; - /** Flag which specifies if the matrix B has been reshaped - * - * @return True if the matrix B has been reshaped - */ - bool is_b_reshaped() const - { - return _is_b_reshaped; - }; - /** Flag which specifies if the reshape of matrix B should executed only for the first - * - * @note This flag could 
be set to TRUE when GEMM is used to accelerate convolution layer - * - * @return True if the reshaped of matrix B happens only for the first run - */ - bool reshape_b_only_on_first_run() const - { - return _reshape_b_only_on_first_run; - }; - /** Depth of the output when GEMM output is reinterpreted as 3D tensor - * - * @return the depth of the output tensor - */ - int depth_output_gemm3d() const - { - return _depth_output_gemm3d; - }; - /** Flag which specifies if the input tensor has to be reinterpreted as 3D - * - * @return True if the input tensor has to be reinterpreted as 3D tensor - */ - bool reinterpret_input_as_3d() const - { - return _reinterpret_input_as_3d; - }; - /** Flag which specifies if the weights tensor has to be retained from previous run - * - * @return True if the weights tensor has to be retained - */ - bool retain_internal_weights() const - { - return _retain_internal_weights; - }; - /** GEMMLowp output stage - * - * @return the GEMMLowp output stage info - */ - GEMMLowpOutputStageInfo gemmlowp_output_stage() const - { - return _gemmlowp_output_stage; - }; - /** Sets GEMMLowp output stage - * - * @param[in] output_stage Output stage to set - */ - void set_gemmlowp_output_stage(GEMMLowpOutputStageInfo &output_stage) - { - _gemmlowp_output_stage = output_stage; - }; - /** Flag which specifies if a wider accumulator should be used. - * - * @return True if a wider accumulator has to be used - */ - bool fp_mixed_precision() const - { - return _fp_mixed_precision; - }; - /** Flag which specifies whether to broadcast the shape of the bias tensor. - * - * @return True if the shape of the bias tensor is to be broadcasted. - */ - bool broadcast_bias() const - { - return _broadcast_bias; - }; - /** Flag which specifies whether b should be pre-transposed if supported. - * - * @return True if b should be pre-transposed else false. 
- */ - bool pretranpose_B() const - { - return _pretranpose_B; - }; - /** Set pre-transpose b flag - * - * @param[in] flag Flag to set - */ - void set_pretranpose_B(bool flag) - { - _pretranpose_B = flag; - } - /** Activation layer to apply after the matrix multiplication - * - * @return ActivationLayerInfo object - */ - ActivationLayerInfo activation_info() const - { - return _activation_info; - } - /** Set activation layer info - * - * @param[in] activation_info ActivationLayerInfo object to set - */ - void set_activation_info(const ActivationLayerInfo &activation_info) - { - _activation_info = activation_info; - } - -private: - bool _is_a_reshaped; - bool _is_b_reshaped; - bool _reshape_b_only_on_first_run; - int _depth_output_gemm3d; - bool _reinterpret_input_as_3d; - bool _retain_internal_weights; - GEMMLowpOutputStageInfo _gemmlowp_output_stage; - bool _fp_mixed_precision; - bool _broadcast_bias; - bool _pretranpose_B; - ActivationLayerInfo _activation_info; -}; +class ITensorInfo; /** Winograd information */ struct WinogradInfo @@ -2118,16 +1937,23 @@ struct WinogradInfo * @param[in] conv_info Convolution info (Pads, strides) * @param[in] data_layout Data layout to use for the output tensor once the convolution has been applied */ - WinogradInfo(Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) - : output_tile_size(output_tile_sz), kernel_size(kernel_sz), input_dimensions(input_dims), convolution_info(conv_info), output_data_layout(data_layout) - { - } - - Size2D output_tile_size{}; /**< Width and height of the output tile */ - Size2D kernel_size{}; /**< Width and height of the kernel*/ - Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ - PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) 
*/ - DataLayout output_data_layout{ DataLayout::NCHW }; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ + WinogradInfo( + Size2D output_tile_sz, Size2D kernel_sz, Size2D input_dims, PadStrideInfo conv_info, DataLayout data_layout) + : output_tile_size(output_tile_sz), + kernel_size(kernel_sz), + input_dimensions(input_dims), + convolution_info(conv_info), + output_data_layout(data_layout) + { + } + + Size2D output_tile_size{}; /**< Width and height of the output tile */ + Size2D kernel_size{}; /**< Width and height of the kernel*/ + Size2D input_dimensions{}; /**< Width and height of the input tensor before the convolution is applied */ + PadStrideInfo convolution_info{}; /**< Convolution info (Pads, strides,...) */ + DataLayout output_data_layout{ + DataLayout:: + NCHW}; /**< Data layout to use for the output tensor once the convolution has been applied (NCHW or NHWC) */ }; /** IO formatting information class*/ @@ -2186,5 +2012,8 @@ struct IOFormatInfo /** Align columns */ bool align_columns; }; + +/** Class for holding information related to cropping */ +using CropInfo = Padding2D; } // namespace arm_compute -#endif /* ARM_COMPUTE_TYPES_H */ +#endif // ACL_ARM_COMPUTE_CORE_TYPES_H diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index eff6157b1f..a2146522f7 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,63 +26,29 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Rounding.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Version.h" -#include <algorithm> -#include <cstdint> -#include <cstdlib> -#include <iomanip> +#include <cmath> #include <numeric> #include <sstream> #include <string> #include <type_traits> +#include <unordered_map> #include <utility> -#include <vector> -namespace arm_compute -{ -/** Calculate the rounded up quotient of val / m. - * - * @param[in] val Value to divide and round up. - * @param[in] m Value to divide by. - * - * @return the result. - */ -template <typename S, typename T> -constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m) -{ - return (val + m - 1) / m; -} - -/** Computes the smallest number larger or equal to value that is a multiple of divisor. - * - * @param[in] value Lower bound value - * @param[in] divisor Value to compute multiple of. - * - * @return the result. - */ -template <typename S, typename T> -inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor) -{ - ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); - return DIV_CEIL(value, divisor) * divisor; -} +/* Convenience / backwards compatibility includes */ +#include "arm_compute/core/utils/ActivationFunctionUtils.h" +#include "arm_compute/core/utils/DataLayoutUtils.h" +#include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/core/utils/FormatUtils.h" +#include "arm_compute/core/utils/InterpolationPolicyUtils.h" +#include "arm_compute/core/utils/StringUtils.h" -/** Computes the largest number smaller or equal to value that is a multiple of divisor. - * - * @param[in] value Upper bound value - * @param[in] divisor Value to compute multiple of. - * - * @return the result. 
- */ -template <typename S, typename T> -inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor) +namespace arm_compute { - ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); - return (value / divisor) * divisor; -} +class ITensor; +class ITensorInfo; +class ActivationLayerInfo; /** Load an entire file in memory * @@ -93,814 +59,6 @@ inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) */ std::string read_file(const std::string &filename, bool binary); -/** The size in bytes of the data type - * - * @param[in] data_type Input data type - * - * @return The size in bytes of the data type - */ -inline size_t data_size_from_type(DataType data_type) -{ - switch(data_type) - { - case DataType::U8: - case DataType::S8: - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - return 1; - case DataType::U16: - case DataType::S16: - case DataType::QSYMM16: - case DataType::QASYMM16: - case DataType::BFLOAT16: - case DataType::F16: - return 2; - case DataType::F32: - case DataType::U32: - case DataType::S32: - return 4; - case DataType::F64: - case DataType::U64: - case DataType::S64: - return 8; - case DataType::SIZET: - return sizeof(size_t); - default: - ARM_COMPUTE_ERROR("Invalid data type"); - return 0; - } -} - -/** The size in bytes of the pixel format - * - * @param[in] format Input format - * - * @return The size in bytes of the pixel format - */ -inline size_t pixel_size_from_format(Format format) -{ - switch(format) - { - case Format::U8: - return 1; - case Format::U16: - case Format::S16: - case Format::BFLOAT16: - case Format::F16: - case Format::UV88: - case Format::YUYV422: - case Format::UYVY422: - return 2; - case Format::RGB888: - return 3; - case Format::RGBA8888: - return 4; - case Format::U32: - case Format::S32: - case Format::F32: - return 4; - //Doesn't make sense for planar formats: - case Format::NV12: - case Format::NV21: - 
case Format::IYUV: - case Format::YUV444: - default: - ARM_COMPUTE_ERROR("Undefined pixel size for given format"); - return 0; - } -} - -/** The size in bytes of the data type - * - * @param[in] dt Input data type - * - * @return The size in bytes of the data type - */ -inline size_t element_size_from_data_type(DataType dt) -{ - switch(dt) - { - case DataType::S8: - case DataType::U8: - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - return 1; - case DataType::U16: - case DataType::S16: - case DataType::QSYMM16: - case DataType::QASYMM16: - case DataType::BFLOAT16: - case DataType::F16: - return 2; - case DataType::U32: - case DataType::S32: - case DataType::F32: - return 4; - default: - ARM_COMPUTE_ERROR("Undefined element size for given data type"); - return 0; - } -} - -/** Return the data type used by a given single-planar pixel format - * - * @param[in] format Input format - * - * @return The size in bytes of the pixel format - */ -inline DataType data_type_from_format(Format format) -{ - switch(format) - { - case Format::U8: - case Format::UV88: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - return DataType::U8; - case Format::U16: - return DataType::U16; - case Format::S16: - return DataType::S16; - case Format::U32: - return DataType::U32; - case Format::S32: - return DataType::S32; - case Format::BFLOAT16: - return DataType::BFLOAT16; - case Format::F16: - return DataType::F16; - case Format::F32: - return DataType::F32; - //Doesn't make sense for planar formats: - case Format::NV12: - case Format::NV21: - case Format::IYUV: - case Format::YUV444: - default: - ARM_COMPUTE_ERROR("Not supported data_type for given format"); - return DataType::UNKNOWN; - } -} - -/** Return the plane index of a given channel given an input format. 
- * - * @param[in] format Input format - * @param[in] channel Input channel - * - * @return The plane index of the specific channel of the specific format - */ -inline int plane_idx_from_channel(Format format, Channel channel) -{ - switch(format) - { - // Single planar formats have a single plane - case Format::U8: - case Format::U16: - case Format::S16: - case Format::U32: - case Format::S32: - case Format::BFLOAT16: - case Format::F16: - case Format::F32: - case Format::UV88: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - return 0; - // Multi planar formats - case Format::NV12: - case Format::NV21: - { - // Channel U and V share the same plane of format UV88 - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - case Channel::V: - return 1; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::IYUV: - case Format::YUV444: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 1; - case Channel::V: - return 2; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - default: - ARM_COMPUTE_ERROR("Not supported format"); - return 0; - } -} - -/** Return the channel index of a given channel given an input format. 
- * - * @param[in] format Input format - * @param[in] channel Input channel - * - * @return The channel index of the specific channel of the specific format - */ -inline int channel_idx_from_format(Format format, Channel channel) -{ - switch(format) - { - case Format::RGB888: - { - switch(channel) - { - case Channel::R: - return 0; - case Channel::G: - return 1; - case Channel::B: - return 2; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::RGBA8888: - { - switch(channel) - { - case Channel::R: - return 0; - case Channel::G: - return 1; - case Channel::B: - return 2; - case Channel::A: - return 3; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::YUYV422: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 1; - case Channel::V: - return 3; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::UYVY422: - { - switch(channel) - { - case Channel::Y: - return 1; - case Channel::U: - return 0; - case Channel::V: - return 2; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::NV12: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 0; - case Channel::V: - return 1; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::NV21: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 1; - case Channel::V: - return 0; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - case Format::YUV444: - case Format::IYUV: - { - switch(channel) - { - case Channel::Y: - return 0; - case Channel::U: - return 0; - case Channel::V: - return 0; - default: - ARM_COMPUTE_ERROR("Not supported channel"); - return 0; - } - } - default: - ARM_COMPUTE_ERROR("Not supported format"); - return 0; - } -} - -/** Return the number of planes for a given format - * - * @param[in] format Input 
format - * - * @return The number of planes for a given image format. - */ -inline size_t num_planes_from_format(Format format) -{ - switch(format) - { - case Format::U8: - case Format::S16: - case Format::U16: - case Format::S32: - case Format::U32: - case Format::BFLOAT16: - case Format::F16: - case Format::F32: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - return 1; - case Format::NV12: - case Format::NV21: - return 2; - case Format::IYUV: - case Format::YUV444: - return 3; - default: - ARM_COMPUTE_ERROR("Not supported format"); - return 0; - } -} - -/** Return the number of channels for a given single-planar pixel format - * - * @param[in] format Input format - * - * @return The number of channels for a given image format. - */ -inline size_t num_channels_from_format(Format format) -{ - switch(format) - { - case Format::U8: - case Format::U16: - case Format::S16: - case Format::U32: - case Format::S32: - case Format::BFLOAT16: - case Format::F16: - case Format::F32: - return 1; - // Because the U and V channels are subsampled - // these formats appear like having only 2 channels: - case Format::YUYV422: - case Format::UYVY422: - return 2; - case Format::UV88: - return 2; - case Format::RGB888: - return 3; - case Format::RGBA8888: - return 4; - //Doesn't make sense for planar formats: - case Format::NV12: - case Format::NV21: - case Format::IYUV: - case Format::YUV444: - default: - return 0; - } -} - -/** Return the promoted data type of a given data type. - * - * @note If promoted data type is not supported an error will be thrown - * - * @param[in] dt Data type to get the promoted type of. 
- * - * @return Promoted data type - */ -inline DataType get_promoted_data_type(DataType dt) -{ - switch(dt) - { - case DataType::U8: - return DataType::U16; - case DataType::S8: - return DataType::S16; - case DataType::U16: - return DataType::U32; - case DataType::S16: - return DataType::S32; - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - case DataType::QSYMM16: - case DataType::QASYMM16: - case DataType::BFLOAT16: - case DataType::F16: - case DataType::U32: - case DataType::S32: - case DataType::F32: - ARM_COMPUTE_ERROR("Unsupported data type promotions!"); - default: - ARM_COMPUTE_ERROR("Undefined data type!"); - } - return DataType::UNKNOWN; -} - -/** Compute the mininum and maximum values a data type can take - * - * @param[in] dt Data type to get the min/max bounds of - * - * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue. - */ -inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt) -{ - PixelValue min{}; - PixelValue max{}; - switch(dt) - { - case DataType::U8: - case DataType::QASYMM8: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest())); - max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max())); - break; - } - case DataType::S8: - case DataType::QSYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest())); - max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max())); - break; - } - case DataType::U16: - case DataType::QASYMM16: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest())); - max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max())); - break; - } - case DataType::S16: - case DataType::QSYMM16: - { - min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest())); - max = 
PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max())); - break; - } - case DataType::U32: - { - min = PixelValue(std::numeric_limits<uint32_t>::lowest()); - max = PixelValue(std::numeric_limits<uint32_t>::max()); - break; - } - case DataType::S32: - { - min = PixelValue(std::numeric_limits<int32_t>::lowest()); - max = PixelValue(std::numeric_limits<int32_t>::max()); - break; - } - case DataType::BFLOAT16: - { - min = PixelValue(bfloat16::lowest()); - max = PixelValue(bfloat16::max()); - break; - } - case DataType::F16: - { - min = PixelValue(std::numeric_limits<half>::lowest()); - max = PixelValue(std::numeric_limits<half>::max()); - break; - } - case DataType::F32: - { - min = PixelValue(std::numeric_limits<float>::lowest()); - max = PixelValue(std::numeric_limits<float>::max()); - break; - } - default: - ARM_COMPUTE_ERROR("Undefined data type!"); - } - return std::make_tuple(min, max); -} - -/** Return true if the given format has horizontal subsampling. - * - * @param[in] format Format to determine subsampling. - * - * @return True if the format can be subsampled horizontaly. - */ -inline bool has_format_horizontal_subsampling(Format format) -{ - return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? true : false; -} - -/** Return true if the given format has vertical subsampling. - * - * @param[in] format Format to determine subsampling. - * - * @return True if the format can be subsampled verticaly. - */ -inline bool has_format_vertical_subsampling(Format format) -{ - return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) ? 
true : false; -} - -/** Separate a 2D convolution into two 1D convolutions - * - * @param[in] conv 2D convolution - * @param[out] conv_col 1D vertical convolution - * @param[out] conv_row 1D horizontal convolution - * @param[in] size Size of the 2D convolution - * - * @return true if the separation was successful - */ -inline bool separate_matrix(const int16_t *conv, int16_t *conv_col, int16_t *conv_row, uint8_t size) -{ - int32_t min_col = -1; - int16_t min_col_val = -1; - - for(int32_t i = 0; i < size; ++i) - { - if(conv[i] != 0 && (min_col < 0 || abs(min_col_val) > abs(conv[i]))) - { - min_col = i; - min_col_val = conv[i]; - } - } - - if(min_col < 0) - { - return false; - } - - for(uint32_t j = 0; j < size; ++j) - { - conv_col[j] = conv[min_col + j * size]; - } - - for(uint32_t i = 0; i < size; i++) - { - if(static_cast<int>(i) == min_col) - { - conv_row[i] = 1; - } - else - { - int16_t coeff = conv[i] / conv[min_col]; - - for(uint32_t j = 1; j < size; ++j) - { - if(conv[i + j * size] != (conv_col[j] * coeff)) - { - return false; - } - } - - conv_row[i] = coeff; - } - } - - return true; -} - -/** Calculate the scale of the given square matrix - * - * The scale is the absolute value of the sum of all the coefficients in the matrix. - * - * @note If the coefficients add up to 0 then the scale is set to 1. - * - * @param[in] matrix Matrix coefficients - * @param[in] matrix_size Number of elements per side of the square matrix. (Number of coefficients = matrix_size * matrix_size). - * - * @return The absolute value of the sum of the coefficients if they don't add up to 0, otherwise 1. - */ -inline uint32_t calculate_matrix_scale(const int16_t *matrix, unsigned int matrix_size) -{ - const size_t size = matrix_size * matrix_size; - - return std::max(1, std::abs(std::accumulate(matrix, matrix + size, 0))); -} - -/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats. 
- * - * @note Adding here a few links discussing the issue of odd size and sharing the same solution: - * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a> - * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a> - * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&can=1&q=odd%20width">libYUV</a> - * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> * - * - * @param[in, out] shape Tensor shape of 2D size - * @param[in] format Format of the tensor - * - * @return The adjusted tensor shape. - */ -inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format) -{ - TensorShape output{ shape }; - - // Force width to be even for formats which require subsampling of the U and V channels - if(has_format_horizontal_subsampling(format)) - { - output.set(0, output.x() & ~1U); - } - - // Force height to be even for formats which require subsampling of the U and V channels - if(has_format_vertical_subsampling(format)) - { - output.set(1, output.y() & ~1U); - } - - return output; -} - -/** Calculate subsampled shape for a given format and channel - * - * @param[in] shape Shape of the tensor to calculate the extracted channel. - * @param[in] format Format of the tensor. - * @param[in] channel Channel to create tensor shape to be extracted. - * - * @return The subsampled tensor shape. 
- */ -inline TensorShape calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN) -{ - TensorShape output{ shape }; - - // Subsample shape only for U or V channel - if(Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel) - { - // Subsample width for the tensor shape when channel is U or V - if(has_format_horizontal_subsampling(format)) - { - output.set(0, output.x() / 2U); - } - - // Subsample height for the tensor shape when channel is U or V - if(has_format_vertical_subsampling(format)) - { - output.set(1, output.y() / 2U); - } - } - - return output; -} - -/** Calculate accurary required by the horizontal and vertical convolution computations - * - * @param[in] conv_col Pointer to the vertical vector of the separated convolution filter - * @param[in] conv_row Pointer to the horizontal vector of the convolution filter - * @param[in] size Number of elements per vector of the separated matrix - * - * @return The return type is a pair. The first element of the pair is the biggest data type needed for the first stage. The second - * element of the pair is the biggest data type needed for the second stage. - */ -inline std::pair<DataType, DataType> data_type_for_convolution(const int16_t *conv_col, const int16_t *conv_row, size_t size) -{ - DataType first_stage = DataType::UNKNOWN; - DataType second_stage = DataType::UNKNOWN; - - auto gez = [](const int16_t &v) - { - return v >= 0; - }; - - auto accu_neg = [](const int &first, const int &second) - { - return first + (second < 0 ? second : 0); - }; - - auto accu_pos = [](const int &first, const int &second) - { - return first + (second > 0 ? 
second : 0); - }; - - const bool only_positive_coefficients = std::all_of(conv_row, conv_row + size, gez) && std::all_of(conv_col, conv_col + size, gez); - - if(only_positive_coefficients) - { - const int max_row_value = std::accumulate(conv_row, conv_row + size, 0) * UINT8_MAX; - const int max_value = std::accumulate(conv_col, conv_col + size, 0) * max_row_value; - - first_stage = (max_row_value <= UINT16_MAX) ? DataType::U16 : DataType::S32; - - second_stage = (max_value <= UINT16_MAX) ? DataType::U16 : DataType::S32; - } - else - { - const int min_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_neg) * UINT8_MAX; - const int max_row_value = std::accumulate(conv_row, conv_row + size, 0, accu_pos) * UINT8_MAX; - const int neg_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_neg); - const int pos_coeffs_sum = std::accumulate(conv_col, conv_col + size, 0, accu_pos); - const int min_value = neg_coeffs_sum * max_row_value + pos_coeffs_sum * min_row_value; - const int max_value = neg_coeffs_sum * min_row_value + pos_coeffs_sum * max_row_value; - - first_stage = ((INT16_MIN <= min_row_value) && (max_row_value <= INT16_MAX)) ? DataType::S16 : DataType::S32; - - second_stage = ((INT16_MIN <= min_value) && (max_value <= INT16_MAX)) ? DataType::S16 : DataType::S32; - } - - return std::make_pair(first_stage, second_stage); -} - -/** Calculate the accuracy required by the squared convolution calculation. 
- * - * - * @param[in] conv Pointer to the squared convolution matrix - * @param[in] size The total size of the convolution matrix - * - * @return The return is the biggest data type needed to do the convolution - */ -inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t size) -{ - auto gez = [](const int16_t v) - { - return v >= 0; - }; - - const bool only_positive_coefficients = std::all_of(conv, conv + size, gez); - - if(only_positive_coefficients) - { - const int max_conv_value = std::accumulate(conv, conv + size, 0) * UINT8_MAX; - if(max_conv_value <= UINT16_MAX) - { - return DataType::U16; - } - else - { - return DataType::S32; - } - } - else - { - const int min_value = std::accumulate(conv, conv + size, 0, [](int a, int b) - { - return b < 0 ? a + b : a; - }) - * UINT8_MAX; - - const int max_value = std::accumulate(conv, conv + size, 0, [](int a, int b) - { - return b > 0 ? a + b : a; - }) - * UINT8_MAX; - - if((INT16_MIN <= min_value) && (INT16_MAX >= max_value)) - { - return DataType::S16; - } - else - { - return DataType::S32; - } - } -} - /** Permutes the given dimensions according the permutation vector * * @param[in,out] dimensions Dimensions to be permuted. 
@@ -911,7 +69,7 @@ template <typename T> inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector &perm) { const auto old_dim = utility::make_array<Dimensions<T>::num_max_dimensions>(dimensions.begin(), dimensions.end()); - for(unsigned int i = 0; i < perm.num_dimensions(); ++i) + for (unsigned int i = 0; i < perm.num_dimensions(); ++i) { T dimension_val = old_dim[i]; dimensions.set(perm[i], dimension_val); @@ -929,7 +87,11 @@ inline void permute_strides(Dimensions<T> &dimensions, const PermutationVector & * * @return PadStrideInfo for SAME padding */ -PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info, DataLayout data_layout = DataLayout::NCHW, const Size2D &dilation = Size2D(1u, 1u), +PadStrideInfo calculate_same_pad(TensorShape input_shape, + TensorShape weights_shape, + PadStrideInfo conv_info, + DataLayout data_layout = DataLayout::NCHW, + const Size2D &dilation = Size2D(1u, 1u), const DimensionRoundingType &rounding_type = DimensionRoundingType::FLOOR); /** Returns expected width and height of the deconvolution's output tensor. @@ -942,8 +104,10 @@ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_sh * * @return A pair with the new width in the first position and the new height in the second. */ -std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height, - unsigned int kernel_width, unsigned int kernel_height, +std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned int in_width, + unsigned int in_height, + unsigned int kernel_width, + unsigned int kernel_height, const PadStrideInfo &pad_stride_info); /** Returns expected width and height of output scaled tensor depending on dimensions rounding mode. 
@@ -957,11 +121,47 @@ std::pair<unsigned int, unsigned int> deconvolution_output_dimensions(unsigned i * * @return A pair with the new width in the first position and the new height in the second. */ -std::pair<unsigned int, unsigned int> scaled_dimensions(int width, int height, - int kernel_width, int kernel_height, +std::pair<unsigned int, unsigned int> scaled_dimensions(int width, + int height, + int kernel_width, + int kernel_height, const PadStrideInfo &pad_stride_info, const Size2D &dilation = Size2D(1U, 1U)); +/** Returns calculated width and height of output scaled tensor depending on dimensions rounding mode. + * + * @param[in] width Width of input tensor (Number of columns) + * @param[in] height Height of input tensor (Number of rows) + * @param[in] kernel_width Kernel width. + * @param[in] kernel_height Kernel height. + * @param[in] pad_stride_info Pad and stride information. + * + * @return A pair with the new width in the first position and the new height in the second, returned values can be < 1 + */ +std::pair<int, int> scaled_dimensions_signed( + int width, int height, int kernel_width, int kernel_height, const PadStrideInfo &pad_stride_info); + +/** Returns calculated width, height and depth of output scaled tensor depending on dimensions rounding mode. + * + * @param[in] width Width of input tensor + * @param[in] height Height of input tensor + * @param[in] depth Depth of input tensor + * @param[in] kernel_width Kernel width. + * @param[in] kernel_height Kernel height. + * @param[in] kernel_depth Kernel depth. + * @param[in] pool3d_info Pad and stride and round information for 3d pooling + * + * @return A tuple with the new width in the first position, the new height in the second, and the new depth in the third. 
+ * Returned values can be < 1 + */ +std::tuple<int, int, int> scaled_3d_dimensions_signed(int width, + int height, + int depth, + int kernel_width, + int kernel_height, + int kernel_depth, + const Pooling3dLayerInfo &pool3d_info); + /** Check if the given reduction operation should be handled in a serial way. * * @param[in] op Reduction operation to perform @@ -981,16 +181,6 @@ bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int */ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool is_log); -/** Returns resize ratio between input and output with consideration of aligned corners - * - * @param[in] input_size The input size - * @param[in] output_size the output size - * @param[in] align_corners True to align corners of input and output. Defaults to false. - * - * @return The ratio between input and output (i.e., the input size divided by the output size) - */ -float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_corners = false); - /** Returns a pair of minimum and maximum values for a quantized activation * * @param[in] act_info The information for activation @@ -999,15 +189,9 @@ float calculate_resize_ratio(size_t input_size, size_t output_size, bool align_c * * @return The pair with minimum and maximum values */ -std::pair<int32_t, int32_t> get_quantized_activation_min_max(ActivationLayerInfo act_info, DataType data_type, UniformQuantizationInfo oq_info); - -/** Convert a tensor format into a string. - * - * @param[in] format @ref Format to be translated to string. - * - * @return The string describing the format. - */ -const std::string &string_from_format(Format format); +std::pair<int32_t, int32_t> get_quantized_activation_min_max(const ActivationLayerInfo &act_info, + DataType data_type, + UniformQuantizationInfo oq_info); /** Convert a channel identity into a string. 
* @@ -1016,48 +200,7 @@ const std::string &string_from_format(Format format); * @return The string describing the channel. */ const std::string &string_from_channel(Channel channel); -/** Convert a data layout identity into a string. - * - * @param[in] dl @ref DataLayout to be translated to string. - * - * @return The string describing the data layout. - */ -const std::string &string_from_data_layout(DataLayout dl); -/** Convert a data type identity into a string. - * - * @param[in] dt @ref DataType to be translated to string. - * - * @return The string describing the data type. - */ -const std::string &string_from_data_type(DataType dt); -/** Convert a matrix pattern into a string. - * - * @param[in] pattern @ref MatrixPattern to be translated to string. - * - * @return The string describing the matrix pattern. - */ -const std::string &string_from_matrix_pattern(MatrixPattern pattern); -/** Translates a given activation function to a string. - * - * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. - * - * @return The string describing the activation function. - */ -const std::string &string_from_activation_func(ActivationLayerInfo::ActivationFunction act); -/** Translates a given non linear function to a string. - * - * @param[in] function @ref NonLinearFilterFunction to be translated to string. - * - * @return The string describing the non linear function. - */ -const std::string &string_from_non_linear_filter_function(NonLinearFilterFunction function); -/** Translates a given interpolation policy to a string. - * - * @param[in] policy @ref InterpolationPolicy to be translated to string. - * - * @return The string describing the interpolation policy. - */ -const std::string &string_from_interpolation_policy(InterpolationPolicy policy); + /** Translates a given border mode policy to a string. * * @param[in] border_mode @ref BorderMode to be translated to string. 
@@ -1079,162 +222,67 @@ const std::string &string_from_norm_type(NormType type); * @return The string describing the pooling type. */ const std::string &string_from_pooling_type(PoolingType type); -/** Translates a given GEMMLowp output stage to a string. - * - * @param[in] output_stage @ref GEMMLowpOutputStageInfo to be translated to string. - * - * @return The string describing the GEMMLowp output stage - */ -const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType output_stage); -/** Convert a PixelValue to a string, represented through the specific data type - * - * @param[in] value The PixelValue to convert - * @param[in] data_type The type to be used to convert the @p value +/** Check if the pool region is entirely outside the input tensor * - * @return String representation of the PixelValue through the given data type. - */ -std::string string_from_pixel_value(const PixelValue &value, const DataType data_type); -/** Lower a given string. + * @param[in] info @ref PoolingLayerInfo to be checked. * - * @param[in] val Given string to lower. - * - * @return The lowered string + * @return True if the pool region is entirely outside the input tensor, False otherwise. */ -std::string lower_string(const std::string &val); - -/** Check if a given data type is of floating point type +bool is_pool_region_entirely_outside_input(const PoolingLayerInfo &info); +/** Check if the 3d pool region is entirely outside the input tensor * - * @param[in] dt Input data type. + * @param[in] info @ref Pooling3dLayerInfo to be checked. * - * @return True if data type is of floating point type, else false. + * @return True if the pool region is entirely outside the input tensor, False otherwise. 
*/ -inline bool is_data_type_float(DataType dt) -{ - switch(dt) - { - case DataType::F16: - case DataType::F32: - return true; - default: - return false; - } -} - -/** Check if a given data type is of quantized type - * - * @note Quantized is considered a super-set of fixed-point and asymmetric data types. +bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info); +/** Check if the 3D padding is symmetric i.e. padding in each opposite sides are euqal (left=right, top=bottom and front=back) * - * @param[in] dt Input data type. + * @param[in] info @ref Padding3D input 3D padding object to check if it is symmetric * - * @return True if data type is of quantized type, else false. + * @return True if padding is symmetric */ -inline bool is_data_type_quantized(DataType dt) +inline bool is_symmetric(const Padding3D &info) { - switch(dt) - { - case DataType::QSYMM8: - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QSYMM8_PER_CHANNEL: - case DataType::QSYMM16: - case DataType::QASYMM16: - return true; - default: - return false; - } + return ((info.left == info.right) && (info.top == info.bottom) && (info.front == info.back)); } - -/** Check if a given data type is of asymmetric quantized type +/** Translates a given GEMMLowp output stage to a string. * - * @param[in] dt Input data type. + * @param[in] output_stage @ref GEMMLowpOutputStageInfo to be translated to string. * - * @return True if data type is of asymmetric quantized type, else false. 
+ * @return The string describing the GEMMLowp output stage */ -inline bool is_data_type_quantized_asymmetric(DataType dt) -{ - switch(dt) - { - case DataType::QASYMM8: - case DataType::QASYMM8_SIGNED: - case DataType::QASYMM16: - return true; - default: - return false; - } -} - -/** Check if a given data type is of asymmetric quantized signed type +const std::string &string_from_gemmlowp_output_stage(GEMMLowpOutputStageType output_stage); +/** Convert a PixelValue to a string, represented through the specific data type * - * @param[in] dt Input data type. + * @param[in] value The PixelValue to convert + * @param[in] data_type The type to be used to convert the @p value * - * @return True if data type is of asymmetric quantized signed type, else false. + * @return String representation of the PixelValue through the given data type. */ -inline bool is_data_type_quantized_asymmetric_signed(DataType dt) -{ - switch(dt) - { - case DataType::QASYMM8_SIGNED: - return true; - default: - return false; - } -} +std::string string_from_pixel_value(const PixelValue &value, const DataType data_type); -/** Check if a given data type is of symmetric quantized type +/** Stores padding information before configuring a kernel * - * @param[in] dt Input data type. + * @param[in] infos list of tensor infos to store the padding info for * - * @return True if data type is of symmetric quantized type, else false. 
+ * @return An unordered map where each tensor info pointer is paired with its original padding info */ -inline bool is_data_type_quantized_symmetric(DataType dt) -{ - switch(dt) - { - case DataType::QSYMM8: - case DataType::QSYMM8_PER_CHANNEL: - case DataType::QSYMM16: - return true; - default: - return false; - } -} - -/** Check if a given data type is of per channel type +std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensorInfo *> infos); +/** Stores padding information before configuring a kernel * - * @param[in] dt Input data type. + * @param[in] tensors list of tensors to store the padding info for * - * @return True if data type is of per channel type, else false. + * @return An unordered map where each tensor info pointer is paired with its original padding info */ -inline bool is_data_type_quantized_per_channel(DataType dt) -{ - switch(dt) - { - case DataType::QSYMM8_PER_CHANNEL: - return true; - default: - return false; - } -} - -/** Create a string with the float in full precision. +std::unordered_map<const ITensorInfo *, PaddingSize> get_padding_info(std::initializer_list<const ITensor *> tensors); +/** Check if the previously stored padding info has changed after configuring a kernel * - * @param val Floating point value + * @param[in] padding_map an unordered map where each tensor info pointer is paired with its original padding info * - * @return String with the floating point value. 
+ * @return true if any of the tensor infos has changed its paddings */ -inline std::string float_to_string_with_full_precision(float val) -{ - std::stringstream ss; - ss.precision(std::numeric_limits<float>::max_digits10); - ss << val; - - if(val != static_cast<int>(val)) - { - ss << "f"; - } - - return ss.str(); -} +bool has_padding_changed(const std::unordered_map<const ITensorInfo *, PaddingSize> &padding_map); /** Returns the number of elements required to go from start to end with the wanted step * @@ -1250,67 +298,6 @@ inline size_t num_of_elements_in_range(const float start, const float end, const return size_t(std::ceil((end - start) / step)); } -/** Returns true if the value can be represented by the given data type - * - * @param[in] val value to be checked - * @param[in] dt data type that is checked - * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8 - * - * @return true if the data type can hold the value. - */ -template <typename T> -bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo()) -{ - switch(dt) - { - case DataType::U8: - { - const auto val_u8 = static_cast<uint8_t>(val); - return ((val_u8 == val) && val_u8 >= std::numeric_limits<uint8_t>::lowest() && val_u8 <= std::numeric_limits<uint8_t>::max()); - } - case DataType::QASYMM8: - { - double min = static_cast<double>(dequantize_qasymm8(0, qinfo)); - double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo)); - return ((double)val >= min && (double)val <= max); - } - case DataType::S8: - { - const auto val_s8 = static_cast<int8_t>(val); - return ((val_s8 == val) && val_s8 >= std::numeric_limits<int8_t>::lowest() && val_s8 <= std::numeric_limits<int8_t>::max()); - } - case DataType::U16: - { - const auto val_u16 = static_cast<uint16_t>(val); - return ((val_u16 == val) && val_u16 >= std::numeric_limits<uint16_t>::lowest() && val_u16 <= std::numeric_limits<uint16_t>::max()); - } - case DataType::S16: - 
{ - const auto val_s16 = static_cast<int16_t>(val); - return ((val_s16 == val) && val_s16 >= std::numeric_limits<int16_t>::lowest() && val_s16 <= std::numeric_limits<int16_t>::max()); - } - case DataType::U32: - { - const auto val_u32 = static_cast<uint32_t>(val); - return ((val_u32 == val) && val_u32 >= std::numeric_limits<uint32_t>::lowest() && val_u32 <= std::numeric_limits<uint32_t>::max()); - } - case DataType::S32: - { - const auto val_s32 = static_cast<int32_t>(val); - return ((val_s32 == val) && val_s32 >= std::numeric_limits<int32_t>::lowest() && val_s32 <= std::numeric_limits<int32_t>::max()); - } - case DataType::BFLOAT16: - return (val >= bfloat16::lowest() && val <= bfloat16::max()); - case DataType::F16: - return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max()); - case DataType::F32: - return (val >= std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max()); - default: - ARM_COMPUTE_ERROR("Data type not supported"); - return false; - } -} - #ifdef ARM_COMPUTE_ASSERTS_ENABLED /** Print consecutive elements to an output stream. * @@ -1321,26 +308,27 @@ bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = Quantization * @param[in] element_delim (Optional) Delimeter among the consecutive elements. 
Defaults to space delimeter */ template <typename T> -void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") +void print_consecutive_elements_impl( + std::ostream &s, const T *ptr, unsigned int n, int stream_width = 0, const std::string &element_delim = " ") { using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type; std::ios stream_status(nullptr); stream_status.copyfmt(s); - for(unsigned int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { // Set stream width as it is not a "sticky" stream manipulator - if(stream_width != 0) + if (stream_width != 0) { s.width(stream_width); } - if(std::is_same<typename std::decay<T>::type, half>::value) + if (std::is_same<typename std::decay<T>::type, half>::value) { // We use T instead of print_type here is because the std::is_floating_point<half> returns false and then the print_type becomes int. s << std::right << static_cast<T>(ptr[i]) << element_delim; } - else if(std::is_same<typename std::decay<T>::type, bfloat16>::value) + else if (std::is_same<typename std::decay<T>::type, bfloat16>::value) { // We use T instead of print_type here is because the std::is_floating_point<bfloat16> returns false and then the print_type becomes int. s << std::right << float(ptr[i]) << element_delim; @@ -1369,17 +357,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u using print_type = typename std::conditional<std::is_floating_point<T>::value, T, int>::type; int max_width = -1; - for(unsigned int i = 0; i < n; ++i) + for (unsigned int i = 0; i < n; ++i) { std::stringstream ss; ss.copyfmt(s); - if(std::is_same<typename std::decay<T>::type, half>::value) + if (std::is_same<typename std::decay<T>::type, half>::value) { // We use T instead of print_type here is because the std::is_floating_point<half> returns false and then the print_type becomes int. 
ss << static_cast<T>(ptr[i]); } - else if(std::is_same<typename std::decay<T>::type, bfloat16>::value) + else if (std::is_same<typename std::decay<T>::type, bfloat16>::value) { // We use T instead of print_type here is because the std::is_floating_point<bfloat> returns false and then the print_type becomes int. ss << float(ptr[i]); @@ -1403,7 +391,12 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u * @param[in] stream_width (Optional) Width of the stream. If set to 0 the element's width is used. Defaults to 0. * @param[in] element_delim (Optional) Delimeter among the consecutive elements. Defaults to space delimeter */ -void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim = " "); +void print_consecutive_elements(std::ostream &s, + DataType dt, + const uint8_t *ptr, + unsigned int n, + int stream_width, + const std::string &element_delim = " "); /** Identify the maximum width of n consecutive elements. * @@ -1416,5 +409,5 @@ void print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr */ int max_consecutive_elements_display_width(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n); #endif /* ARM_COMPUTE_ASSERTS_ENABLED */ -} +} // namespace arm_compute #endif /*ARM_COMPUTE_UTILS_H */ diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h index bbea5e5575..5550560aff 100644 --- a/arm_compute/core/Validate.h +++ b/arm_compute/core/Validate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,12 +25,12 @@ #define ARM_COMPUTE_VALIDATE_H #include "arm_compute/core/Error.h" -#include "arm_compute/core/HOGInfo.h" #include "arm_compute/core/IKernel.h" -#include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/core/IMultiImage.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/MultiImageInfo.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/utils/DataLayoutUtils.h" +#include "arm_compute/core/utils/DataTypeUtils.h" +#include "arm_compute/core/utils/FormatUtils.h" #include "arm_compute/core/Window.h" #include <algorithm> @@ -50,9 +50,9 @@ namespace detail template <typename T> inline bool have_different_dimensions(const Dimensions<T> &dim1, const Dimensions<T> &dim2, unsigned int upper_dim) { - for(unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i) + for (unsigned int i = upper_dim; i < arm_compute::Dimensions<T>::num_max_dimensions; ++i) { - if(dim1[i] != dim2[i]) + if (dim1[i] != dim2[i]) { return true; } @@ -80,7 +80,7 @@ public: * @param[in] line Source code line. Used for error reporting. */ compare_dimension(const Dimensions<T> &dim, const char *function, const char *file, int line) - : _dim{ dim }, _function{ function }, _file{ file }, _line{ line } + : _dim{dim}, _function{function}, _file{file}, _line{line} { } @@ -111,7 +111,7 @@ inline arm_compute::Status for_each_error(F &&) } template <typename F, typename T, typename... Ts> -inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&... args) +inline arm_compute::Status for_each_error(F &&func, T &&arg, Ts &&...args) { ARM_COMPUTE_RETURN_ON_ERROR(func(arg)); ARM_COMPUTE_RETURN_ON_ERROR(for_each_error(func, args...)); @@ -148,13 +148,11 @@ struct get_tensor_info_t<ITensorInfo *> * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&... 
pointers) +inline arm_compute::Status error_on_nullptr(const char *function, const char *file, const int line, Ts &&...pointers) { - const std::array<const void *, sizeof...(Ts)> pointers_array{ { std::forward<Ts>(pointers)... } }; - bool has_nullptr = std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) - { - return (ptr == nullptr); - }); + const std::array<const void *, sizeof...(Ts)> pointers_array{{std::forward<Ts>(pointers)...}}; + bool has_nullptr = + std::any_of(pointers_array.begin(), pointers_array.end(), [&](const void *ptr) { return (ptr == nullptr); }); ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(has_nullptr, function, file, line, "Nullptr object!"); return arm_compute::Status{}; } @@ -178,8 +176,8 @@ inline arm_compute::Status error_on_nullptr(const char *function, const char *fi * * @return Status */ -arm_compute::Status error_on_mismatching_windows(const char *function, const char *file, const int line, - const Window &full, const Window &win); +arm_compute::Status error_on_mismatching_windows( + const char *function, const char *file, const int line, const Window &full, const Window &win); #define ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(f, w) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_windows(__func__, __FILE__, __LINE__, f, w)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_WINDOWS(f, w) \ @@ -200,8 +198,8 @@ arm_compute::Status error_on_mismatching_windows(const char *function, const cha * * @return Status */ -arm_compute::Status error_on_invalid_subwindow(const char *function, const char *file, const int line, - const Window &full, const Window &sub); +arm_compute::Status error_on_invalid_subwindow( + const char *function, const char *file, const int line, const Window &full, const Window &sub); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subwindow(__func__, __FILE__, __LINE__, f, s)) #define 
ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBWINDOW(f, s) \ @@ -220,12 +218,14 @@ arm_compute::Status error_on_invalid_subwindow(const char *function, const char * * @return Status */ -arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *function, const char *file, const int line, - const Window &full, const Window &window, const int dim); +arm_compute::Status error_on_window_not_collapsable_at_dimension( + const char *function, const char *file, const int line, const Window &full, const Window &window, const int dim); #define ARM_COMPUTE_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) #define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_NOT_COLLAPSABLE_AT_DIMENSION(f, w, d) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_window_not_collapsable_at_dimension(__func__, __FILE__, __LINE__, f, w, d)) /** Return an error if the passed coordinates have too many dimensions. 
* @@ -239,8 +239,8 @@ arm_compute::Status error_on_window_not_collapsable_at_dimension(const char *fun * * @return Status */ -arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, const char *file, const int line, - const Coordinates &pos, unsigned int max_dim); +arm_compute::Status error_on_coordinates_dimensions_gte( + const char *function, const char *file, const int line, const Coordinates &pos, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_coordinates_dimensions_gte(__func__, __FILE__, __LINE__, p, md)) #define ARM_COMPUTE_RETURN_ERROR_ON_COORDINATES_DIMENSIONS_GTE(p, md) \ @@ -258,8 +258,8 @@ arm_compute::Status error_on_coordinates_dimensions_gte(const char *function, co * * @return Status */ -arm_compute::Status error_on_window_dimensions_gte(const char *function, const char *file, const int line, - const Window &win, unsigned int max_dim); +arm_compute::Status error_on_window_dimensions_gte( + const char *function, const char *file, const int line, const Window &win, unsigned int max_dim); #define ARM_COMPUTE_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_window_dimensions_gte(__func__, __FILE__, __LINE__, w, md)) #define ARM_COMPUTE_RETURN_ERROR_ON_WINDOW_DIMENSIONS_GTE(w, md) \ @@ -277,16 +277,82 @@ arm_compute::Status error_on_window_dimensions_gte(const char *function, const c * @return Status */ template <typename T, typename... Ts> -arm_compute::Status error_on_mismatching_dimensions(const char *function, const char *file, int line, - const Dimensions<T> &dim1, const Dimensions<T> &dim2, Ts &&... 
dims) +arm_compute::Status error_on_mismatching_dimensions(const char *function, + const char *file, + int line, + const Dimensions<T> &dim1, + const Dimensions<T> &dim2, + Ts &&...dims) { - ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2, std::forward<Ts>(dims)...)); + ARM_COMPUTE_RETURN_ON_ERROR(detail::for_each_error(detail::compare_dimension<T>(dim1, function, file, line), dim2, + std::forward<Ts>(dims)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_dimensions(__func__, __FILE__, __LINE__, __VA_ARGS__)) + +/** Return true if the given format has horizontal subsampling. + * + * @param[in] format Format to determine subsampling. + * + * @return True if the format can be subsampled horizontaly. + */ +inline bool has_format_horizontal_subsampling(Format format) +{ + return (format == Format::YUYV422 || format == Format::UYVY422 || format == Format::NV12 || + format == Format::NV21 || format == Format::IYUV || format == Format::UV88) + ? true + : false; +} + +/** Return true if the given format has vertical subsampling. + * + * @param[in] format Format to determine subsampling. + * + * @return True if the format can be subsampled verticaly. + */ +inline bool has_format_vertical_subsampling(Format format) +{ + return (format == Format::NV12 || format == Format::NV21 || format == Format::IYUV || format == Format::UV88) + ? 
true + : false; +} + +/** Adjust tensor shape size if width or height are odd for a given multi-planar format. No modification is done for other formats. + * + * @note Adding here a few links discussing the issue of odd size and sharing the same solution: + * <a href="https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/graphics/java/android/graphics/YuvImage.java">Android Source</a> + * <a href="https://groups.google.com/a/webmproject.org/forum/#!topic/webm-discuss/LaCKpqiDTXM">WebM</a> + * <a href="https://bugs.chromium.org/p/libyuv/issues/detail?id=198&can=1&q=odd%20width">libYUV</a> + * <a href="https://sourceforge.net/p/raw-yuvplayer/bugs/1/">YUVPlayer</a> * + * + * @param[in, out] shape Tensor shape of 2D size + * @param[in] format Format of the tensor + * + * @return The adjusted tensor shape. + */ +inline TensorShape adjust_odd_shape(const TensorShape &shape, Format format) +{ + TensorShape output{shape}; + + // Force width to be even for formats which require subsampling of the U and V channels + if (has_format_horizontal_subsampling(format)) + { + output.set(0, (output.x() + 1) & ~1U); + } + + // Force height to be even for formats which require subsampling of the U and V channels + if (has_format_vertical_subsampling(format)) + { + output.set(1, (output.y() + 1) & ~1U); + } + + return output; +} /** Return an error if the passed tensor objects are not even. * @@ -300,18 +366,20 @@ arm_compute::Status error_on_mismatching_dimensions(const char *function, const * @return Status */ template <typename... Ts> -arm_compute::Status error_on_tensors_not_even(const char *function, const char *file, int line, - const Format &format, const ITensor *tensor1, Ts... tensors) +arm_compute::Status error_on_tensors_not_even( + const char *function, const char *file, int line, const Format &format, const ITensor *tensor1, Ts... 
tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor) - { - const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format); - return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2); - }), - function, file, line, "Tensor shape has odd dimensions"); + const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), + [&](const ITensor *tensor) + { + const TensorShape correct_shape = adjust_odd_shape(tensor->info()->tensor_shape(), format); + return detail::have_different_dimensions(tensor->info()->tensor_shape(), correct_shape, 2); + }), + function, file, line, "Tensor shape has odd dimensions"); return arm_compute::Status{}; } @@ -320,6 +388,38 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char * #define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_EVEN(...) \ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_even(__func__, __FILE__, __LINE__, __VA_ARGS__)) +/** Calculate subsampled shape for a given format and channel + * + * @param[in] shape Shape of the tensor to calculate the extracted channel. + * @param[in] format Format of the tensor. + * @param[in] channel Channel to create tensor shape to be extracted. + * + * @return The subsampled tensor shape. 
+ */ +inline TensorShape +calculate_subsampled_shape(const TensorShape &shape, Format format, Channel channel = Channel::UNKNOWN) +{ + TensorShape output{shape}; + + // Subsample shape only for U or V channel + if (Channel::U == channel || Channel::V == channel || Channel::UNKNOWN == channel) + { + // Subsample width for the tensor shape when channel is U or V + if (has_format_horizontal_subsampling(format)) + { + output.set(0, output.x() / 2U); + } + + // Subsample height for the tensor shape when channel is U or V + if (has_format_vertical_subsampling(format)) + { + output.set(1, output.y() / 2U); + } + } + + return output; +} + /** Return an error if the passed tensor objects are not sub-sampled. * * @param[in] function Function in which the error occurred. @@ -333,25 +433,32 @@ arm_compute::Status error_on_tensors_not_even(const char *function, const char * * @return Status */ template <typename... Ts> -arm_compute::Status error_on_tensors_not_subsampled(const char *function, const char *file, int line, - const Format &format, const TensorShape &shape, const ITensor *tensor1, Ts... tensors) +arm_compute::Status error_on_tensors_not_subsampled(const char *function, + const char *file, + int line, + const Format &format, + const TensorShape &shape, + const ITensor *tensor1, + Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - const TensorShape sub2_shape = calculate_subsampled_shape(shape, format); - const std::array < const ITensor *, 1 + sizeof...(Ts) > tensors_info_array{ { tensor1, std::forward<Ts>(tensors)... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), [&](const ITensor * tensor) - { - return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); - }), - function, file, line, "Tensor shape has mismatch dimensions for sub-sampling"); + const TensorShape sub2_shape = calculate_subsampled_shape(shape, format); + const std::array<const ITensor *, 1 + sizeof...(Ts)> tensors_info_array{{tensor1, std::forward<Ts>(tensors)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensors_info_array.cbegin(), tensors_info_array.cend(), + [&](const ITensor *tensor) + { return detail::have_different_dimensions(tensor->info()->tensor_shape(), sub2_shape, 2); }), + function, file, line, "Tensor shape has mismatch dimensions for sub-sampling"); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_TENSORS_NOT_SUBSAMPLED(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_tensors_not_subsampled(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed two tensor infos have different shapes from the given dimension * @@ -365,10 +472,15 @@ arm_compute::Status error_on_tensors_not_subsampled(const char *function, const * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... 
tensor_infos) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... tensor_infos) { - return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)...); + return error_on_mismatching_shapes(function, file, line, 0U, tensor_info_1, tensor_info_2, + std::forward<Ts>(tensor_infos)...); } /** Return an error if the passed two tensors have different shapes from the given dimension * @@ -382,8 +494,12 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { return error_on_mismatching_shapes(function, file, line, 0U, tensor_1, tensor_2, std::forward<Ts>(tensors)...); } @@ -400,19 +516,28 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - unsigned int upper_dim, const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + unsigned int upper_dim, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... 
tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info_2 == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - const std::array < const ITensorInfo *, 2 + sizeof...(Ts) > tensors_info_array{ { tensor_info_1, tensor_info_2, std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), [&](const ITensorInfo * tensor_info) - { - return detail::have_different_dimensions((*tensors_info_array.cbegin())->tensor_shape(), tensor_info->tensor_shape(), upper_dim); - }), - function, file, line, "Tensors have different shapes"); + const std::array<const ITensorInfo *, 2 + sizeof...(Ts)> tensors_info_array{ + {tensor_info_1, tensor_info_2, tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(std::next(tensors_info_array.cbegin()), tensors_info_array.cend(), + [&](const ITensorInfo *tensor_info) + { + return detail::have_different_dimensions( + (*tensors_info_array.cbegin())->tensor_shape(), + tensor_info->tensor_shape(), upper_dim); + }), + function, file, line, "Tensors have different shapes"); return arm_compute::Status{}; } /** Return an error if the passed two tensors have different shapes from the given dimension @@ -428,14 +553,20 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_shapes(const char *function, const char *file, const int line, - unsigned int upper_dim, const ITensor *tensor_1, const ITensor *tensor_2, Ts... 
tensors) +inline arm_compute::Status error_on_mismatching_shapes(const char *function, + const char *file, + const int line, + unsigned int upper_dim, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_1 == nullptr, function, file, line); ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_2 == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_mismatching_shapes(function, file, line, upper_dim, tensor_1->info(), tensor_2->info(), + detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(...) \ @@ -454,19 +585,18 @@ inline arm_compute::Status error_on_mismatching_shapes(const char *function, con * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_data_layouts( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... 
tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - DataLayout &&tensor_data_layout = tensor_info->data_layout(); - const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj) - { - return tensor_info_obj->data_layout() != tensor_data_layout; - }), - function, file, line, "Tensors have different data layouts"); + DataLayout &&tensor_data_layout = tensor_info->data_layout(); + const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), + [&](const ITensorInfo *tensor_info_obj) + { return tensor_info_obj->data_layout() != tensor_data_layout; }), + function, file, line, "Tensors have different data layouts"); return arm_compute::Status{}; } /** Return an error if the passed tensors have different data layouts @@ -480,19 +610,21 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_layouts(const char *function, const char *file, const int line, - const ITensor *tensor, Ts... tensors) +inline arm_compute::Status error_on_mismatching_data_layouts( + const char *function, const char *file, const int line, const ITensor *tensor, Ts... 
tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(function, file, line, tensor->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts( + function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_data_layouts(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed two tensor infos have different data types * @@ -505,19 +637,18 @@ inline arm_compute::Status error_on_mismatching_data_layouts(const char *functio * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_data_types( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, Ts... 
tensor_infos) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensor_infos)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensor_infos...)); - DataType &&tensor_data_type = tensor_info->data_type(); - const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{ { std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), [&](const ITensorInfo * tensor_info_obj) - { - return tensor_info_obj->data_type() != tensor_data_type; - }), - function, file, line, "Tensors have different data types"); + DataType &&tensor_data_type = tensor_info->data_type(); + const std::array<const ITensorInfo *, sizeof...(Ts)> tensors_infos_array{{tensor_infos...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensors_infos_array.begin(), tensors_infos_array.end(), + [&](const ITensorInfo *tensor_info_obj) + { return tensor_info_obj->data_type() != tensor_data_type; }), + function, file, line, "Tensors have different data types"); return arm_compute::Status{}; } /** Return an error if the passed two tensors have different data types @@ -531,19 +662,21 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function, * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_data_types(const char *function, const char *file, const int line, - const ITensor *tensor, Ts... tensors) +inline arm_compute::Status error_on_mismatching_data_types( + const char *function, const char *file, const int line, const ITensor *tensor, Ts... 
tensors) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, std::forward<Ts>(tensors)...)); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(function, file, line, tensor->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_nullptr(function, file, line, tensors...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types( + function, file, line, tensor->info(), detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_data_types(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Return an error if the passed tensor infos have different asymmetric quantized data types or different quantization info * @@ -559,28 +692,32 @@ inline arm_compute::Status error_on_mismatching_data_types(const char *function, * @return Status */ template <typename... Ts> -inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) +inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info_1, + const ITensorInfo *tensor_info_2, + Ts... 
tensor_infos) { DataType &&first_data_type = tensor_info_1->data_type(); const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info(); - if(!is_data_type_quantized(first_data_type)) + if (!is_data_type_quantized(first_data_type)) { return arm_compute::Status{}; } - const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->data_type() != first_data_type; - }), - function, file, line, "Tensors have different asymmetric quantized data types"); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->quantization_info() != first_quantization_info; - }), - function, file, line, "Tensors have different quantization information"); + const std::array<const ITensorInfo *, 1 + sizeof...(Ts)> tensor_infos_array{ + {tensor_info_2, std::forward<Ts>(tensor_infos)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG(std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), + [&](const ITensorInfo *tensor_info) + { return tensor_info->data_type() != first_data_type; }), + function, file, line, "Tensors have different asymmetric quantized data types"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), + [&](const ITensorInfo *tensor_info) + { return tensor_info->quantization_info() != first_quantization_info; }), + function, file, line, "Tensors have different quantization information"); return arm_compute::Status{}; } @@ -598,17 +735,24 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu * @return Status */ template <typename... 
Ts> -inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, const char *file, const int line, - const ITensor *tensor_1, const ITensor *tensor_2, Ts... tensors) +inline arm_compute::Status error_on_mismatching_quantization_info(const char *function, + const char *file, + const int line, + const ITensor *tensor_1, + const ITensor *tensor_2, + Ts... tensors) { - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(), - detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_mismatching_quantization_info(function, file, line, tensor_1->info(), tensor_2->info(), + detail::get_tensor_info_t<ITensorInfo *>()(tensors)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_mismatching_quantization_info(__func__, __FILE__, __LINE__, __VA_ARGS__)) /** Throw an error if the format of the passed tensor/multi-image does not match any of the formats provided. * @@ -620,8 +764,8 @@ inline arm_compute::Status error_on_mismatching_quantization_info(const char *fu * @param[in] formats (Optional) Further allowed formats. */ template <typename T, typename F, typename... Fs> -void error_on_format_not_in(const char *function, const char *file, const int line, - const T *object, F &&format, Fs &&... 
formats) +void error_on_format_not_in( + const char *function, const char *file, const int line, const T *object, F &&format, Fs &&...formats) { ARM_COMPUTE_ERROR_ON_LOC(object == nullptr, function, file, line); @@ -630,17 +774,17 @@ void error_on_format_not_in(const char *function, const char *file, const int li ARM_COMPUTE_ERROR_ON_LOC(object_format == Format::UNKNOWN, function, file, line); - const std::array<F, sizeof...(Fs)> formats_array{ { std::forward<Fs>(formats)... } }; + const std::array<F, sizeof...(Fs)> formats_array{{std::forward<Fs>(formats)...}}; ARM_COMPUTE_UNUSED(formats_array); - ARM_COMPUTE_ERROR_ON_LOC_MSG(object_format != format && std::none_of(formats_array.begin(), formats_array.end(), [&](const F & f) - { - return f == object_format; - }), - function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); + ARM_COMPUTE_ERROR_ON_LOC_MSG( + object_format != format && + std::none_of(formats_array.begin(), formats_array.end(), [&](const F &f) { return f == object_format; }), + function, file, line, "Format %s not supported by this kernel", string_from_format(object_format).c_str()); ARM_COMPUTE_UNUSED(function, format, file, line); } -#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) +#define ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(t, ...) \ + ::arm_compute::error_on_format_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__) /** Return an error if the data type of the passed tensor info does not match any of the data types provided. * @@ -654,20 +798,19 @@ void error_on_format_not_in(const char *function, const char *file, const int li * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, T &&dt, Ts &&... 
dts) +inline arm_compute::Status error_on_data_type_not_in( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dt, Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); const DataType &tensor_dt = tensor_info->data_type(); //NOLINT ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dt == DataType::UNKNOWN, function, file, line); - const std::array<T, sizeof...(Ts)> dts_array{ { std::forward<Ts>(dts)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T & d) - { - return d == tensor_dt; - }), - function, file, line, "ITensor data type %s not supported by this kernel", string_from_data_type(tensor_dt).c_str()); + const std::array<T, sizeof...(Ts)> dts_array{{std::forward<Ts>(dts)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR( + tensor_dt != dt && std::none_of(dts_array.begin(), dts_array.end(), [&](const T &d) { return d == tensor_dt; }), + function, file, line, "ITensor data type %s not supported by this kernel", + string_from_data_type(tensor_dt).c_str()); return arm_compute::Status{}; } /** Return an error if the data type of the passed tensor does not match any of the data types provided. @@ -682,11 +825,12 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, T &&dt, Ts &&... 
dts) +inline arm_compute::Status error_on_data_type_not_in( + const char *function, const char *file, const int line, const ITensor *tensor, T &&dt, Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in( + function, file, line, tensor->info(), std::forward<T>(dt), std::forward<Ts>(dts)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(t, ...) \ @@ -706,20 +850,19 @@ inline arm_compute::Status error_on_data_type_not_in(const char *function, const * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, T &&dl, Ts &&... dls) +inline arm_compute::Status error_on_data_layout_not_in( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, T &&dl, Ts &&...dls) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); const DataLayout &tensor_dl = tensor_info->data_layout(); //NOLINT ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_dl == DataLayout::UNKNOWN, function, file, line); - const std::array<T, sizeof...(Ts)> dls_array{ { std::forward<Ts>(dls)... 
} }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T & l) - { - return l == tensor_dl; - }), - function, file, line, "ITensor data layout %s not supported by this kernel", string_from_data_layout(tensor_dl).c_str()); + const std::array<T, sizeof...(Ts)> dls_array{{std::forward<Ts>(dls)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR( + tensor_dl != dl && std::none_of(dls_array.begin(), dls_array.end(), [&](const T &l) { return l == tensor_dl; }), + function, file, line, "ITensor data layout %s not supported by this kernel", + string_from_data_layout(tensor_dl).c_str()); return arm_compute::Status{}; } /** Return an error if the data layout of the passed tensor does not match any of the data layout provided. @@ -734,17 +877,19 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_layout_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, T &&dl, Ts &&... dls) +inline arm_compute::Status error_on_data_layout_not_in( + const char *function, const char *file, const int line, const ITensor *tensor, T &&dl, Ts &&...dls) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in( + function, file, line, tensor->info(), std::forward<T>(dl), std::forward<Ts>(dls)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(t, ...) 
\ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_data_layout_not_in(__func__, __FILE__, __LINE__, t, __VA_ARGS__)) /** Return an error if the data type or the number of channels of the passed tensor info does not match any of the data types and number of channels provided. * @@ -759,12 +904,20 @@ inline arm_compute::Status error_on_data_layout_not_in(const char *function, con * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, size_t num_channels, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, + const char *file, + const int line, + const ITensorInfo *tensor_info, + size_t num_channels, + T &&dt, + Ts &&...dts) { - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in(function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_not_in( + function, file, line, tensor_info, std::forward<T>(dt), std::forward<Ts>(dts)...)); const size_t tensor_nc = tensor_info->num_channels(); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, "Number of channels %zu. Required number of channels %zu", tensor_nc, num_channels); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG_VAR(tensor_nc != num_channels, function, file, line, + "Number of channels %zu. Required number of channels %zu", tensor_nc, + num_channels); return arm_compute::Status{}; } /** Return an error if the data type or the number of channels of the passed tensor does not match any of the data types and number of channels provided. 
@@ -780,17 +933,25 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, const char *file, const int line, - const ITensor *tensor, size_t num_channels, T &&dt, Ts &&... dts) +inline arm_compute::Status error_on_data_type_channel_not_in(const char *function, + const char *file, + const int line, + const ITensor *tensor, + size_t num_channels, + T &&dt, + Ts &&...dts) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, std::forward<T>(dt), std::forward<Ts>(dts)...)); + ARM_COMPUTE_RETURN_ON_ERROR(error_on_data_type_channel_not_in(function, file, line, tensor->info(), num_channels, + std::forward<T>(dt), std::forward<Ts>(dts)...)); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) #define ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(t, c, ...) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_data_type_channel_not_in(__func__, __FILE__, __LINE__, t, c, __VA_ARGS__)) /** Return an error if the data type of the passed tensor info is FP16 and FP16 extension is not supported by the device. 
* @@ -802,12 +963,12 @@ inline arm_compute::Status error_on_data_type_channel_not_in(const char *functio * * @return Status */ -inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info, bool is_fp16_supported) +inline arm_compute::Status error_on_unsupported_fp16( + const char *function, const char *file, const int line, const ITensorInfo *tensor_info, bool is_fp16_supported) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), - function, file, line, "FP16 not supported by the device"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16 && !is_fp16_supported), function, + file, line, "FP16 not supported by the device"); return arm_compute::Status{}; } @@ -821,11 +982,12 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const * * @return Status */ -inline arm_compute::Status error_on_unsupported_fp16(const char *function, const char *file, const int line, - const ITensor *tensor, bool is_fp16_supported) +inline arm_compute::Status error_on_unsupported_fp16( + const char *function, const char *file, const int line, const ITensor *tensor, bool is_fp16_supported) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported)); + ARM_COMPUTE_RETURN_ON_ERROR( + ::arm_compute::error_on_unsupported_fp16(function, file, line, tensor->info(), is_fp16_supported)); return arm_compute::Status{}; } @@ -838,8 +1000,8 @@ inline arm_compute::Status error_on_unsupported_fp16(const char *function, const * * @return Status */ -arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, - const ITensor *tensor); +arm_compute::Status 
+error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensor *tensor); /** Return an error if the tensor info is not 2D. * @@ -850,8 +1012,8 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil * * @return Status */ -arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line, - const ITensorInfo *tensor); +arm_compute::Status +error_on_tensor_not_2d(const char *function, const char *file, const int line, const ITensorInfo *tensor); #define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t)) @@ -870,17 +1032,15 @@ arm_compute::Status error_on_tensor_not_2d(const char *function, const char *fil * @return Status */ template <typename T, typename... Ts> -inline arm_compute::Status error_on_channel_not_in(const char *function, const char *file, const int line, - T cn, T &&channel, Ts &&... channels) +inline arm_compute::Status +error_on_channel_not_in(const char *function, const char *file, const int line, T cn, T &&channel, Ts &&...channels) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(cn == Channel::UNKNOWN, function, file, line); - const std::array<T, sizeof...(Ts)> channels_array{ { std::forward<Ts>(channels)... } }; - ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), [&](const T & f) - { - return f == cn; - }), - function, file, line); + const std::array<T, sizeof...(Ts)> channels_array{{std::forward<Ts>(channels)...}}; + ARM_COMPUTE_RETURN_ERROR_ON_LOC(channel != cn && std::none_of(channels_array.begin(), channels_array.end(), + [&](const T &f) { return f == cn; }), + function, file, line); return arm_compute::Status{}; } #define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN(c, ...) 
\ @@ -898,35 +1058,13 @@ inline arm_compute::Status error_on_channel_not_in(const char *function, const c * * @return Status */ -arm_compute::Status error_on_channel_not_in_known_format(const char *function, const char *file, const int line, - Format fmt, Channel cn); +arm_compute::Status +error_on_channel_not_in_known_format(const char *function, const char *file, const int line, Format fmt, Channel cn); #define ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c)) #define ARM_COMPUTE_RETURN_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(f, c) \ ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_channel_not_in_known_format(__func__, __FILE__, __LINE__, f, c)) -/** Return an error if the @ref IMultiHOG container is invalid - * - * An @ref IMultiHOG container is invalid if: - * - * -# it is a nullptr - * -# it doesn't contain models - * -# it doesn't have the HOG data objects with the same phase_type, normalization_type and l2_hyst_threshold (if normalization_type == L2HYS_NORM) - * - * @param[in] function Function in which the error occurred. - * @param[in] file Name of the file where the error occurred. - * @param[in] line Line on which the error occurred. - * @param[in] multi_hog IMultiHOG container to validate - * - * @return Status - */ -arm_compute::Status error_on_invalid_multi_hog(const char *function, const char *file, const int line, - const IMultiHOG *multi_hog); -#define ARM_COMPUTE_ERROR_ON_INVALID_MULTI_HOG(m) \ - ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m)) -#define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_MULTI_HOG(m) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_multi_hog(__func__, __FILE__, __LINE__, m)) - /** Return an error if the kernel is not configured. * * @param[in] function Function in which the error occurred. 
@@ -936,8 +1074,8 @@ arm_compute::Status error_on_invalid_multi_hog(const char *function, const char * * @return Status */ -arm_compute::Status error_on_unconfigured_kernel(const char *function, const char *file, const int line, - const IKernel *kernel); +arm_compute::Status +error_on_unconfigured_kernel(const char *function, const char *file, const int line, const IKernel *kernel); #define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unconfigured_kernel(__func__, __FILE__, __LINE__, k)) #define ARM_COMPUTE_RETURN_ERROR_ON_UNCONFIGURED_KERNEL(k) \ @@ -954,8 +1092,12 @@ arm_compute::Status error_on_unconfigured_kernel(const char *function, const cha * * @return Status */ -arm_compute::Status error_on_invalid_subtensor(const char *function, const char *file, const int line, - const TensorShape &parent_shape, const Coordinates &coords, const TensorShape &shape); +arm_compute::Status error_on_invalid_subtensor(const char *function, + const char *file, + const int line, + const TensorShape &parent_shape, + const Coordinates &coords, + const TensorShape &shape); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \ ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, p, c, s)) #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR(p, c, s) \ @@ -971,11 +1113,16 @@ arm_compute::Status error_on_invalid_subtensor(const char *function, const char * * @return Status */ -arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, const char *file, const int line, - const ValidRegion &parent_valid_region, const ValidRegion &valid_region); +arm_compute::Status error_on_invalid_subtensor_valid_region(const char *function, + const char *file, + const int line, + const ValidRegion &parent_valid_region, + const ValidRegion &valid_region); #define ARM_COMPUTE_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \ - 
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) + ARM_COMPUTE_ERROR_THROW_ON( \ + ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) #define ARM_COMPUTE_RETURN_ERROR_ON_INVALID_SUBTENSOR_VALID_REGION(pv, sv) \ - ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) -} + ARM_COMPUTE_RETURN_ON_ERROR( \ + ::arm_compute::error_on_invalid_subtensor_valid_region(__func__, __FILE__, __LINE__, pv, sv)) +} // namespace arm_compute #endif /* ARM_COMPUTE_VALIDATE_H*/ diff --git a/arm_compute/core/Version.h b/arm_compute/core/Version.h index be3f0264bb..44d400bad8 100644 --- a/arm_compute/core/Version.h +++ b/arm_compute/core/Version.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,12 +27,12 @@ #include <string> /* Macro utilities */ -#define STRINGIFY2(s) #s -#define STRINGIFY(s) STRINGIFY2(s) +#define ARM_COMPUTE_STRINGIFY2(s) #s +#define ARM_COMPUTE_STRINGIFY(s) ARM_COMPUTE_STRINGIFY2(s) -#define ARM_COMPUTE_VERSION_STR \ - STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \ - "." STRINGIFY(ARM_COMPUTE_VERSION_MINOR) "." STRINGIFY(ARM_COMPUTE_VERSION_PATCH) +#define ARM_COMPUTE_VERSION_STR \ + ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MAJOR) \ + "." ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_MINOR) "." ARM_COMPUTE_STRINGIFY(ARM_COMPUTE_VERSION_PATCH) namespace arm_compute { @@ -45,4 +45,7 @@ namespace arm_compute std::string build_information(); } // namespace arm_compute +#undef ARM_COMPUTE_STRINGIFY +#undef ARM_COMPUTE_STRINGIFY2 + #endif /* ARM_COMPUTE_LIBRARY_VERSION_H */ diff --git a/arm_compute/core/Window.h b/arm_compute/core/Window.h index d6690d484a..e93d2863c9 100644 --- a/arm_compute/core/Window.h +++ b/arm_compute/core/Window.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. 
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,17 +21,17 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_WINDOW_H -#define ARM_COMPUTE_WINDOW_H - -#include <algorithm> -#include <array> -#include <cstddef> +#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_H +#define ACL_ARM_COMPUTE_CORE_WINDOW_H #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/math/Math.h" + +#include <algorithm> +#include <array> +#include <cstddef> namespace arm_compute { @@ -45,6 +45,10 @@ public: static constexpr size_t DimY = 1; /** Alias for dimension 2 also known as Z dimension */ static constexpr size_t DimZ = 2; + /** Alias for dimension 3 also known as W dimension */ + static constexpr size_t DimW = 3; + /** Alias for dimension 4 also known as V dimension */ + static constexpr size_t DimV = 4; /** Default constructor: create a window containing a single element. */ constexpr Window() @@ -82,10 +86,10 @@ public: * @param[in] step Step between two elements of the dimension when iterating. * */ - constexpr Dimension(int start = 0, int end = 1, int step = 1) - : _start(start), _end(end), _step(step) + constexpr Dimension(int start = 0, int end = 1, int step = 1) : _start(start), _end(end), _step(step) { } + Dimension(const Dimension &d) = default; /** Default assignment operator to allow dimensions to be copied */ Dimension &operator=(const Dimension &d) = default; /** Return the start of the dimension */ @@ -119,6 +123,17 @@ public: { _end = end; } + /** Check whether two Dimensions are equal. + * + * @param[in] lhs LHS Dimensions + * @param[in] rhs RHS Dimensions + * + * @return True if the Dimensions are the same. 
+ */ + friend bool operator==(const Dimension &lhs, const Dimension &rhs) + { + return (lhs._start == rhs._start) && (lhs._end == rhs._end) && (lhs._step == rhs._step); + } private: int _start; /**< Start of the dimension */ @@ -198,15 +213,17 @@ public: */ void shift(size_t dimension, int shift_value); - /** Shift down all the dimensions of a window + /** Shift down all the dimensions of a window starting from the specified dimension. * - * i.e new_dims[n] = old_dims[n+shift_value]. + * new_dims[i] = old_dims[i] for all i < start_dim. + * new_dims[i] = old_dims[i+shift_value] for all i >= start_dim. * * @param[in] shift_value Number of dimensions to shift the window by. + * @param[in] start_dim The dimension from which the dimensions start to shift. * * @return The window with the shifted dimensions. */ - Window shift_dimensions(unsigned int shift_value) const; + Window shift_dimensions(unsigned int shift_value, unsigned int start_dim = 0) const; /** Adjust the start or end of a given dimension by the given value * @@ -346,7 +363,6 @@ public: { return slide_window_slice<4>(slice); } - /** Collapse the dimensions between @p first and @p last if possible. * * A dimension is collapsable if it starts from 0 and matches the corresponding dimension in the full_window @@ -358,7 +374,8 @@ public: * * @return Collapsed window. */ - Window collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const; + Window + collapse_if_possible(const Window &full_window, size_t first, size_t last, bool *has_collapsed = nullptr) const; /** Collapse the dimensions higher than @p first if possible. * @@ -411,6 +428,14 @@ public: * @param[in] rhs Second window to swap. */ friend void swap(Window &lhs, Window &rhs); + /** Check whether two Windows are equal. + * + * @param[in] lhs LHS window + * @param[in] rhs RHS window + * + * @return True if the given windows are the same. 
+ */ + friend bool operator==(const Window &lhs, const Window &rhs); private: /** First slice of the window @@ -418,7 +443,7 @@ private: * @return The first slice of the window. */ template <unsigned int window_dimension> - Window first_slice_window() const; + Window first_slice_window() const; /** Slide the passed window slice. * @@ -437,4 +462,4 @@ private: }; } // namespace arm_compute #include "Window.inl" -#endif /*ARM_COMPUTE_WINDOW_H */ +#endif // ACL_ARM_COMPUTE_CORE_WINDOW_H diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl index 70c4f80ac2..0f7c4fbdd7 100644 --- a/arm_compute/core/Window.inl +++ b/arm_compute/core/Window.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2020, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + +#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_INL +#define ACL_ARM_COMPUTE_CORE_WINDOW_INL + namespace arm_compute { inline Window::Window(const Window &src) : _dims(), _is_broadcasted(utility::generate_array<bool, Coordinates::num_max_dimensions, false>::value) { - for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { set(i, src[i]); _is_broadcasted[i] = src.is_broadcasted(i); @@ -65,32 +69,34 @@ inline bool Window::is_broadcasted(size_t dimension) const return _is_broadcasted[dimension]; } -inline Window Window::collapse_if_possible(const Window &full_window, const size_t first, - const size_t last, bool *has_collapsed) const +inline Window Window::collapse_if_possible(const Window &full_window, + const size_t first, + const size_t last, + bool *has_collapsed) const { Window collapsed(*this); bool is_collapsable = true; int collapsed_end = _dims[first].end(); - for(size_t d = first + 1; is_collapsable && (d < last); ++d) + for (size_t d = first + 1; is_collapsable && (d < last); ++d) { // The 
_dims's dimension must match the full _dims dimension to be collapsable: - is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) - && (full_window[d].end() == _dims[d].end()); + is_collapsable = (_dims[d].start() == 0) && (full_window[d].start() == 0) && (_dims[d].step() <= 1) && + (full_window[d].end() == _dims[d].end()); collapsed_end *= _dims[d].end(); } - if(is_collapsable) + if (is_collapsable) { collapsed._dims.at(first).set_end(collapsed_end); - for(size_t d = first + 1; is_collapsable && (d < last); ++d) + for (size_t d = first + 1; is_collapsable && (d < last); ++d) { collapsed.set(d, Dimension()); } } - if(has_collapsed != nullptr) + if (has_collapsed != nullptr) { *has_collapsed = is_collapsable; } @@ -98,13 +104,21 @@ inline Window Window::collapse_if_possible(const Window &full_window, const size return collapsed; } -inline Window Window::shift_dimensions(unsigned int shift_value) const +inline Window Window::shift_dimensions(unsigned int shift_value, unsigned int start_dim) const { Window shifted_window; - for(size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++) + size_t n = 0; + + for (; n < start_dim; ++n) + { + shifted_window.set(n, _dims[n]); + } + + for (; n < (Coordinates::num_max_dimensions - shift_value); n++) { shifted_window.set(n, _dims[n + shift_value]); } + return shifted_window; } @@ -120,9 +134,9 @@ inline Window Window::collapse(const Window &full_window, const size_t first, co inline Window Window::broadcast_if_dimension_le_one(const TensorShape &shape) const { Window broadcastWin(*this); - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { - if(shape[d] <= 1) + if (shape[d] <= 1) { broadcastWin.set_broadcasted(d); } @@ -142,7 +156,7 @@ inline void Window::adjust(size_t dimension, int adjust_value, bool is_at_start) ARM_COMPUTE_ERROR_ON(dimension >= Coordinates::num_max_dimensions); 
Window::Dimension &d = _dims[dimension]; - if(is_at_start) + if (is_at_start) { d = Window::Dimension(d.start() + adjust_value, d.end(), d.step()); } @@ -172,7 +186,7 @@ inline void Window::set_dimension_step(size_t dimension, int step) inline void Window::validate() const { - for(size_t i = 0; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 0; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_ERROR_ON(_dims[i].end() < _dims[i].start()); ARM_COMPUTE_ERROR_ON((_dims[i].step() != 0) && (((_dims[i].end() - _dims[i].start()) % _dims[i].step()) != 0)); @@ -193,21 +207,21 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co Window out; - for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d) { - if(d == dimension) + if (d == dimension) { - int start = _dims[d].start(); - int end = _dims[d].end(); - const int step = _dims[d].step(); + int start = _dims[d].start(); + int end = _dims[d].end(); + const int step = _dims[d].step(); const int num_it = num_iterations(d); const int rem = num_it % total; - int work = num_it / total; + int work = num_it / total; - int it_start = work * id; + int it_start = work * id; - if(int(id) < rem) + if (int(id) < rem) { ++work; it_start += id; @@ -234,18 +248,18 @@ inline Window Window::split_window(size_t dimension, size_t id, size_t total) co template <unsigned int window_dimension> inline bool Window::slide_window_slice(Window &slice) const { - for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) { // Did we reach the end of this dimension? 
const int v = slice._dims[n].start() + 1; - if(v < _dims[n].end()) + if (v < _dims[n].end()) { // No: increment slice._dims[n] = Dimension(v, v + 1, 1); // Reset lower dimensions: - for(unsigned int lower = window_dimension; lower < n; ++lower) + for (unsigned int lower = window_dimension; lower < n; ++lower) { slice._dims[lower] = Dimension(_dims[lower].start(), _dims[lower].start() + 1, 1); } @@ -258,14 +272,14 @@ inline bool Window::slide_window_slice(Window &slice) const } template <unsigned int window_dimension> -inline Window Window::first_slice_window() const +inline Window Window::first_slice_window() const { Window slice; std::copy_n(_dims.begin(), window_dimension, slice._dims.begin()); //Initialise higher dimensions to be the first slice. - for(unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) + for (unsigned int n = window_dimension; n < Coordinates::num_max_dimensions; ++n) { slice._dims[n] = Dimension(_dims[n].start(), _dims[n].start() + 1, 1); } @@ -275,16 +289,16 @@ inline Window Window::first_slice_window() const inline void Window::use_tensor_dimensions(const TensorShape &shape, size_t first_dimension) { - for(unsigned int n = first_dimension; n < shape.num_dimensions(); ++n) + for (unsigned int n = first_dimension; n < shape.num_dimensions(); ++n) { - set(n, Window::Dimension(0, std::max(shape[n], static_cast<uint32_t>(1)))); + set(n, Window::Dimension(0, std::max(shape[n], static_cast<size_t>(1)))); } } inline TensorShape Window::shape() const { TensorShape shape; - for(size_t d = 0; d < TensorShape::num_max_dimensions; ++d) + for (size_t d = 0; d < TensorShape::num_max_dimensions; ++d) { shape.set(d, (_dims[d].end() - _dims[d].start()) / _dims[d].step()); } @@ -294,7 +308,7 @@ inline TensorShape Window::shape() const inline size_t Window::num_iterations_total() const { size_t total = 1; - for(size_t d = 0; d < Coordinates::num_max_dimensions; ++d) + for (size_t d = 0; d < Coordinates::num_max_dimensions; ++d) { total 
*= num_iterations(d); } @@ -305,4 +319,11 @@ inline void swap(Window &lhs, Window &rhs) { lhs._dims.swap(rhs._dims); } + +inline bool operator==(const Window &lhs, const Window &rhs) +{ + return (lhs._dims == rhs._dims) && (lhs._is_broadcasted == rhs._is_broadcasted); +} } // namespace arm_compute + +#endif // ACL_ARM_COMPUTE_CORE_WINDOW_INL diff --git a/arm_compute/core/WindowIterator.h b/arm_compute/core/WindowIterator.h index e7d5334fa0..29302c410a 100644 --- a/arm_compute/core/WindowIterator.h +++ b/arm_compute/core/WindowIterator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,10 +28,6 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Window.h" -//FIXME: Delete the "PRINTF" before the release. In the meantime it's probably going to be useful to debug -//#define PRINTF printf -#define PRINTF(...) - namespace arm_compute { /** Convert an offset in window steps into absolute coordinates. @@ -44,7 +40,7 @@ namespace arm_compute inline Coordinates convert_window_coord_to_position(const Window &w, const Coordinates &offset) { Coordinates position; - for(unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) + for (unsigned int i = 0; i < Coordinates::num_max_dimensions; ++i) { position.set(i, w[i].start() + offset[i] * w[i].step()); } @@ -168,16 +164,14 @@ public: template <typename M> void iterate_3D(M &&on_new_row_size) { - while(_end.z() != _position.z()) + while (_end.z() != _position.z()) { - PRINTF("New slice %d\n", _position.z()); iterate_2D_internal(on_new_row_size, _w.x().end() - _w.x().step(), _w.y().end() - _w.y().step()); _position[2] += _w.z().step(); _position[1] = _w.y().start(); _position[0] = _w.x().start(); } // Left over: - PRINTF("Left over slice\n"); iterate_2D(on_new_row_size); } @@ -217,29 +211,25 @@ private: void iterate_2D_internal(M &&on_new_row_size, int end_x, int end_y) { //Is there more than one row to process ? 
- if(end_y == _position.y()) + if (end_y == _position.y()) { - // Single row: - PRINTF("Partial row only\n"); // Both start and end belong to the same row: iterate_over_dim0(end_x + _w.x().step(), on_new_row_size); } else { // Do we start from the beginning of the row ? - if(_w.x().start() != _position.x()) + if (_w.x().start() != _position.x()) { //Start in the middle of a row: process left-over X - PRINTF("Partial row first\n"); iterate_over_dim0(_w.x().end(), on_new_row_size); _position[1] += _w.y().step(); } //Middle rows bool no_leftover = end_x + _w.x().step() == _w.x().end(); - if(no_leftover) + if (no_leftover) { - PRINTF("no left over\n"); //Switch to full row size: on_new_row_size(_w[0].start(), _w.x().end()); // Shouldn't be possible to reach that point and not have at least one entire row to process @@ -249,17 +239,14 @@ private: } else { - PRINTF("with left over\n"); // Are there full rows to process ? - if(_position[1] != end_y) + if (_position[1] != end_y) { - PRINTF("full rows\n"); //Switch to full row size: on_new_row_size(_w[0].start(), _w.x().end()); iterate_over_dim1(end_y); } - PRINTF("Final leftover\n"); //Leftover end x _position[0] = _w.x().start(); iterate_over_dim0(end_x + _w.x().step(), on_new_row_size); @@ -273,7 +260,7 @@ private: */ void iterate_over_dim1(int end) { - for(; _position[1] != end; _position[1] += _w[1].step()) + for (; _position[1] != end; _position[1] += _w[1].step()) { _position[0] = _w[0].start(); iterate_over_dim0(_w[0].end()); @@ -298,10 +285,9 @@ private: */ void iterate_over_dim0(int end) { - PRINTF("X [%d, %d, %d]\n", _position.x(), end, _w[0].step()); // Both start and end belong to the same row: ARM_COMPUTE_ERROR_ON(_position[0] > end); - for(; _position.x() < end; _position[0] += _w[0].step()) + for (; _position.x() < end; _position[0] += _w[0].step()) { _lambda_function(_position); } @@ -323,9 +309,10 @@ private: * @return A WindowIterator object. 
*/ template <typename L> -WindowIterator<L> create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function) +WindowIterator<L> +create_window_iterator(const Window &w, const Coordinates &start, const Coordinates &end, L &&lambda_function) { return WindowIterator<L>(w, start, end, std::move(lambda_function)); } -} +} // namespace arm_compute #endif /*ARM_COMPUTE_WINDOW_ITERATOR_H*/ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h new file mode 100644 index 0000000000..63a3a1a1ec --- /dev/null +++ b/arm_compute/core/experimental/Types.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2020-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H +#define ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H + +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/TensorShape.h" + +#include <vector> + +namespace arm_compute +{ +// Forward declaration +class ITensor; + +/** Memory type */ +enum TensorType : int32_t +{ + ACL_UNKNOWN = -1, + ACL_SRC_DST = 0, + + // Src + ACL_SRC = 0, + ACL_SRC_0 = 0, + ACL_SRC_1 = 1, + ACL_SRC_2 = 2, + ACL_SRC_3 = 3, + ACL_SRC_4 = 4, + ACL_SRC_5 = 5, + ACL_SRC_6 = 6, + ACL_SRC_END = 6, + + // Dst + ACL_DST = 30, + ACL_DST_0 = 30, + ACL_DST_1 = 31, + ACL_DST_2 = 32, + ACL_DST_END = 32, + + // Aux + ACL_INT = 50, + ACL_INT_0 = 50, + ACL_INT_1 = 51, + ACL_INT_2 = 52, + ACL_INT_3 = 53, + ACL_INT_4 = 54, + ACL_SRC_VEC = 256, + ACL_DST_VEC = 512, + ACL_INT_VEC = 1024, + + // Aliasing Types + // Conv etc + ACL_BIAS = ACL_SRC_2, + + // Gemm + ACL_VEC_ROW_SUM = ACL_SRC_3, + ACL_VEC_COL_SUM = ACL_SRC_4, + ACL_SHIFTS = ACL_SRC_5, + ACL_MULTIPLIERS = ACL_SRC_6, +}; + +namespace experimental +{ +enum class MemoryLifetime +{ + Temporary = 0, + Persistent = 1, + Prepare = 2, +}; +struct MemoryInfo +{ + MemoryInfo() = default; + + MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept : slot(slot), size(size), alignment(alignment) + { + } + + MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept + : slot(slot), lifetime(lifetime), size(size), alignment(alignment) + { + } + + bool merge(int slot, size_t new_size, size_t new_alignment = 0) noexcept + { + if (slot != this->slot) + { + return false; + } + + size = std::max(size, new_size); + alignment = std::max(alignment, new_alignment); + + return true; + } + + int slot{ACL_UNKNOWN}; + MemoryLifetime lifetime{MemoryLifetime::Temporary}; + size_t size{0}; + size_t alignment{64}; +}; + +using MemoryRequirements = std::vector<MemoryInfo>; +} // namespace experimental +} // namespace arm_compute +#endif // 
ACL_ARM_COMPUTE_CORE_EXPERIMENTAL_TYPES_H diff --git a/arm_compute/core/NEON/wrapper/intrinsics/qmov.h b/arm_compute/core/utils/ActivationFunctionUtils.h index bb64bef1e9..c988efa256 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/qmov.h +++ b/arm_compute/core/utils/ActivationFunctionUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,29 +21,21 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_WRAPPER_QMOV_H -#define ARM_COMPUTE_WRAPPER_QMOV_H +#ifndef ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H +#define ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H -#include <arm_neon.h> +#include "arm_compute/core/Types.h" -namespace arm_compute -{ -namespace wrapper -{ -template <typename T> -inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint8x8_t>::type -vqmov(const int16x8_t &a) -{ - return vqmovun_s16(a); -} +#include <string> -template <typename T> -inline typename std::enable_if<std::is_same<T, int8_t>::value, int8x8_t>::type -vqmov(const int16x8_t &a) +namespace arm_compute { - return vqmovn_s16(a); -} - -} // namespace wrapper +/** Translates a given activation function to a string. + * + * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string. + * + * @return The string describing the activation function. + */ +const std::string &string_from_activation_func(const ActivationFunction &act); } // namespace arm_compute -#endif /* ARM_COMPUTE_WRAPPER_QMOV_H */ +#endif /*ARM_COMPUTE_CORE_UTILS_ACTIVATIONFUNCTIONUTILS_H */ diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/utils/DataLayoutUtils.h index c09972353c..61839c9f91 100644 --- a/arm_compute/core/NEON/INEKernel.h +++ b/arm_compute/core/utils/DataLayoutUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,14 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_INEKERNEL_H -#define ARM_COMPUTE_INEKERNEL_H +#ifndef ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H +#define ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H +#include "arm_compute/core/Types.h" -#include "arm_compute/core/CPP/ICPPKernel.h" +#include <string> namespace arm_compute { -/** Common interface for all kernels implemented in NEON. */ -using INEKernel = ICPPKernel; +/** Convert a data layout identity into a string. + * + * @param[in] dl @ref DataLayout to be translated to string. + * + * @return The string describing the data layout. + */ +const std::string &string_from_data_layout(DataLayout dl); } // namespace arm_compute -#endif /*ARM_COMPUTE_INEKERNEL_H */ +#endif /*ARM_COMPUTE_CORE_UTILS_DATALAYOUTUTILS_H */ diff --git a/arm_compute/core/utils/DataTypeUtils.h b/arm_compute/core/utils/DataTypeUtils.h new file mode 100644 index 0000000000..6fabb19b64 --- /dev/null +++ b/arm_compute/core/utils/DataTypeUtils.h @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2016-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H +#define ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +/** The size in bytes of the data type + * + * @param[in] data_type Input data type + * + * @return The size in bytes of the data type + */ +inline size_t data_size_from_type(DataType data_type) +{ + switch (data_type) + { + case DataType::U8: + case DataType::S8: + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + return 1; + case DataType::U16: + case DataType::S16: + case DataType::QSYMM16: + case DataType::QASYMM16: + case DataType::BFLOAT16: + case DataType::F16: + return 2; + case DataType::F32: + case DataType::U32: + case DataType::S32: + return 4; + case DataType::F64: + case DataType::U64: + case DataType::S64: + return 8; + case DataType::SIZET: + return sizeof(size_t); + default: + ARM_COMPUTE_ERROR("Invalid data type"); + return 0; + } +} + +/** The size in bytes of the data type + * + * @param[in] dt Input data type + * + * @return The size in bytes of the data type + */ +inline size_t element_size_from_data_type(DataType dt) +{ + switch (dt) + { + case DataType::S8: + case DataType::U8: + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + return 1; + case DataType::U16: + case DataType::S16: + case 
DataType::QSYMM16: + case DataType::QASYMM16: + case DataType::BFLOAT16: + case DataType::F16: + return 2; + case DataType::U32: + case DataType::S32: + case DataType::F32: + return 4; + case DataType::U64: + case DataType::S64: + return 8; + default: + ARM_COMPUTE_ERROR("Undefined element size for given data type"); + return 0; + } +} + +/** Return the data type used by a given single-planar pixel format + * + * @param[in] format Input format + * + * @return The size in bytes of the pixel format + */ +inline DataType data_type_from_format(Format format) +{ + switch (format) + { + case Format::U8: + case Format::UV88: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return DataType::U8; + case Format::U16: + return DataType::U16; + case Format::S16: + return DataType::S16; + case Format::U32: + return DataType::U32; + case Format::S32: + return DataType::S32; + case Format::BFLOAT16: + return DataType::BFLOAT16; + case Format::F16: + return DataType::F16; + case Format::F32: + return DataType::F32; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + ARM_COMPUTE_ERROR("Not supported data_type for given format"); + return DataType::UNKNOWN; + } +} + +/** Return the promoted data type of a given data type. + * + * @note If promoted data type is not supported an error will be thrown + * + * @param[in] dt Data type to get the promoted type of. 
+ * + * @return Promoted data type + */ +inline DataType get_promoted_data_type(DataType dt) +{ + switch (dt) + { + case DataType::U8: + return DataType::U16; + case DataType::S8: + return DataType::S16; + case DataType::U16: + return DataType::U32; + case DataType::S16: + return DataType::S32; + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + case DataType::QSYMM16: + case DataType::QASYMM16: + case DataType::BFLOAT16: + case DataType::F16: + case DataType::U32: + case DataType::S32: + case DataType::F32: + ARM_COMPUTE_ERROR("Unsupported data type promotions!"); + default: + ARM_COMPUTE_ERROR("Undefined data type!"); + } + return DataType::UNKNOWN; +} + +/** Compute the mininum and maximum values a data type can take + * + * @param[in] dt Data type to get the min/max bounds of + * + * @return A tuple (min,max) with the minimum and maximum values respectively wrapped in PixelValue. + */ +inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt) +{ + PixelValue min{}; + PixelValue max{}; + switch (dt) + { + case DataType::U8: + case DataType::QASYMM8: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint8_t>::max())); + break; + } + case DataType::S8: + case DataType::QSYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<int8_t>::max())); + break; + } + case DataType::U16: + case DataType::QASYMM16: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::lowest())); + max = PixelValue(static_cast<int32_t>(std::numeric_limits<uint16_t>::max())); + break; + } + case DataType::S16: + case DataType::QSYMM16: + { + min = PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::lowest())); + max = 
PixelValue(static_cast<int32_t>(std::numeric_limits<int16_t>::max())); + break; + } + case DataType::U32: + { + min = PixelValue(std::numeric_limits<uint32_t>::lowest()); + max = PixelValue(std::numeric_limits<uint32_t>::max()); + break; + } + case DataType::S32: + { + min = PixelValue(std::numeric_limits<int32_t>::lowest()); + max = PixelValue(std::numeric_limits<int32_t>::max()); + break; + } + case DataType::BFLOAT16: + { + min = PixelValue(bfloat16::lowest()); + max = PixelValue(bfloat16::max()); + break; + } + case DataType::F16: + { + min = PixelValue(std::numeric_limits<half>::lowest()); + max = PixelValue(std::numeric_limits<half>::max()); + break; + } + case DataType::F32: + { + min = PixelValue(std::numeric_limits<float>::lowest()); + max = PixelValue(std::numeric_limits<float>::max()); + break; + } + default: + ARM_COMPUTE_ERROR("Undefined data type!"); + } + return std::make_tuple(min, max); +} + +/** Convert a data type identity into a string. + * + * @param[in] dt @ref DataType to be translated to string. + * + * @return The string describing the data type. + */ +const std::string &string_from_data_type(DataType dt); + +/** Convert a string to DataType + * + * @param[in] name The name of the data type + * + * @return DataType + */ +DataType data_type_from_name(const std::string &name); + +/** Input Stream operator for @ref DataType + * + * @param[in] stream Stream to parse + * @param[out] data_type Output data type + * + * @return Updated stream + */ +inline ::std::istream &operator>>(::std::istream &stream, DataType &data_type) +{ + std::string value; + stream >> value; + data_type = data_type_from_name(value); + return stream; +} + +/** Check if a given data type is of floating point type + * + * @param[in] dt Input data type. + * + * @return True if data type is of floating point type, else false. 
+ */ +inline bool is_data_type_float(DataType dt) +{ + switch (dt) + { + case DataType::F16: + case DataType::F32: + return true; + default: + return false; + } +} + +/** Check if a given data type is of quantized type + * + * @note Quantized is considered a super-set of fixed-point and asymmetric data types. + * + * @param[in] dt Input data type. + * + * @return True if data type is of quantized type, else false. + */ +inline bool is_data_type_quantized(DataType dt) +{ + switch (dt) + { + case DataType::QSYMM8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QSYMM8_PER_CHANNEL: + case DataType::QSYMM16: + case DataType::QASYMM16: + return true; + default: + return false; + } +} + +/** Check if a given data type is of asymmetric quantized type + * + * @param[in] dt Input data type. + * + * @return True if data type is of asymmetric quantized type, else false. + */ +inline bool is_data_type_quantized_asymmetric(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::QASYMM16: + return true; + default: + return false; + } +} + +/** Check if a given data type is of asymmetric quantized signed type + * + * @param[in] dt Input data type. + * + * @return True if data type is of asymmetric quantized signed type, else false. + */ +inline bool is_data_type_quantized_asymmetric_signed(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8_SIGNED: + return true; + default: + return false; + } +} + +/** Check if a given data type is of 8-bit asymmetric quantized signed type + * + * @param[in] dt Input data type. + * + * @return True if data type is of 8-bit asymmetric quantized signed type, else false. 
+ */ +inline bool is_data_type_quantized_asymmetric_char(DataType dt) +{ + switch (dt) + { + case DataType::QASYMM8_SIGNED: + case DataType::QASYMM8: + return true; + default: + return false; + } +} + +/** Check if a given data type is of symmetric quantized type + * + * @param[in] dt Input data type. + * + * @return True if data type is of symmetric quantized type, else false. + */ +inline bool is_data_type_quantized_symmetric(DataType dt) +{ + switch (dt) + { + case DataType::QSYMM8: + case DataType::QSYMM8_PER_CHANNEL: + case DataType::QSYMM16: + return true; + default: + return false; + } +} + +/** Check if a given data type is of per channel type + * + * @param[in] dt Input data type. + * + * @return True if data type is of per channel type, else false. + */ +inline bool is_data_type_quantized_per_channel(DataType dt) +{ + switch (dt) + { + case DataType::QSYMM8_PER_CHANNEL: + return true; + default: + return false; + } +} + +/** Returns true if the value can be represented by the given data type + * + * @param[in] val value to be checked + * @param[in] dt data type that is checked + * @param[in] qinfo (Optional) quantization info if the data type is QASYMM8 + * + * @return true if the data type can hold the value. 
+ */ +template <typename T> +bool check_value_range(T val, DataType dt, QuantizationInfo qinfo = QuantizationInfo()) +{ + switch (dt) + { + case DataType::U8: + { + const auto val_u8 = static_cast<uint8_t>(val); + return ((val_u8 == val) && val >= std::numeric_limits<uint8_t>::lowest() && + val <= std::numeric_limits<uint8_t>::max()); + } + case DataType::QASYMM8: + { + double min = static_cast<double>(dequantize_qasymm8(0, qinfo)); + double max = static_cast<double>(dequantize_qasymm8(std::numeric_limits<uint8_t>::max(), qinfo)); + return ((double)val >= min && (double)val <= max); + } + case DataType::S8: + { + const auto val_s8 = static_cast<int8_t>(val); + return ((val_s8 == val) && val >= std::numeric_limits<int8_t>::lowest() && + val <= std::numeric_limits<int8_t>::max()); + } + case DataType::U16: + { + const auto val_u16 = static_cast<uint16_t>(val); + return ((val_u16 == val) && val >= std::numeric_limits<uint16_t>::lowest() && + val <= std::numeric_limits<uint16_t>::max()); + } + case DataType::S16: + { + const auto val_s16 = static_cast<int16_t>(val); + return ((val_s16 == val) && val >= std::numeric_limits<int16_t>::lowest() && + val <= std::numeric_limits<int16_t>::max()); + } + case DataType::U32: + { + const auto val_d64 = static_cast<double>(val); + const auto val_u32 = static_cast<uint32_t>(val); + return ((val_u32 == val_d64) && val_d64 >= std::numeric_limits<uint32_t>::lowest() && + val_d64 <= std::numeric_limits<uint32_t>::max()); + } + case DataType::S32: + { + const auto val_d64 = static_cast<double>(val); + const auto val_s32 = static_cast<int32_t>(val); + return ((val_s32 == val_d64) && val_d64 >= std::numeric_limits<int32_t>::lowest() && + val_d64 <= std::numeric_limits<int32_t>::max()); + } + case DataType::BFLOAT16: + return (val >= bfloat16::lowest() && val <= bfloat16::max()); + case DataType::F16: + return (val >= std::numeric_limits<half>::lowest() && val <= std::numeric_limits<half>::max()); + case DataType::F32: + return (val >= 
std::numeric_limits<float>::lowest() && val <= std::numeric_limits<float>::max()); + default: + ARM_COMPUTE_ERROR("Data type not supported"); + return false; + } +} + +/** Returns the suffix string of CPU kernel implementation names based on the given data type + * + * @param[in] data_type The data type the CPU kernel implemetation uses + * + * @return the suffix string of CPU kernel implementations + */ +inline std::string cpu_impl_dt(const DataType &data_type) +{ + std::string ret = ""; + + switch (data_type) + { + case DataType::F32: + ret = "fp32"; + break; + case DataType::F16: + ret = "fp16"; + break; + case DataType::U8: + ret = "u8"; + break; + case DataType::S16: + ret = "s16"; + break; + case DataType::S32: + ret = "s32"; + break; + case DataType::QASYMM8: + ret = "qu8"; + break; + case DataType::QASYMM8_SIGNED: + ret = "qs8"; + break; + case DataType::QSYMM16: + ret = "qs16"; + break; + case DataType::QSYMM8_PER_CHANNEL: + ret = "qp8"; + break; + case DataType::BFLOAT16: + ret = "bf16"; + break; + default: + ARM_COMPUTE_ERROR("Unsupported."); + } + + return ret; +} + +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_CORE_UTILS_DATATYPEUTILS_H diff --git a/arm_compute/core/utils/FormatUtils.h b/arm_compute/core/utils/FormatUtils.h new file mode 100644 index 0000000000..a8e96bd361 --- /dev/null +++ b/arm_compute/core/utils/FormatUtils.h @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2016-2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H +#define ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H + +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +/** The size in bytes of the pixel format + * + * @param[in] format Input format + * + * @return The size in bytes of the pixel format + */ +inline size_t pixel_size_from_format(Format format) +{ + switch (format) + { + case Format::U8: + return 1; + case Format::U16: + case Format::S16: + case Format::BFLOAT16: + case Format::F16: + case Format::UV88: + case Format::YUYV422: + case Format::UYVY422: + return 2; + case Format::RGB888: + return 3; + case Format::RGBA8888: + return 4; + case Format::U32: + case Format::S32: + case Format::F32: + return 4; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + ARM_COMPUTE_ERROR("Undefined pixel size for given format"); + return 0; + } +} + +/** Return the plane index of a given channel given an input format. 
+ * + * @param[in] format Input format + * @param[in] channel Input channel + * + * @return The plane index of the specific channel of the specific format + */ +inline int plane_idx_from_channel(Format format, Channel channel) +{ + switch (format) + { + // Single planar formats have a single plane + case Format::U8: + case Format::U16: + case Format::S16: + case Format::U32: + case Format::S32: + case Format::BFLOAT16: + case Format::F16: + case Format::F32: + case Format::UV88: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return 0; + // Multi planar formats + case Format::NV12: + case Format::NV21: + { + // Channel U and V share the same plane of format UV88 + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + case Channel::V: + return 1; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::IYUV: + case Format::YUV444: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the channel index of a given channel given an input format. 
+ * + * @param[in] format Input format + * @param[in] channel Input channel + * + * @return The channel index of the specific channel of the specific format + */ +inline int channel_idx_from_format(Format format, Channel channel) +{ + switch (format) + { + case Format::RGB888: + { + switch (channel) + { + case Channel::R: + return 0; + case Channel::G: + return 1; + case Channel::B: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::RGBA8888: + { + switch (channel) + { + case Channel::R: + return 0; + case Channel::G: + return 1; + case Channel::B: + return 2; + case Channel::A: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::YUYV422: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::UYVY422: + { + switch (channel) + { + case Channel::Y: + return 1; + case Channel::U: + return 0; + case Channel::V: + return 2; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::NV12: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 0; + case Channel::V: + return 1; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::NV21: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 1; + case Channel::V: + return 0; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + case Format::YUV444: + case Format::IYUV: + { + switch (channel) + { + case Channel::Y: + return 0; + case Channel::U: + return 0; + case Channel::V: + return 0; + default: + ARM_COMPUTE_ERROR("Not supported channel"); + return 0; + } + } + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the number of planes for a given format + * + * @param[in] 
format Input format + * + * @return The number of planes for a given image format. + */ +inline size_t num_planes_from_format(Format format) +{ + switch (format) + { + case Format::U8: + case Format::S16: + case Format::U16: + case Format::S32: + case Format::U32: + case Format::BFLOAT16: + case Format::F16: + case Format::F32: + case Format::RGB888: + case Format::RGBA8888: + case Format::YUYV422: + case Format::UYVY422: + return 1; + case Format::NV12: + case Format::NV21: + return 2; + case Format::IYUV: + case Format::YUV444: + return 3; + default: + ARM_COMPUTE_ERROR("Not supported format"); + return 0; + } +} + +/** Return the number of channels for a given single-planar pixel format + * + * @param[in] format Input format + * + * @return The number of channels for a given image format. + */ +inline size_t num_channels_from_format(Format format) +{ + switch (format) + { + case Format::U8: + case Format::U16: + case Format::S16: + case Format::U32: + case Format::S32: + case Format::BFLOAT16: + case Format::F16: + case Format::F32: + return 1; + // Because the U and V channels are subsampled + // these formats appear like having only 2 channels: + case Format::YUYV422: + case Format::UYVY422: + return 2; + case Format::UV88: + return 2; + case Format::RGB888: + return 3; + case Format::RGBA8888: + return 4; + //Doesn't make sense for planar formats: + case Format::NV12: + case Format::NV21: + case Format::IYUV: + case Format::YUV444: + default: + return 0; + } +} + +/** Convert a tensor format into a string. + * + * @param[in] format @ref Format to be translated to string. + * + * @return The string describing the format. 
+ */ +const std::string &string_from_format(Format format); +} // namespace arm_compute +#endif /*ARM_COMPUTE_CORE_UTILS_FORMATUTILS_H */ diff --git a/arm_compute/core/NEON/NEFixedPoint.h b/arm_compute/core/utils/InterpolationPolicyUtils.h index 3aff677385..8d4ae4321c 100644 --- a/arm_compute/core/NEON/NEFixedPoint.h +++ b/arm_compute/core/utils/InterpolationPolicyUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,21 +21,21 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_NEFIXEDPOINT_H -#define ARM_COMPUTE_NEFIXEDPOINT_H +#ifndef ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H +#define ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H -#include <arm_neon.h> +#include "arm_compute/core/Types.h" + +#include <string> namespace arm_compute { -/** Compute lane-by-lane maximum between elements of a float vector with 4x2 elements +/** Translates a given interpolation policy to a string. * - * @param[in] a Float input vector - * @param[in] b Float input vector + * @param[in] policy @ref InterpolationPolicy to be translated to string. * - * @return The lane-by-lane maximum -> float32x4x2 + * @return The string describing the interpolation policy. */ -float32x4x2_t vmax2q_f32(float32x4x2_t a, float32x4x2_t b); +const std::string &string_from_interpolation_policy(InterpolationPolicy policy); } // namespace arm_compute -#include "arm_compute/core/NEON/NEFixedPoint.inl" -#endif /* ARM_COMPUTE_NEFIXEDPOINT_H */
\ No newline at end of file +#endif /*ARM_COMPUTE_CORE_UTILS_INTERPOLATIONPOLICYUTILS_H */ diff --git a/arm_compute/core/GLES_COMPUTE/GCHelpers.h b/arm_compute/core/utils/StringUtils.h index b1a9ab32be..c13cbaa334 100644 --- a/arm_compute/core/GLES_COMPUTE/GCHelpers.h +++ b/arm_compute/core/utils/StringUtils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,38 +21,45 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_GCHELPERS_H -#define ARM_COMPUTE_GCHELPERS_H +#ifndef ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H +#define ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H -#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/Helpers.h" - -#include <set> #include <string> +#include <vector> namespace arm_compute { -// Forward declarations -class GCCoreRuntimeContext; +/** Lower a given string. + * + * @param[in] val Given string to lower. + * + * @return The lowered string + */ +std::string lower_string(const std::string &val); -/** Max vector width of an GLES vector */ -static constexpr unsigned int max_gc_vector_width = 16; +/** Raise a given string to upper case + * + * @param[in] val Given string to lower. + * + * @return The upper case string + */ +std::string upper_string(const std::string &val); -/** Helper function to get the GPU target from GLES using GL_RENDERER enum +/** Create a string with the float in full precision. * - * @return the GPU target + * @param val Floating point value + * + * @return String with the floating point value. */ -GPUTarget get_target_from_device(); -/** Creates an GLES kernel +std::string float_to_string_with_full_precision(float val); + +/** Join a sequence of strings with separator @p sep * - * @param[in] ctx A context to be used to create the GLES kernel. 
- * @param[in] kernel_name The kernel name. - * @param[in] build_opts The build options to be used for the GLES kernel compilation. + * @param[in] strings Strings to join + * @param[in] sep Separator to join consecutive strings in the sequence * - * @return A GLES kernel + * @return std::string */ -GCKernel create_opengl_kernel(GCCoreRuntimeContext *ctx, const std::string &kernel_name, const std::set<std::string> &build_opts); +std::string join(const std::vector<std::string> strings, const std::string &sep); } // namespace arm_compute -#endif /* ARM_COMPUTE_GCHELPERS_H */ +#endif /*ARM_COMPUTE_CORE_UTILS_STRINGUTILS_H */ diff --git a/arm_compute/core/utils/misc/CRTP.h b/arm_compute/core/utils/helpers/AdjustVecSize.h index 037c69ab1d..842e3b57d6 100644 --- a/arm_compute/core/utils/misc/CRTP.h +++ b/arm_compute/core/utils/helpers/AdjustVecSize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,35 +21,35 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_MISC_CRTP_H -#define ARM_COMPUTE_MISC_CRTP_H +#ifndef ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H +#define ARM_COMPUTE_UTILS_ADJUSTVECSIZE_H + +#include "arm_compute/core/Error.h" namespace arm_compute { -namespace misc -{ -/** Curiously recurring template pattern Interface */ -template <typename T, template <typename> class Type> -struct CRTP +/** Returns the adjusted vector size in case it is less than the input's first dimension, getting rounded down to its closest valid vector size + * + * @param[in] vec_size vector size to be adjusted + * @param[in] dim0 size of the first dimension + * + * @return the number of element processed along the X axis per thread + */ +inline unsigned int adjust_vec_size(unsigned int vec_size, size_t dim0) { -public: - /** Exact type */ - using ExactType = T; + ARM_COMPUTE_ERROR_ON(vec_size > 16); -protected: - const T &impl() const + if ((vec_size >= dim0) && (dim0 == 3)) { - return static_cast<const T &>(*this); + return dim0; } - T &impl() + + while (vec_size > dim0) { - return static_cast<T &>(*this); + vec_size >>= 1; } -private: - CRTP() = default; - friend Type<T>; -}; -} // namespace misc + return vec_size; +} } // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_CRTP_H */ +#endif /*ARM_COMPUTE_UTILS_H */ diff --git a/arm_compute/core/utils/helpers/bit_ops.h b/arm_compute/core/utils/helpers/bit_ops.h deleted file mode 100644 index 6dbca179e7..0000000000 --- a/arm_compute/core/utils/helpers/bit_ops.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H -#define ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H - -#include "arm_compute/core/utils/misc/Requires.h" - -#include <type_traits> - -namespace arm_compute -{ -namespace helpers -{ -namespace bit_ops -{ -/** Checks if the idx-th bit is set in an integral type - * - * @param[in] v Integral input - * @param[in] idx Index of the bit to check - * - * @return True if the idx-th bit is set else false - */ -template <typename T, REQUIRES_TA(std::is_integral<T>::value)> -bool is_bit_set(T v, unsigned int idx) -{ - return (v & 1 << idx) != 0; -} -} // namespace bit_ops -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_BIT_OPS_H */ diff --git a/arm_compute/core/utils/helpers/fft.h b/arm_compute/core/utils/helpers/fft.h deleted file mode 100644 index b22bece73f..0000000000 --- a/arm_compute/core/utils/helpers/fft.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_FFT_H -#define ARM_COMPUTE_UTILS_HELPERS_FFT_H - -#include <set> -#include <vector> - -namespace arm_compute -{ -namespace helpers -{ -namespace fft -{ -/** Decompose a given 1D input size using the provided supported factors. - * - * @param[in] N Input size to be decomposed. - * @param[in] supported_factors Supported factors that can be used for decomposition. - * - * @return A vector with the stages of the decomposition. Will be empty if decomposition failed. - */ -std::vector<unsigned int> decompose_stages(unsigned int N, const std::set<unsigned int> &supported_factors); -/** Calculate digit reverse index vector given fft size and the decomposed stages - * - * @param N Input size to calculate digit reverse for - * @param fft_stages A vector with the FFT decomposed stages - * - * @return A vector with the digit reverse indices. Will be empty if it failed. - */ -std::vector<unsigned int> digit_reverse_indices(unsigned int N, const std::vector<unsigned int> &fft_stages); -} // namespace fft -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_FFT_H */ diff --git a/arm_compute/core/utils/helpers/float_ops.h b/arm_compute/core/utils/helpers/float_ops.h deleted file mode 100644 index fceee2e3fe..0000000000 --- a/arm_compute/core/utils/helpers/float_ops.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H -#define ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H - -namespace arm_compute -{ -namespace helpers -{ -namespace float_ops -{ -union RawFloat -{ - /** Constructor - * - * @param[in] val Floating-point value - */ - explicit RawFloat(float val) - : f32(val) - { - } - /** Extract sign of floating point number - * - * @return Sign of floating point number - */ - int32_t sign() const - { - return i32 >> 31; - } - /** Extract exponent of floating point number - * - * @return Exponent of floating point number - */ - int32_t exponent() const - { - return (i32 >> 23) & 0xFF; - } - /** Extract mantissa of floating point number - * - * @return Mantissa of floating point number - */ - int32_t mantissa() const - { - return i32 & 0x007FFFFF; - } - - int32_t i32; - float f32; -}; - -/** Checks if two floating point numbers are equal given an allowed number of ULPs - * - * @param[in] a First number to compare - * @param[in] b Second number to compare - * @param[in] max_allowed_ulps (Optional) Number of allowed ULPs - * - * @return True if number is close else false - */ -inline bool is_equal_ulps(float a, float b, int max_allowed_ulps = 0) -{ - RawFloat ra(a); - RawFloat rb(b); - - // Check ULP distance - const int ulps = std::abs(ra.i32 - rb.i32); - return ulps <= max_allowed_ulps; -} - -/** Checks if the input floating point number is 1.0f checking if the difference is within a range defined with epsilon - * - * @param[in] a Input floating point number - * @param[in] epsilon (Optional) Epsilon used to define the error bounds - * - * @return True if number is close to 1.0f - */ -inline bool is_one(float a, float epsilon = 0.00001f) -{ - return std::abs(1.0f - a) <= epsilon; -} - -/** Checks if the input floating point number is 0.0f checking if the difference is within a range defined with epsilon - * - * @param[in] a Input floating point number - * @param[in] epsilon (Optional) Epsilon used to define the error bounds - * - * @return 
True if number is close to 0.0f - */ -inline bool is_zero(float a, float epsilon = 0.00001f) -{ - return std::abs(0.0f - a) <= epsilon; -} -} // namespace float_ops -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_FLOAT_OPS_H */ diff --git a/arm_compute/core/utils/helpers/tensor_info.h b/arm_compute/core/utils/helpers/tensor_info.h deleted file mode 100644 index da24e82f5a..0000000000 --- a/arm_compute/core/utils/helpers/tensor_info.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H -#define ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H - -#include "arm_compute/core/ITensorInfo.h" - -namespace arm_compute -{ -namespace helpers -{ -namespace tensor_info -{ -/** Checks if the quantization info of given tensors are different - * - * @param tensor_info_1 Tensor info of the first tensor - * @param tensor_info_2 Tensor info of the second tensor - * @param tensor_infos Tensor infos of the rest tensors - * - * @return True if tensors have mismatching quantization info else false. - */ -template <typename... Ts> -inline bool tensors_have_different_quantization_info(const ITensorInfo *tensor_info_1, const ITensorInfo *tensor_info_2, Ts... tensor_infos) -{ - const QuantizationInfo first_quantization_info = tensor_info_1->quantization_info(); - - const std::array < const ITensorInfo *, 1 + sizeof...(Ts) > tensor_infos_array{ { tensor_info_2, std::forward<Ts>(tensor_infos)... } }; - return std::any_of(tensor_infos_array.begin(), tensor_infos_array.end(), [&](const ITensorInfo * tensor_info) - { - return tensor_info->quantization_info() != first_quantization_info; - }); -} -} // namespace tensor_info -} // namespace helpers -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_INFO_H */ diff --git a/arm_compute/core/utils/helpers/tensor_transform.h b/arm_compute/core/utils/helpers/tensor_transform.h index 7e912a6f0a..7a61fa192a 100644 --- a/arm_compute/core/utils/helpers/tensor_transform.h +++ b/arm_compute/core/utils/helpers/tensor_transform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -52,7 +52,8 @@ int calculate_stride_on_index(int index, Coordinates strides); * * @return Absolute start position of a given index */ -int calculate_start_on_index(TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask); +int calculate_start_on_index( + TensorShape input_shape, int index, Coordinates starts, Coordinates strides, int32_t begin_mask); /** Returns the absolute end position of a given index for a strided slice operation * @@ -68,8 +69,13 @@ int calculate_start_on_index(TensorShape input_shape, int index, Coordinates sta * * @return Absolute end position of a given index */ -int calculate_end_on_index(TensorShape input_shape, int index, int start_on_index, Coordinates ends, Coordinates strides, - int32_t end_mask = 0, int32_t shrink_axis_mask = 0); +int calculate_end_on_index(TensorShape input_shape, + int index, + int start_on_index, + Coordinates ends, + Coordinates strides, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Calculate start, end and stride coordinates for a strided slice * @@ -87,8 +93,12 @@ int calculate_end_on_index(TensorShape input_shape, int index, int start_on_inde * @return A tuple with <Start,End,Strides> */ std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords(TensorShape input_shape, - Coordinates starts, Coordinates ends, Coordinates strides, - int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0); + Coordinates starts, + Coordinates ends, + Coordinates strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0); /** Computes output shape of strided slice * @@ -109,9 +119,14 @@ std::tuple<Coordinates, Coordinates, Coordinates> calculate_strided_slice_coords * * @return The output tensor shape */ -TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordinates starts, Coordinates ends, Coordinates strides, - int32_t begin_mask = 0, int32_t 
end_mask = 0, int32_t shrink_axis_mask = 0, - bool return_unshrinked = false); +TensorShape compute_strided_slice_output_shape(TensorShape input_shape, + Coordinates starts, + Coordinates ends, + Coordinates strides, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t shrink_axis_mask = 0, + bool return_unshrinked = false); /** Constructs end mask in case we want to perform a slice operation using the strided slice interface * @@ -122,7 +137,7 @@ TensorShape compute_strided_slice_output_shape(TensorShape input_shape, Coordina * @return End mask */ int32_t construct_slice_end_mask(Coordinates ends); -} // namespace tensor_tranform +} // namespace tensor_transform } // namespace helpers } // namespace arm_compute #endif /* ARM_COMPUTE_UTILS_HELPERS_TENSOR_TRANSFORM_H */ diff --git a/arm_compute/core/utils/io/FileHandler.h b/arm_compute/core/utils/io/FileHandler.h index ebc2ef06c1..615651d5b1 100644 --- a/arm_compute/core/utils/io/FileHandler.h +++ b/arm_compute/core/utils/io/FileHandler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/core/utils/logging/FilePrinter.h b/arm_compute/core/utils/logging/FilePrinter.h index 73a5421ed4..a865aadddb 100644 --- a/arm_compute/core/utils/logging/FilePrinter.h +++ b/arm_compute/core/utils/logging/FilePrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,8 @@ #ifndef ARM_COMPUTE_LOGGING_FILE_PRINTER_H #define ARM_COMPUTE_LOGGING_FILE_PRINTER_H -#include "arm_compute/core/utils/logging/IPrinter.h" - #include "arm_compute/core/utils/io/FileHandler.h" +#include "arm_compute/core/utils/logging/IPrinter.h" namespace arm_compute { diff --git a/arm_compute/core/utils/logging/Helpers.h b/arm_compute/core/utils/logging/Helpers.h index 341f944ddc..c3c2f0f0b8 100644 --- a/arm_compute/core/utils/logging/Helpers.h +++ b/arm_compute/core/utils/logging/Helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_LOGGING_HELPERS_H #include "arm_compute/core/utils/logging/Types.h" -#include "support/MemorySupport.h" + #include "support/ToolchainSupport.h" #include <cstddef> @@ -46,10 +46,10 @@ namespace logging * @return The formatted string */ template <typename... Ts> -inline std::string string_with_format(const std::string &fmt, Ts &&... args) +inline std::string string_with_format(const std::string &fmt, Ts &&...args) { size_t size = support::cpp11::snprintf(nullptr, 0, fmt.c_str(), args...) + 1; - auto char_str = support::cpp14::make_unique<char[]>(size); + auto char_str = std::make_unique<char[]>(size); support::cpp11::snprintf(char_str.get(), size, fmt.c_str(), args...); return std::string(char_str.get(), char_str.get() + size - 1); } diff --git a/arm_compute/core/utils/logging/IPrinter.h b/arm_compute/core/utils/logging/IPrinter.h index b6ede5853a..7fde4d9302 100644 --- a/arm_compute/core/utils/logging/IPrinter.h +++ b/arm_compute/core/utils/logging/IPrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -35,8 +35,7 @@ class Printer { public: /** Default Constructor */ - Printer() noexcept - : _mtx() + Printer() noexcept : _mtx() { } /** Prevent instances of this class from being copied */ diff --git a/arm_compute/core/utils/logging/LogMsgDecorators.h b/arm_compute/core/utils/logging/LogMsgDecorators.h index 08abcb4519..66a8180e21 100644 --- a/arm_compute/core/utils/logging/LogMsgDecorators.h +++ b/arm_compute/core/utils/logging/LogMsgDecorators.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,8 +63,7 @@ public: * * @param str Sting to append */ - StringDecorator(const std::string &str) - : _str(str) + StringDecorator(const std::string &str) : _str(str) { _str = angle_wrap_value(str); } @@ -103,7 +102,7 @@ private: auto time = std::chrono::system_clock::to_time_t(now); // TODO: use put_time for gcc > 4.9 - char buf[100] = { 0 }; + char buf[100] = {0}; std::strftime(buf, sizeof(buf), "%d-%m-%Y %I:%M:%S", std::localtime(&time)); return buf; } diff --git a/arm_compute/core/utils/logging/Logger.h b/arm_compute/core/utils/logging/Logger.h index 2bd467ae2b..608db39138 100644 --- a/arm_compute/core/utils/logging/Logger.h +++ b/arm_compute/core/utils/logging/Logger.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -88,7 +88,7 @@ public: * @param[in] args Message arguments */ template <typename... Ts> - void log(LogLevel log_level, const std::string &fmt, Ts &&... args); + void log(LogLevel log_level, const std::string &fmt, Ts &&...args); /** Sets log level of the logger * * @warning Not thread-safe @@ -159,11 +159,11 @@ private: }; template <typename... Ts> -inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&... 
args) +inline void Logger::log(LogLevel log_level, const std::string &fmt, Ts &&...args) { // Return if message shouldn't be logged // i.e. if log level does not match the logger's - if(!is_loggable(log_level)) + if (!is_loggable(log_level)) { return; } diff --git a/arm_compute/core/utils/logging/LoggerRegistry.h b/arm_compute/core/utils/logging/LoggerRegistry.h index c1a182c1ae..4e52a10935 100644 --- a/arm_compute/core/utils/logging/LoggerRegistry.h +++ b/arm_compute/core/utils/logging/LoggerRegistry.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/utils/logging/Logger.h" #include "arm_compute/core/utils/logging/Printers.h" #include "arm_compute/core/utils/logging/Types.h" + #include "support/Mutex.h" #include <memory> @@ -54,8 +55,9 @@ public: * @param[in] log_level Logger's log level. Defaults to INFO * @param[in] printers Printers to attach to the system loggers. Defaults with a @ref StdPrinter. */ - void create_logger(const std::string &name, LogLevel log_level = LogLevel::INFO, - const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() }); + void create_logger(const std::string &name, + LogLevel log_level = LogLevel::INFO, + const std::vector<std::shared_ptr<Printer>> &printers = {std::make_shared<StdPrinter>()}); /** Remove a logger * * @param name Logger's name @@ -74,16 +76,17 @@ public: * @param[in] printers (Optional) Printers to attach to the system loggers. Defaults with a @ref StdPrinter. 
*/ void create_reserved_loggers(LogLevel log_level = LogLevel::INFO, - const std::vector<std::shared_ptr<Printer>> &printers = { std::make_shared<StdPrinter>() }); + const std::vector<std::shared_ptr<Printer>> &printers = { + std::make_shared<StdPrinter>()}); private: /** Default constructor */ LoggerRegistry(); private: - arm_compute::Mutex _mtx; + arm_compute::Mutex _mtx; std::unordered_map<std::string, std::shared_ptr<Logger>> _loggers; - static std::set<std::string> _reserved_loggers; + static std::set<std::string> _reserved_loggers; }; } // namespace logging } // namespace arm_compute diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h index e4d9734792..4d5aa5fe2c 100644 --- a/arm_compute/core/utils/logging/Macros.h +++ b/arm_compute/core/utils/logging/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,44 +30,71 @@ #ifdef ARM_COMPUTE_LOGGING_ENABLED +#ifdef __GNUC__ +inline std::string signature_name(const std::string &pretty_func) +{ + const auto scope_op = pretty_func.find("::"); + const auto begin = pretty_func.substr(0, scope_op).rfind(" ") + 1; + const auto end = pretty_func.rfind("(") - begin; + + return pretty_func.substr(begin, end) + "()"; +} +#define ARM_COMPUTE_SIGNATURE_NAME signature_name(__PRETTY_FUNCTION__) +#else /* __GNUC__ */ +#define ARM_COMPUTE_SIGNATURE_NAME (__func__) +#endif /* __GNUC__ */ + #define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ __logger->log(log_level, msg); \ } \ - } while(false) + } while (false) + +#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) \ + do \ + { \ + auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ + if (__logger != nullptr) \ + { \ + std::ostringstream 
s; \ + s << ARM_COMPUTE_SIGNATURE_NAME << " : " << msg; \ + __logger->log(log_level, s.str()); \ + } \ + } while (false) #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ size_t size = ::snprintf(nullptr, 0, fmt, __VA_ARGS__) + 1; \ - auto char_str = support::cpp14::make_unique<char[]>(size); \ - ::snprintf(char_str.get(), size, #fmt, __VA_ARGS__); \ + auto char_str = std::make_unique<char[]>(size); \ + ::snprintf(char_str.get(), size, fmt, __VA_ARGS__); \ __logger->log(log_level, std::string(char_str.get(), char_str.get() + size - 1)); \ } \ - } while(false) + } while (false) #define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \ do \ { \ auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ - if(__logger != nullptr) \ + if (__logger != nullptr) \ { \ std::ostringstream s; \ s << stream; \ __logger->log(log_level, s.str()); \ } \ - } while(false) + } while (false) #else /* ARM_COMPUTE_LOGGING_ENABLED */ #define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg) +#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) #define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) diff --git a/arm_compute/core/utils/logging/Printers.h b/arm_compute/core/utils/logging/Printers.h index e09880cc53..80493e7052 100644 --- a/arm_compute/core/utils/logging/Printers.h +++ b/arm_compute/core/utils/logging/Printers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/core/utils/logging/StdPrinter.h b/arm_compute/core/utils/logging/StdPrinter.h index ea41ce2599..eb0e78ee84 100644 --- a/arm_compute/core/utils/logging/StdPrinter.h +++ b/arm_compute/core/utils/logging/StdPrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2019 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/core/utils/logging/Types.h b/arm_compute/core/utils/logging/Types.h index 838adf95b4..64c567b984 100644 --- a/arm_compute/core/utils/logging/Types.h +++ b/arm_compute/core/utils/logging/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,8 +44,7 @@ enum class LogLevel struct LogMsg { /** Default constructor */ - LogMsg() - : raw_(), log_level_(LogLevel::OFF) + LogMsg() : raw_(), log_level_(LogLevel::OFF) { } /** Construct a log message @@ -53,8 +52,7 @@ struct LogMsg * @param[in] msg Message to log. * @param[in] log_level Logging level. Default: OFF */ - LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) - : raw_(msg), log_level_(log_level) + LogMsg(std::string msg, LogLevel log_level = LogLevel::OFF) : raw_(msg), log_level_(log_level) { } diff --git a/arm_compute/core/utils/math/Math.h b/arm_compute/core/utils/math/Math.h new file mode 100644 index 0000000000..e70337ba0f --- /dev/null +++ b/arm_compute/core/utils/math/Math.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017-2018, 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_UTILS_MATH_H +#define ARM_COMPUTE_UTILS_MATH_H + +namespace arm_compute +{ +/** Calculate the rounded up quotient of val / m. + * + * @param[in] val Value to divide and round up. + * @param[in] m Value to divide by. + * + * @return the result. + */ +template <typename S, typename T> +constexpr auto DIV_CEIL(S val, T m) -> decltype((val + m - 1) / m) +{ + return (val + m - 1) / m; +} + +/** Computes the smallest number larger or equal to value that is a multiple of divisor. + * + * @param[in] value Lower bound value + * @param[in] divisor Value to compute multiple of. + * + * @return the result. 
+ */ +template <typename S, typename T> +inline auto ceil_to_multiple(S value, T divisor) -> decltype(((value + divisor - 1) / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return DIV_CEIL(value, divisor) * divisor; +} + +/** Computes the largest number smaller or equal to value that is a multiple of divisor. + * + * @param[in] value Upper bound value + * @param[in] divisor Value to compute multiple of. + * + * @return the result. + */ +template <typename S, typename T> +inline auto floor_to_multiple(S value, T divisor) -> decltype((value / divisor) * divisor) +{ + ARM_COMPUTE_ERROR_ON(value < 0 || divisor <= 0); + return (value / divisor) * divisor; +} + +} // namespace arm_compute +#endif /*ARM_COMPUTE_UTILS_MATH_H */ diff --git a/arm_compute/core/utils/math/SafeOps.h b/arm_compute/core/utils/math/SafeOps.h index 41bbb12e70..ef8bcf7e14 100644 --- a/arm_compute/core/utils/math/SafeOps.h +++ b/arm_compute/core/utils/math/SafeOps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,10 @@ #define ARM_COMPUTE_UTILS_MATH_SAFE_OPS #include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Requires.h" + +#include "support/AclRequires.h" + +#include <limits> namespace arm_compute { @@ -44,16 +47,16 @@ namespace math * * @return The addition result */ -template <typename T, REQUIRES_TA(std::is_integral<T>::value)> +template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> T safe_integer_add(T val_a, T val_b) { T result = 0; - if((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b)) + if ((val_b > 0) && (val_a > std::numeric_limits<T>::max() - val_b)) { result = std::numeric_limits<T>::max(); } - else if((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b)) + else if ((val_b < 0) && (val_a < std::numeric_limits<T>::min() - val_b)) { result = std::numeric_limits<T>::min(); } @@ -76,16 +79,16 @@ T safe_integer_add(T val_a, T val_b) * * @return The subtraction result */ -template <typename T, REQUIRES_TA(std::is_integral<T>::value)> +template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> T safe_integer_sub(T val_a, T val_b) { T result = 0; - if((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b)) + if ((val_b < 0) && (val_a > std::numeric_limits<T>::max() + val_b)) { result = std::numeric_limits<T>::max(); } - else if((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b)) + else if ((val_b > 0) && (val_a < std::numeric_limits<T>::min() + val_b)) { result = std::numeric_limits<T>::min(); } @@ -108,18 +111,18 @@ T safe_integer_sub(T val_a, T val_b) * * @return The multiplication result */ -template <typename T, REQUIRES_TA(std::is_integral<T>::value)> +template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> T safe_integer_mul(T val_a, T val_b) { T result = 0; - if(val_a > 0) + if (val_a > 0) { - if((val_b > 0) && (val_a > (std::numeric_limits<T>::max() / val_b))) + if ((val_b > 0) && (val_a > 
(std::numeric_limits<T>::max() / val_b))) { result = std::numeric_limits<T>::max(); } - else if(val_b < (std::numeric_limits<T>::min() / val_a)) + else if (val_b < (std::numeric_limits<T>::min() / val_a)) { result = std::numeric_limits<T>::min(); } @@ -130,11 +133,11 @@ T safe_integer_mul(T val_a, T val_b) } else { - if((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b))) + if ((val_b > 0) && (val_a < (std::numeric_limits<T>::min() / val_b))) { result = std::numeric_limits<T>::max(); } - else if((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a))) + else if ((val_a != 0) && (val_b < (std::numeric_limits<T>::max() / val_a))) { result = std::numeric_limits<T>::min(); } @@ -158,12 +161,12 @@ T safe_integer_mul(T val_a, T val_b) * * @return The quotient */ -template <typename T, REQUIRES_TA(std::is_integral<T>::value)> +template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> T safe_integer_div(T val_a, T val_b) { T result = 0; - if((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1))) + if ((val_b == 0) || ((val_a == std::numeric_limits<T>::min()) && (val_b == -1))) { result = std::numeric_limits<T>::min(); } @@ -174,7 +177,7 @@ T safe_integer_div(T val_a, T val_b) return result; } -} // namespace cast +} // namespace math } // namespace utils } // namespace arm_compute #endif /* ARM_COMPUTE_UTILS_MATH_SAFE_OPS */ diff --git a/arm_compute/core/utils/misc/Cast.h b/arm_compute/core/utils/misc/Cast.h deleted file mode 100644 index fc6246aace..0000000000 --- a/arm_compute/core/utils/misc/Cast.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_MISC_CAST_H -#define ARM_COMPUTE_MISC_CAST_H - -#include "arm_compute/core/Error.h" - -namespace arm_compute -{ -namespace utils -{ -namespace cast -{ -/** Polymorphic cast between two types - * - * @warning Will throw an exception if cast cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source> -inline Target polymorphic_cast(Source *v) -{ - if(dynamic_cast<Target>(v) == nullptr) - { - ARM_COMPUTE_THROW(std::bad_cast()); - } - return static_cast<Target>(v); -} - -/** Polymorphic down cast between two types - * - * @warning Will assert if cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source> -inline Target polymorphic_downcast(Source *v) -{ - ARM_COMPUTE_ERROR_ON(dynamic_cast<Target>(v) != static_cast<Target>(v)); - return static_cast<Target>(v); -} - -/** Polymorphic cast between two unique pointer types - * - * @warning Will throw an exception if cast cannot take place - * - * @tparam Target Target to cast type - * @tparam Source Source from cast type - * @tparam Deleter Deleter function type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source, typename Deleter> -std::unique_ptr<Target, Deleter> polymorphic_cast_unique_ptr(std::unique_ptr<Source, Deleter> &&v) -{ - if(dynamic_cast<Target *>(v.get()) == nullptr) - { - ARM_COMPUTE_THROW(std::bad_cast()); - } - auto r = static_cast<Target *>(v.release()); - return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter())); -} - -/** Polymorphic down cast between two unique pointer types - * - * @warning Will assert if cannot take place - * - * @tparam Target Target to cast type - * @tparam Source 
Source from cast type - * @tparam Deleter Deleter function type - * - * @param[in] v Value to cast - * - * @return The casted value - */ -template <typename Target, typename Source, typename Deleter> -std::unique_ptr<Target, Deleter> polymorphic_downcast_unique_ptr(std::unique_ptr<Source, Deleter> &&v) -{ - ARM_COMPUTE_ERROR_ON(dynamic_cast<Target *>(v.get()) != static_cast<Target *>(v.get())); - auto r = static_cast<Target *>(v.release()); - return std::unique_ptr<Target, Deleter>(r, std::move(v.get_deleter())); -} -} // namespace cast -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_CAST_H */ diff --git a/arm_compute/core/utils/misc/ICloneable.h b/arm_compute/core/utils/misc/ICloneable.h deleted file mode 100644 index 064f408201..0000000000 --- a/arm_compute/core/utils/misc/ICloneable.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MISC_ICLONEABLE_H -#define ARM_COMPUTE_MISC_ICLONEABLE_H - -#include <memory> - -namespace arm_compute -{ -namespace misc -{ -/** Clonable Interface */ -template <class T> -class ICloneable -{ -public: - /** Default virtual desctructor */ - virtual ~ICloneable() = default; - /** Provide a clone of the current object of class T - * - * @return Clone object of class T - */ - virtual std::unique_ptr<T> clone() const = 0; -}; -} // namespace misc -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_ICLONEABLE_H */ diff --git a/arm_compute/core/utils/misc/InfoHelpers.h b/arm_compute/core/utils/misc/InfoHelpers.h index c6ee7c9031..1d1b4ea8d7 100644 --- a/arm_compute/core/utils/misc/InfoHelpers.h +++ b/arm_compute/core/utils/misc/InfoHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,10 +53,12 @@ inline bool is_relu(ActivationLayerInfo activation_info) */ inline bool is_relu6(ActivationLayerInfo activation_info) { - const bool is_lu_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU - && activation_info.a() == 6.f && activation_info.b() == 0.f; - const bool is_bounded_relu = activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU - && activation_info.a() == 6.f; + const bool is_lu_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU && + activation_info.a() == 6.f && activation_info.b() == 0.f; + const bool is_bounded_relu = + activation_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && + activation_info.a() == 6.f; return activation_info.enabled() && (is_lu_bounded_relu || is_bounded_relu); } @@ -68,50 +70,52 @@ inline bool is_relu6(ActivationLayerInfo activation_info) * */ template <typename T> -inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, - LSTMParams<ITensorInfo> *lstm_params_info) +inline void build_lstm_params_tensor_info(const LSTMParams<T> &lstm_params, LSTMParams<ITensorInfo> *lstm_params_info) { - if(lstm_params.has_peephole_opt()) + if (lstm_params.has_peephole_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights()); - lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), lstm_params.cell_to_output_weights()->info()); + lstm_params_info->set_peephole_params(lstm_params.cell_to_forget_weights()->info(), + lstm_params.cell_to_output_weights()->info()); } - if(lstm_params.has_projection()) + if (lstm_params.has_projection()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.projection_weights()); - lstm_params_info->set_projection_params(lstm_params.projection_weights()->info(), - lstm_params.projection_bias() != nullptr ? 
lstm_params.projection_bias()->info() : nullptr); + lstm_params_info->set_projection_params( + lstm_params.projection_weights()->info(), + lstm_params.projection_bias() != nullptr ? lstm_params.projection_bias()->info() : nullptr); } - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias()); + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), + lstm_params.input_gate_bias()); - const ITensorInfo *cell_to_input_weights_info = (lstm_params.has_peephole_opt()) ? lstm_params.cell_to_input_weights()->info() : nullptr; - lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), lstm_params.recurrent_to_input_weights()->info(), - cell_to_input_weights_info, lstm_params.input_gate_bias()->info()); + ITensorInfo *cell_to_input_weights_info = + (lstm_params.has_peephole_opt()) ? 
lstm_params.cell_to_input_weights()->info() : nullptr; + lstm_params_info->set_cifg_params(lstm_params.input_to_input_weights()->info(), + lstm_params.recurrent_to_input_weights()->info(), cell_to_input_weights_info, + lstm_params.input_gate_bias()->info()); } - if(lstm_params.use_layer_norm()) + if (lstm_params.use_layer_norm()) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), - lstm_params.output_layer_norm_weights(), + ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.output_layer_norm_weights(), lstm_params.cell_layer_norm_weights()); - if(!lstm_params.has_cifg_opt()) + if (!lstm_params.has_cifg_opt()) { ARM_COMPUTE_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights()); } - const ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info(); - const ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info(); - const ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info(); - const ITensorInfo *input_info = lstm_params.has_cifg_opt() ? nullptr : lstm_params.input_layer_norm_weights()->info(); + ITensorInfo *forget_info = lstm_params.forget_layer_norm_weights()->info(); + ITensorInfo *cell_info = lstm_params.cell_layer_norm_weights()->info(); + ITensorInfo *output_info = lstm_params.output_layer_norm_weights()->info(); + ITensorInfo *input_info = lstm_params.has_cifg_opt() ? 
nullptr : lstm_params.input_layer_norm_weights()->info(); lstm_params_info->set_layer_normalization_params(input_info, forget_info, cell_info, output_info); } - lstm_params_info->set_matmul_scale_params(lstm_params.input_intermediate_scale(), - lstm_params.forget_intermediate_scale(), - lstm_params.cell_intermediate_scale(), - lstm_params.output_intermediate_scale()); + lstm_params_info->set_matmul_scale_params( + lstm_params.input_intermediate_scale(), lstm_params.forget_intermediate_scale(), + lstm_params.cell_intermediate_scale(), lstm_params.output_intermediate_scale()); lstm_params_info->set_hidden_state_params(lstm_params.hidden_state_zero(), lstm_params.hidden_state_scale()); } diff --git a/arm_compute/core/utils/misc/Iterable.h b/arm_compute/core/utils/misc/Iterable.h deleted file mode 100644 index 829c4b44a8..0000000000 --- a/arm_compute/core/utils/misc/Iterable.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MISC_ITERABLE_H -#define ARM_COMPUTE_MISC_ITERABLE_H - -#include <iterator> - -namespace arm_compute -{ -namespace utils -{ -namespace iterable -{ -/** Reverse range iterable class - * - * @tparam T Type to create a reverse range on - */ -template <typename T> -class reverse_iterable -{ -public: - /** Default constructor - * - * @param[in] it Value to reverse iterate on - */ - explicit reverse_iterable(T &it) - : _it(it) - { - } - - /** Get beginning of iterator. - * - * @return beginning of iterator. - */ - typename T::reverse_iterator begin() - { - return _it.rbegin(); - } - - /** Get end of iterator. - * - * @return end of iterator. - */ - typename T::reverse_iterator end() - { - return _it.rend(); - } - - /** Get beginning of const iterator. - * - * @return beginning of const iterator. - */ - typename T::const_reverse_iterator cbegin() - { - return _it.rbegin(); - } - - /** Get end of const iterator. - * - * @return end of const iterator. 
- */ - typename T::const_reverse_iterator cend() - { - return _it.rend(); - } - -private: - T &_it; -}; - -/** Creates a reverse iterable for a given type - * - * @tparam T Type to create a reverse iterable on - * - * @param[in] val Iterable input - * - * @return Reverse iterable container - */ -template <typename T> -reverse_iterable<T> reverse_iterate(T &val) -{ - return reverse_iterable<T>(val); -} -} // namespace iterable -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_ITERABLE_H */ diff --git a/arm_compute/core/utils/misc/MMappedFile.h b/arm_compute/core/utils/misc/MMappedFile.h index 7669c5cc96..3efdbc5bda 100644 --- a/arm_compute/core/utils/misc/MMappedFile.h +++ b/arm_compute/core/utils/misc/MMappedFile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_MISC_MMAPPED_FILE_H #define ARM_COMPUTE_MISC_MMAPPED_FILE_H -#if !defined(BARE_METAL) +#if !defined(_WIN64) && !defined(BARE_METAL) #include <string> #include <utility> @@ -105,6 +105,6 @@ private: } // namespace mmap_io } // namespace utils } // namespace arm_compute -#endif // !defined(BARE_METAL) +#endif // !defined(_WIN64) &&!defined(BARE_METAL) #endif /* ARM_COMPUTE_MISC_MMAPPED_FILE_H */ diff --git a/arm_compute/core/utils/misc/Macros.h b/arm_compute/core/utils/misc/Macros.h index 6e8d7659ee..fa861fa442 100644 --- a/arm_compute/core/utils/misc/Macros.h +++ b/arm_compute/core/utils/misc/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,15 +26,16 @@ #if defined(__cplusplus) && (__cplusplus >= 201402L) -#define ARM_COMPUTE_DEPRECATED [[deprecated]] -#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] +#define ARM_COMPUTE_DEPRECATED [[deprecated]] +#define ARM_COMPUTE_DEPRECATED_REL(rel) [[deprecated("Deprecated in : " #rel)]] #define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) [[deprecated("Deprecated in : " #rel " - Use : " #replace)]] #elif defined(__GNUC__) || defined(__clang__) -#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) +#define ARM_COMPUTE_DEPRECATED __attribute__((deprecated)) #define ARM_COMPUTE_DEPRECATED_REL(rel) __attribute__((deprecated("Deprecated in : " #rel))) -#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) +#define ARM_COMPUTE_DEPRECATED_REL_REPLACE(rel, replace) \ + __attribute__((deprecated("Deprecated in : " #rel " - Use : " #replace))) #else // defined(__cplusplus) && (__cplusplus >= 201402L) diff --git a/arm_compute/core/utils/misc/Random.h b/arm_compute/core/utils/misc/Random.h deleted file mode 100644 index 9f5a128546..0000000000 --- a/arm_compute/core/utils/misc/Random.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_MISC_RANDOM_H -#define ARM_COMPUTE_MISC_RANDOM_H - -#include "arm_compute/core/Error.h" - -#include <random> -#include <type_traits> - -namespace arm_compute -{ -namespace utils -{ -namespace random -{ -/** Uniform distribution within a given number of sub-ranges - * - * @tparam T Distribution primitive type - */ -template <typename T> -class RangedUniformDistribution -{ -public: - using DT = typename std::conditional<std::is_integral<T>::value, - std::uniform_int_distribution<T>, - std::uniform_real_distribution<float>>::type; - using result_type = T; - using range_pair = std::pair<result_type, result_type>; - -public: - /** Constructor - * - * @param[in] low lowest value in the range (inclusive) - * @param[in] high highest value in the range (inclusive for uniform_int_distribution, exclusive for uniform_real_distribution) - * @param[in] exclude_ranges Ranges to exclude from the generator - */ - RangedUniformDistribution(result_type low, result_type high, const std::vector<range_pair> &exclude_ranges) - : _distributions(), _selector() - { - result_type clow = low; - for(const auto &erange : exclude_ranges) - { - result_type epsilon = std::is_integral<result_type>::value ? 
1 : static_cast<result_type>(std::numeric_limits<float>::epsilon()); - - ARM_COMPUTE_ERROR_ON(clow > erange.first || clow >= erange.second); - - _distributions.emplace_back(DT(clow, erange.first - epsilon)); - clow = erange.second + epsilon; - } - ARM_COMPUTE_ERROR_ON(clow > high); - _distributions.emplace_back(DT(clow, high)); - _selector = std::uniform_int_distribution<uint32_t>(0, _distributions.size() - 1); - } - /** Generate random number - * - * @tparam URNG Random number generator object type - * - * @param[in] g A uniform random number generator object, used as the source of randomness. - * - * @return A new random number. - */ - template <class URNG> - result_type operator()(URNG &g) - { - unsigned int rand_select = _selector(g); - return _distributions[rand_select](g); - } - -private: - std::vector<DT> _distributions; - std::uniform_int_distribution<uint32_t> _selector; -}; -} // namespace random -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_RANDOM_H */ diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h deleted file mode 100644 index 33c6fa3096..0000000000 --- a/arm_compute/core/utils/misc/Requires.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_REQUIRES_H -#define ARM_COMPUTE_UTILS_REQUIRES_H - -namespace arm_compute -{ -namespace utils -{ -namespace requires -{ -// *INDENT-OFF* -// clang-format off -namespace detail -{ -enum class enabler -{ -}; -} // namespace arm_compute - -/** Requirements as template */ -#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0> -/** Requirements as template argument */ -#define REQUIRES_TA(...) typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type -// clang-format on -// *INDENT-ON* -} // namespace requires -} // namespace utils -} // namespace arm_compute -#endif /*ARM_COMPUTE_UTILS_REQUIRES_H */ diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h deleted file mode 100644 index 650137a473..0000000000 --- a/arm_compute/core/utils/misc/Rounding.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_ROUNDING_H -#define ARM_COMPUTE_UTILS_ROUNDING_H - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Requires.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "support/ToolchainSupport.h" - -#include <cmath> - -namespace arm_compute -{ -namespace utils -{ -namespace rounding -{ -/** Rounding mode */ -enum class RoundingMode -{ - TO_ZERO, /**< Round towards zero */ - AWAY_FROM_ZERO, /**< Round away from zero */ - HALF_TO_ZERO, /**< Round half towards from zero */ - HALF_AWAY_FROM_ZERO, /**< Round half away from zero */ - HALF_UP, /**< Round half towards positive infinity */ - HALF_DOWN, /**< Round half towards negative infinity */ - HALF_EVEN /**< Round half towards nearest even */ -}; - -/** Round floating-point value with round to zero - * - * @tparam T Parameter type. 
Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_to_zero(T value) -{ - T res = std::floor(std::fabs(value)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with round away from zero - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_away_from_zero(T value) -{ - T res = std::ceil(std::fabs(value)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding towards zero. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_to_zero(T value) -{ - T res = T(std::ceil(std::fabs(value) - 0.5f)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding away from zero. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_away_from_zero(T value) -{ - T res = T(std::floor(std::fabs(value) + 0.5f)); - return (value < 0.f) ? -res : res; -} - -/** Round floating-point value with half value rounding to positive infinity. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. 
- * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_up(T value) -{ - return std::floor(value + 0.5f); -} - -/** Round floating-point value with half value rounding to negative infinity. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_down(T value) -{ - return std::ceil(value - 0.5f); -} - -/** Round floating-point value with half value rounding to nearest even. - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * @param[in] epsilon precision. - * - * @return Floating-point value of rounded @p value. - */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon()) -{ - T positive_value = std::abs(value); - T ipart = 0; - std::modf(positive_value, &ipart); - // If 'value' is exactly halfway between two integers - if(std::abs(positive_value - (ipart + 0.5f)) < epsilon) - { - // If 'ipart' is even then return 'ipart' - if(std::fmod(ipart, 2.f) < epsilon) - { - return support::cpp11::copysign(ipart, value); - } - // Else return the nearest even integer - return support::cpp11::copysign(std::ceil(ipart + 0.5f), value); - } - // Otherwise use the usual round to closest - return support::cpp11::copysign(support::cpp11::round(positive_value), value); -} - -/** Round floating-point value given a rounding mode - * - * @tparam T Parameter type. Should be of floating point type. - * - * @param[in] value floating-point value to be rounded. - * @param[in] rounding_mode Rounding mode to use. - * - * @return Floating-point value of rounded @p value. 
- */ -template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> -inline T round(T value, RoundingMode rounding_mode) -{ - switch(rounding_mode) - { - case RoundingMode::TO_ZERO: - return round_to_zero(value); - case RoundingMode::AWAY_FROM_ZERO: - return round_away_from_zero(value); - case RoundingMode::HALF_TO_ZERO: - return round_half_to_zero(value); - case RoundingMode::HALF_AWAY_FROM_ZERO: - return round_half_away_from_zero(value); - case RoundingMode::HALF_UP: - return round_half_up(value); - case RoundingMode::HALF_DOWN: - return round_half_down(value); - case RoundingMode::HALF_EVEN: - return round_half_even(value); - default: - ARM_COMPUTE_ERROR("Unsupported rounding mode!"); - } -} -} // namespace rounding -} // namespace utils -} // namespace arm_compute -#endif /*ARM_COMPUTE_UTILS_ROUNDING_H */ diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h deleted file mode 100644 index 0241c64b14..0000000000 --- a/arm_compute/core/utils/misc/SaturateCast.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H -#define ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H - -#include "arm_compute/core/utils/misc/Rounding.h" -#include "arm_compute/core/utils/misc/Traits.h" -#include "arm_compute/core/utils/misc/Utility.h" - -namespace arm_compute -{ -namespace utils -{ -namespace cast -{ -// *INDENT-OFF* -// clang-format off -// same type -template<typename T, - typename U, - typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 > -T saturate_cast(U v) -{ - return v; -} - -// signed -> signed widening/same_width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} -// signed -> signed narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max())); -} - -// unsigned -> signed widening -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - (sizeof(T) > sizeof(U)), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} -// unsigned -> signed narrowing -template<typename T, - typename 
U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max())); -} -// unsigned -> signed same_width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<U>() && - std::is_signed<T>() && - !std::is_same<T, U>::value && - sizeof(T) == sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max())); -} - -// signed -> unsigned widening/same width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_unsigned<T>() && - !std::is_same<T, U>::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(std::max<U>(0, v)); -} - -// signed -> unsigned narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_signed<U>() && - std::is_unsigned<T>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(utility::clamp<U>(v, 0, std::numeric_limits<T>::max())); -} - -// unsigned -> unsigned widening/same width -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - std::is_integral<U>::value && - std::is_unsigned<T>() && - std::is_unsigned<U>() && - !std::is_same<T, U>::value && - sizeof(T) >= sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} - -// unsigned -> unsigned narrowing -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && 
- std::is_integral<U>::value && - std::is_unsigned<T>() && - std::is_unsigned<U>() && - !std::is_same<T, U>::value && - sizeof(T) < sizeof(U), - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max())); -} - -// float -> int -template<typename T, - typename U, - typename std::enable_if<std::is_integral<T>::value && - traits::is_floating_point<U>::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - int32_t vi = utils::rounding::round_half_away_from_zero(v); - return saturate_cast<T>(vi); -} - -// int -> float -template<typename T, - typename U, - typename std::enable_if<traits::is_floating_point<T>::value && - std::is_integral<U>::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} - -// float -> float -template<typename T, - typename U, - typename std::enable_if<traits::is_floating_point<T>::value && - traits::is_floating_point<U>::value, - int >::type = 0 > -inline T saturate_cast(U v) -{ - return static_cast<T>(v); -} -// clang-format on -// *INDENT-ON* -} // namespace cast -} // namespace utils -} // namespace arm_compute -#endif /* ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index dfccec8b37..e97d81390e 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H -#define ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H +#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H +#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Utils.h" - #include "arm_compute/core/utils/helpers/tensor_transform.h" +#include "arm_compute/function_info/ConvolutionInfo.h" +#include "arm_compute/runtime/FunctionDescriptors.h" #include <cmath> @@ -47,28 +48,35 @@ namespace shape_calculator * * @return the calculated shape */ -inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates &reduction_axis, bool keep_dims) +inline TensorShape calculate_reduce_mean_shape(ITensorInfo *input, const Coordinates &reduction_axis, bool keep_dims) { const int reduction_ops = reduction_axis.num_dimensions(); Coordinates axis_local = reduction_axis; - const int input_dims = input->info()->num_dimensions(); + const int input_dims = input->num_dimensions(); convert_negative_axis(axis_local, input_dims); - TensorShape out_shape = input->info()->tensor_shape(); + TensorShape out_shape = input->tensor_shape(); // Configure reshape layer if we want to drop the dimensions - if(!keep_dims) + if (!keep_dims) { // We have to sort the reduction axis vectors in order for remove_dimension // to work properly + +// Suppress warning produced by a compiler bug in GCC +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104165 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" std::sort(axis_local.begin(), axis_local.begin() + reduction_ops); - for(int i = 0; i < reduction_ops; ++i) +#pragma GCC diagnostic pop + + for (int i = 0; i < reduction_ops; ++i) { - out_shape.remove_dimension(axis_local[i] - i); + out_shape.remove_dimension(axis_local[i] - i, false); } return out_shape; } else { - for(int i = 0; i < reduction_ops; ++i) + for (int i 
= 0; i < reduction_ops; ++i) { out_shape.set(axis_local[i], 1); } @@ -84,7 +92,10 @@ inline TensorShape calculate_reduce_mean_shape(ITensor *input, const Coordinates * * @return the calculated shape */ -inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, size_t conv_w, size_t conv_h, const DataLayout &data_layout) +inline TensorShape compute_vector_to_tensor_output_shape(const TensorShape &input, + size_t conv_w, + size_t conv_h, + const DataLayout &data_layout) { const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -126,10 +137,12 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_ERROR_ON(stride <= 0); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), "The width of the input tensor must be a multiple of stride"); - ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), "The height of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_width] % stride != 0), + "The width of the input tensor must be a multiple of stride"); + ARM_COMPUTE_ERROR_ON_MSG((input.tensor_shape()[idx_height] % stride != 0), + "The height of the input tensor must be a multiple of stride"); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(idx_width, output_shape[idx_width] / stride); output_shape.set(idx_height, output_shape[idx_height] / stride); @@ -146,7 +159,8 @@ inline TensorShape compute_reorg_output_shape(const ITensorInfo &input, int32_t * * @return the calculated shape of the reshaped weights */ -inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, 
unsigned int num_groups = 1) +inline TensorShape +compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) { // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. ARM_COMPUTE_ERROR_ON(num_groups == 0); @@ -154,14 +168,14 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); // Calculate output shape - TensorShape weights_reshaped{ weights.tensor_shape() }; + TensorShape weights_reshaped{weights.tensor_shape()}; weights_reshaped.set(3, weights_reshaped[3] / num_groups); weights_reshaped.collapse(3); const size_t tmp_dim = weights_reshaped[0]; weights_reshaped.set(0, weights_reshaped[1]); weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0)); - if(weights.num_dimensions() < 5) + if (weights.num_dimensions() < 5) { weights_reshaped.set(2, num_groups); } @@ -177,7 +191,9 @@ inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bo * * @return the calculated shape */ -inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false) +inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, + const GEMMLHSMatrixInfo &lhs_info, + bool reinterpret_input_as_3d = false) { ARM_COMPUTE_ERROR_ON(lhs_info.m0 == 0); ARM_COMPUTE_ERROR_ON(lhs_info.k0 == 0); @@ -198,11 +214,11 @@ inline TensorShape compute_lhs_reshaped_shape(const ITensorInfo &a, const GEMMLH const unsigned int output_width = block_size * num_horiz_blocks * lhs_info.v0; const unsigned int output_height = std::ceil(num_vert_blocks / static_cast<float>(lhs_info.v0)); - TensorShape lhs_shape{ a.tensor_shape() }; + TensorShape lhs_shape{a.tensor_shape()}; lhs_shape.set(0, output_width); lhs_shape.set(1, output_height); - if((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) + if 
((reinterpret_input_as_3d) && (lhs_shape.num_dimensions() > 2)) { // When the data format is NHWC and the shapes are Nx1x1 // the tensor shape num_dimensions is automatically set to 1 instead of 3. @@ -242,7 +258,7 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH const unsigned int output_width = block_size * num_vert_blocks * rhs_info.h0; const unsigned int output_height = std::ceil(num_horiz_blocks / static_cast<float>(rhs_info.h0)); - TensorShape rhs_shape{ a.tensor_shape() }; + TensorShape rhs_shape{a.tensor_shape()}; rhs_shape.set(0, output_width); rhs_shape.set(1, output_height); @@ -257,14 +273,15 @@ inline TensorShape compute_rhs_reshaped_shape(const ITensorInfo &a, const GEMMRH * * @return the calculated shape */ -inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) +inline TensorShape +compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1, bool reinterpret_input_as_3d = false) { // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); const int interleave_width = 4 * mult_interleave4x4_height; - TensorShape shape_interleaved_a{ a.tensor_shape() }; + TensorShape shape_interleaved_a{a.tensor_shape()}; shape_interleaved_a.set(0, a.dimension(0) * interleave_width); - if(reinterpret_input_as_3d) + if (reinterpret_input_as_3d) { const int M = a.dimension(1) * a.dimension(2); const int height = std::ceil(M / static_cast<float>(interleave_width)); @@ -274,7 +291,7 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte // the tensor shape num_dimensions is automatically set to 1 instead of 3. // To avoid failures by removing a dimension that doesn't exist // check if the number of dimensions is greater than 2. 
- if(shape_interleaved_a.num_dimensions() > 2) + if (shape_interleaved_a.num_dimensions() > 2) { shape_interleaved_a.remove_dimension(2); } @@ -287,30 +304,6 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte return shape_interleaved_a; } -/** Calculate the reshaped shape of the weights to use in depthwise convolution - * - * @param[in] input Input tensor info - * @param[in] info Depthwise convolution information to be used for reshaping. - * - * @return the calculated shape - */ -inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &input, const DepthwiseConvolutionReshapeInfo &info) -{ - const auto data_layout = input.data_layout(); - TensorShape weights_shape{}; - - const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const size_t num_channels = input.dimension(channel_idx); - const size_t num_rows = input.dimension(height_idx); - const size_t num_cols = input.dimension(width_idx); - - weights_shape.set(0, num_rows * num_cols * info.c0); - weights_shape.set(1, DIV_CEIL(num_channels, info.c0)); - return weights_shape; -} - /** Calculate the transposed 1xW shape * * @param[in] b Input tensor info @@ -320,7 +313,7 @@ inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &i inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b) { // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width / 16.0f) ] - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; shape_transposed1xW_b.set(0, b.dimension(1) * 16); shape_transposed1xW_b.set(1, std::ceil(b.dimension(0) / 16.f)); @@ -340,7 +333,7 @@ inline TensorShape 
compute_transpose1xW_with_element_size_shape(const ITensorInf // The transpose1xW output matrix will have the following shape: // [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1); - TensorShape shape_transposed1xW_b{ b.tensor_shape() }; + TensorShape shape_transposed1xW_b{b.tensor_shape()}; const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width; shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width); shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width)))); @@ -356,8 +349,8 @@ inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInf */ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) { - TensorShape shape_vector_sum_col{ b.tensor_shape() }; - if(shape_vector_sum_col.num_dimensions() > 1) + TensorShape shape_vector_sum_col{b.tensor_shape()}; + if (shape_vector_sum_col.num_dimensions() > 1) { shape_vector_sum_col.remove_dimension(1); } @@ -373,9 +366,9 @@ inline TensorShape compute_reductionA_shape(const ITensorInfo &b) */ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) { - TensorShape shape_vector_sum_row{ a.tensor_shape() }; + TensorShape shape_vector_sum_row{a.tensor_shape()}; shape_vector_sum_row.set(Window::DimX, a.dimension(1)); - if(shape_vector_sum_row.num_dimensions() > 1) + if (shape_vector_sum_row.num_dimensions() > 1) { shape_vector_sum_row.remove_dimension(1); } @@ -392,7 +385,10 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) * * @return the calculated shape */ -inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1) +inline TensorShape compute_col2im_shape(const ITensorInfo &input, + const Size2D &convolved_dims, + bool batch_size_on_z, + unsigned int num_groups = 1) { 
ARM_COMPUTE_ERROR_ON(num_groups == 0); ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area())); @@ -403,10 +399,10 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape col2im_shape{ input.tensor_shape() }; + TensorShape col2im_shape{input.tensor_shape()}; // If batches start on 3rd dimension shift dimensions right by 1 to retain upper tensor shape, // as first three will be override by H,W,C data - if(batch_size_on_z && num_groups == 1) + if (batch_size_on_z && num_groups == 1) { col2im_shape.shift_right(1); } @@ -425,29 +421,27 @@ inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D & */ inline TensorShape compute_transposed_shape(const ITensorInfo &input) { - TensorShape shape_transposed{ input.tensor_shape() }; + TensorShape shape_transposed{input.tensor_shape()}; - shape_transposed.set(0, input.dimension(1)); - shape_transposed.set(1, input.dimension(0)); + shape_transposed.set(0, input.dimension(1), false); + shape_transposed.set(1, input.dimension(0), false); return shape_transposed; } /** Calculate the depthwise convolution output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. - * @param[in] dilation Dilation, in elements, across x and y. Defaults to (1, 1). 
+ * @param[in] input Input tensor info + * @param[in] weights Weights tensor info + * @param[in] info Convolution info * * @return the calculated shape */ -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, - 1U)) +inline TensorShape +compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -455,23 +449,54 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const DataLayout weights_data_layout = weights.data_layout(); - const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); - const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); + const int weights_width_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::WIDTH); + const int weights_height_idx = get_data_layout_dimension_index(weights_data_layout, DataLayoutDimension::HEIGHT); unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], - weights_shape[weights_width_idx], weights_shape[weights_height_idx], - conv_info, dilation); + std::tie(output_width, output_height) = + scaled_dimensions(input_shape[width_idx], input_shape[height_idx], 
weights_shape[weights_width_idx], + weights_shape[weights_height_idx], info.pad_stride_info, info.dilation); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); - output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); + output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier); return output_shape; } +/** Calculate padding required for deconvolution + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor shape + * @param[in] sx Stride on x axis + * @param[in] sy Stride on y axis + * @param[in] out_dims Output shape dimensions + * + * @return the padding required + */ +inline std::pair<int32_t, int32_t> compute_deconvolution_padding(const ITensorInfo &input, + const ITensorInfo &weights, + int32_t sx, + int32_t sy, + std::pair<uint32_t, uint32_t> out_dims) +{ + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + // Find the upsampled dimensions + int32_t out_x = (static_cast<int32_t>(input.dimension(idx_w)) - 1) * sx + 1; + int32_t out_y = (static_cast<int32_t>(input.dimension(idx_h)) - 1) * sy + 1; + + // Find the padding needed for the convolution with stride 1 in order to match output shape + int32_t padx = out_dims.first - (out_x - static_cast<int32_t>(weights.dimension(idx_w)) + 1); + int32_t pady = out_dims.second - (out_y - static_cast<int32_t>(weights.dimension(idx_h)) + 1); + + return std::make_pair(padx, pady); +} + /** Calculate the upsampled output shape used for deconvolution * * @param[in] input Input tensor info @@ -484,20 +509,28 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape 
compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, - std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady) +inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, + const ITensorInfo &weights, + unsigned int sx, + unsigned int sy, + std::pair<unsigned int, unsigned int> &out_dims, + uint32_t &padx, + uint32_t &pady) { + // Find the padding needed for the convolution with stride 1 in order to match output shape + const auto padxy = + compute_deconvolution_padding(input, weights, static_cast<int32_t>(sx), static_cast<int32_t>(sy), out_dims); + padx = static_cast<uint32_t>(padxy.first); + pady = static_cast<uint32_t>(padxy.second); + const DataLayout data_layout = input.data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); // Find the upsampled dimensions - unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1; - unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1; + uint32_t out_x = (input.dimension(idx_w) - 1) * sx + 1; + uint32_t out_y = (input.dimension(idx_h) - 1) * sy + 1; - // Find the padding needed for the convolution with stride 1 in order to match output shape - padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); - pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); out_x += padx; out_y += pady; @@ -516,10 +549,12 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &inpu * * @return the calculated shape */ -inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights) +inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, + const ITensorInfo &input, + const 
ITensorInfo &weights) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; + const TensorShape input_shape{input.tensor_shape()}; + const TensorShape weights_shape{weights.tensor_shape()}; const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -527,7 +562,7 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - TensorShape out_shape{ input_shape }; + TensorShape out_shape{input_shape}; out_shape.set(width_idx, out_dims.first); out_shape.set(height_idx, out_dims.second); out_shape.set(channel_idx, weights_shape[batch_idx]); @@ -543,11 +578,18 @@ inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned i * @param[in] dilation Dilation, in elements, across x and y * @param[in] batch_size_on_z True if batch size is on z axis * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] input_pad_right (Optional) When fast-math is selected, per element padding for the im2col matrix may be necessary * * @return the calculated shape */ -inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, - unsigned int num_groups = 1) +inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, + const Size2D &kernel_dims, + const PadStrideInfo &conv_info, + bool has_bias, + const Size2D &dilation, + bool batch_size_on_z, + unsigned int num_groups = 1, + unsigned int input_pad_right = 0) { // The output shape will be the 3D shape [ out_channels * kernel_area, num_elems_per_out_channel, batches 
] if batch_size_on_z == true // or the 4D shape [ out_channels * kernel_area / num_groups, num_elems_per_out_channel, num_groups, batches ] if batch_size_on_z == false @@ -556,17 +598,19 @@ inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Siz ARM_COMPUTE_ERROR_ON(num_groups > 1 && input->data_layout() != DataLayout::NCHW); ARM_COMPUTE_ERROR_ON(num_groups > 1 && batch_size_on_z); - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); - output_shape.set(0, (output_shape[channel_idx] / num_groups * kernel_dims.area() + (has_bias ? 1 : 0))); // NOLINT + std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions( + output_shape[width_idx], output_shape[height_idx], kernel_dims.width, kernel_dims.height, conv_info, dilation); + output_shape.set(0, ((output_shape[channel_idx] + input_pad_right) / num_groups * kernel_dims.area() + + (has_bias ? 1 : 0))); // NOLINT output_shape.set(1, (out_dims.first * out_dims.second)); - if(batch_size_on_z && output_shape.num_dimensions() >= 3) + if (batch_size_on_z && output_shape.num_dimensions() >= 3) { output_shape.remove_dimension(2); } @@ -588,7 +632,7 @@ inline TensorShape compute_flatten_shape(const ITensorInfo *input) { // The output shape will be the flatten version of the input (i.e. [ width * height * channels, num_batches, ... ] ). Used for FlattenLayer and FullyConnectedLayer. 
- TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.collapse(3); @@ -610,7 +654,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = // - [x,y,z,w] and axis 3 will return [x*y*z, w] TensorShape shape2D = input->tensor_shape(); - if(axis < input->num_dimensions()) + if (axis < input->num_dimensions()) { // Collapse from axis onward (this changes the shape) shape2D.collapse_from(axis); @@ -624,7 +668,7 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = shape2D.collapse(shape2D.num_dimensions()); } - if(axis == 0) + if (axis == 0) { // If axis is zero the first dim should be one. Since // collapse is an inclusive operation we need to shift @@ -643,15 +687,17 @@ inline TensorShape compute_softmax_shape(const ITensorInfo *input, size_t axis = */ inline TensorShape compute_winograd_filter_transform_shape(const ITensorInfo &input, const WinogradInfo &winograd_info) { - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); tensor_shape.remove_dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH)); tensor_shape.set(Window::DimX, input.dimension(3)); - tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL))); + tensor_shape.set(Window::DimY, input.dimension(get_data_layout_dimension_index(input.data_layout(), + DataLayoutDimension::CHANNEL))); tensor_shape.set(Window::DimZ, input_tile_size.area()); 
return tensor_shape; @@ -669,23 +715,22 @@ inline TensorShape compute_winograd_input_transform_shape(const ITensorInfo &inp const PadStrideInfo conv_info = winograd_info.convolution_info; const Size2D kernel_size = winograd_info.kernel_size; const Size2D output_tile_size = winograd_info.output_tile_size; - const Size2D input_tile_size = Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); + const Size2D input_tile_size = + Size2D(output_tile_size.width + kernel_size.width - 1, output_tile_size.height + kernel_size.height - 1); const size_t idx_w = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); const size_t idx_c = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); // Compute the number of output tiles along the x and y direction of size "output_tile_size" - const Size2D num_tiles = compute_winograd_convolution_tiles(Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), - kernel_size, - output_tile_size, - conv_info); + const Size2D num_tiles = compute_winograd_convolution_tiles( + Size2D(input.tensor_shape()[idx_w], input.tensor_shape()[idx_h]), kernel_size, output_tile_size, conv_info); const unsigned int width = input.tensor_shape()[idx_c]; const unsigned int height = num_tiles.area(); const unsigned int depth = input_tile_size.area(); - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; output_shape.set(0, width); output_shape.set(1, height); output_shape.set(2, depth); @@ -708,12 +753,12 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in const DataLayout data_layout = winograd_info.output_data_layout; // Compute output shape - unsigned int output_width = 0; - unsigned int output_height = 0; + unsigned int output_width = 0; + unsigned int 
output_height = 0; std::tie(output_width, output_height) = scaled_dimensions(input_dimensions.width, input_dimensions.height, kernel_size.width, kernel_size.height, conv_info); - TensorShape tensor_shape{ input.tensor_shape() }; + TensorShape tensor_shape{input.tensor_shape()}; // Output dimension const unsigned int out_w = output_width; @@ -729,20 +774,21 @@ inline TensorShape compute_winograd_output_transform_shape(const ITensorInfo &in /** Calculate the deep convolution shape output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor info - * @param[in] conv_info Contains padding and stride information + * @param[in] input_shape Input tensor shape + * @param[in] input_data_layout Input data layout + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Contains padding and stride information * * @return the calculated shape */ -inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) +inline TensorShape compute_deep_convolution_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info) { - const TensorShape input_shape{ input.tensor_shape() }; - const TensorShape weights_shape{ weights.tensor_shape() }; - - const size_t idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); - const size_t idx_channel = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::CHANNEL); + const size_t idx_width = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::WIDTH); + const size_t idx_height = get_data_layout_dimension_index(input_data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_channel = get_data_layout_dimension_index(input_data_layout, 
DataLayoutDimension::CHANNEL); const unsigned int input_width = input_shape[idx_width]; const unsigned int input_height = input_shape[idx_height]; @@ -751,9 +797,10 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons const unsigned int weights_out_channel = weights_shape[3]; unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); + std::tie(output_width, output_height) = + scaled_dimensions(input_width, input_height, weights_width, weights_height, conv_info); - TensorShape output_shape{ input_shape }; + TensorShape output_shape{input_shape}; output_shape.set(idx_width, output_width); output_shape.set(idx_height, output_height); output_shape.set(idx_channel, weights_out_channel); @@ -761,6 +808,53 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons return output_shape; } +/** Calculate the deep convolution shape output shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] weights Weights tensor info + * @param[in] conv_info Contains padding and stride information + * + * @return the calculated shape + */ +inline TensorShape +compute_deep_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info) +{ + return compute_deep_convolution_shape(input.tensor_shape(), input.data_layout(), weights.tensor_shape(), conv_info); +} + +/** Calculate the indirect buffer output shape used by the indirect convolution function + * + * @param[in] input_shape Input tensor shape + * @param[in] input_data_layout Input data layout + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Contains padding and stride information + * @param[in] desc Contains the direct/indirect convolution compute arguments, such as the tiling dimensions + * + * @return the calculated shape + */ +inline TensorShape 
compute_indirect_buffer_shape(const TensorShape &input_shape, + DataLayout input_data_layout, + const TensorShape &weights_shape, + const PadStrideInfo &conv_info, + const DirectConvComputeKernelInfo &desc) +{ + ARM_COMPUTE_ERROR_ON_MSG(input_data_layout != DataLayout::NHWC, "The data layout can only be NHWC"); + ARM_COMPUTE_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8, "M0 can only be greater than 0 and less than or equal to 8"); + + const unsigned int m0 = desc.m0; + const unsigned int kw = weights_shape[1]; + const unsigned int kh = weights_shape[2]; + + TensorShape output_conv2d_shape = + compute_deep_convolution_shape(input_shape, input_data_layout, weights_shape, conv_info); + + const unsigned int output_w = m0 * kw * kh; + const unsigned int output_h = DIV_CEIL(output_conv2d_shape[1] * output_conv2d_shape[2], m0); + const unsigned int output_b = output_conv2d_shape[3]; + + return TensorShape(output_w, output_h, output_b); +} + /** Calculate the min/max shape output shape of a tensor * * @param[in] input Input tensor info @@ -769,7 +863,7 @@ inline TensorShape compute_deep_convolution_shape(const ITensorInfo &input, cons */ inline TensorShape compute_min_max_shape(const ITensorInfo *input) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(Window::DimX, 2); output_shape.remove_dimension(1); output_shape.remove_dimension(1); @@ -786,29 +880,63 @@ inline TensorShape compute_min_max_shape(const ITensorInfo *input) */ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info) { - unsigned int pooled_w = 0; - unsigned int pooled_h = 0; + int pooled_w = 0; + int pooled_h = 0; + + TensorShape output_shape{input.tensor_shape()}; - TensorShape output_shape{ input.tensor_shape() }; + const bool is_global_pooling = pool_info.is_global_pooling; + const int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); + const int idx_height 
= get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); + const int input_width = input.tensor_shape()[idx_width]; + const int input_height = input.tensor_shape()[idx_height]; + const int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width; + const int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height; - const bool is_global_pooling = pool_info.is_global_pooling; - const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); - const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); - const unsigned int pool_size_x = is_global_pooling ? output_shape[idx_width] : pool_info.pool_size.width; - const unsigned int pool_size_y = is_global_pooling ? output_shape[idx_height] : pool_info.pool_size.height; + std::tie(pooled_w, pooled_h) = + scaled_dimensions_signed(input_width, input_height, pool_size_x, pool_size_y, pool_info.pad_stride_info); - std::tie(pooled_w, pooled_h) = scaled_dimensions(output_shape[idx_width], - output_shape[idx_height], - pool_size_x, - pool_size_y, - pool_info.pad_stride_info); + ARM_COMPUTE_ERROR_ON_MSG((pooled_w < 1 || pooled_h < 1), "Calculated output dimension size is invalid"); - output_shape.set(idx_width, pooled_w); - output_shape.set(idx_height, pooled_h); + output_shape.set(idx_width, static_cast<size_t>(pooled_w)); + output_shape.set(idx_height, static_cast<size_t>(pooled_h)); return output_shape; } +/** Calculate the output unpool shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] pool_info Pooling layer info + * + * @return the calculated shape + */ +inline TensorShape compute_unpool_shape(const ITensorInfo &input, PoolingLayerInfo pool_info) +{ + const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); + const unsigned int idx_height = 
get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); + const TensorShape input_shape = input.tensor_shape(); + ARM_COMPUTE_ERROR_ON(input_shape[idx_height] <= 1 || input_shape[idx_width] <= 1); + const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; + const unsigned int stride_x = pad_stride_info.stride().first; + const unsigned int stride_y = pad_stride_info.stride().second; + + const int pad_left = pad_stride_info.pad_left(); + const int pad_top = pad_stride_info.pad_top(); + const int pad_right = pad_stride_info.pad_right(); + const int pad_bottom = pad_stride_info.pad_bottom(); + + TensorShape output_shape = input_shape; + const unsigned int out_width = + (input_shape[idx_width] - 1) * stride_x - pad_left - pad_right + pool_info.pool_size.width; + const unsigned int out_height = + (input_shape[idx_height] - 1) * stride_y - pad_top - pad_bottom + pool_info.pool_size.height; + + output_shape.set(idx_width, out_width); + output_shape.set(idx_height, out_height); + return output_shape; +} + /** Calculate the output roi align shape of a tensor * * @param[in] input Input tensor info @@ -817,9 +945,10 @@ inline TensorShape compute_pool_shape(const ITensorInfo &input, PoolingLayerInfo * * @return the calculated shape */ -inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) +inline TensorShape +compute_roi_align_shape(const ITensorInfo &input, const ITensorInfo &rois, ROIPoolingLayerInfo pool_info) { - TensorShape output_shape{ input.tensor_shape() }; + TensorShape output_shape{input.tensor_shape()}; const unsigned int idx_width = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::WIDTH); const unsigned int idx_height = get_data_layout_dimension_index(input.data_layout(), DataLayoutDimension::HEIGHT); @@ -840,7 +969,7 @@ inline TensorShape compute_roi_align_shape(const ITensorInfo &input, const ITens */ inline TensorShape 
compute_rnn_shape(const ITensorInfo *input, const unsigned int batch_size) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; output_shape.set(1, batch_size); return output_shape; @@ -855,15 +984,21 @@ inline TensorShape compute_rnn_shape(const ITensorInfo *input, const unsigned in * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info) +inline TensorShape compute_mm_shape(const ITensorInfo &input0, + const ITensorInfo &input1, + bool is_interleaved_transposed, + const GEMMReshapeInfo &reshape_info) { ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); - ARM_COMPUTE_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); + ARM_COMPUTE_ERROR_ON_MSG( + is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), + "The first input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true"); const bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d(); const bool reinterpret_output_as_3d = reshape_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? reshape_info.depth_output_gemm3d() : 1; - const int m = reshape_info.reinterpret_input_as_3d() ? input0.dimension(1) * input0.dimension(2) : input0.dimension(1); + const int m = + reshape_info.reinterpret_input_as_3d() ? 
input0.dimension(1) * input0.dimension(2) : input0.dimension(1); // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third // dimension of the output tensor @@ -872,7 +1007,7 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const int dim2 = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2]; const int dim3 = reinterpret_input_as_3d ? 1 : input0.tensor_shape()[3]; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; output_shape.set(0, dim0); output_shape.set(1, dim1); @@ -885,15 +1020,14 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo /** Calculate the matrix multiplication output shape of two tensors * - * @note Deprecated. Remove when GEMMReshapeInfo is not used anymore by any other kernels - * * @param[in] input0 First input tensor info * @param[in] input1 Second input tensor info * @param[in] gemm_info GEMM reshape info * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMReshapeInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -902,9 +1036,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d() != 0; const int depth_output_gemm3d = reinterpret_output_as_3d ? 
gemm_info.depth_output_gemm3d() : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n()); output_shape.set(1, gemm_info.m()); @@ -931,7 +1065,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo * * @return the calculated shape */ -inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) +inline TensorShape +compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info) { ARM_COMPUTE_UNUSED(input1); ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4"); @@ -940,9 +1075,9 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo const bool reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0; const unsigned int depth_output_gemm3d = reinterpret_output_as_3d ? 
gemm_info.depth_output_gemm3d : 1; - TensorShape output_shape{ input0.tensor_shape() }; + TensorShape output_shape{input0.tensor_shape()}; - if(!reinterpret_input_as_3d && !reinterpret_output_as_3d) + if (!reinterpret_input_as_3d && !reinterpret_output_as_3d) { output_shape.set(0, gemm_info.n); output_shape.set(1, gemm_info.m); @@ -963,20 +1098,50 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo /** Calculate the matrix multiplication output shape of two tensors * + * @param[in] input0 First input tensor info + * @param[in] input1 Second input tensor info + * @param[in] matmul_info Batch MatMul Kernel info to know which matrix is transposed + * + * @return the calculated shape + */ +inline TensorShape +compute_matmul_shape(const TensorShape &input0, const TensorShape &input1, const MatMulKernelInfo &matmul_info) +{ + TensorShape output_shape{input0}; + + if (matmul_info.adj_lhs) + { + output_shape.set(1, input0[0]); // The vertical (M) dimension + } + + if (matmul_info.adj_rhs) + { + output_shape.set(0, input1[1]); // The horizontal (N) dimension + } + else + { + output_shape.set(0, input1[0]); // The horizontal (N) dimension + } + + return output_shape; +} +/** Calculate the matrix multiplication output shape of two tensors + * * @param[in] input Input tensor info * @param[in] gemm_3d_depth (Optional) GEMM 3d depth * @param[in] batch_size_on_z (Optional) True if batch size is on z axis * * @return the calculated shape */ -inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) +inline TensorShape +compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false) { ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1); TensorShape output_shape = input.tensor_shape(); - if(gemm_3d_depth > 1) + if (gemm_3d_depth > 1) { - if(batch_size_on_z) + if (batch_size_on_z) { 
output_shape.shift_right(1); } @@ -1001,11 +1166,16 @@ inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned * @return the calculated shape */ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, - const Coordinates &starts, const Coordinates &ends, const Coordinates &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) + const Coordinates &starts, + const Coordinates &ends, + const Coordinates &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); + return compute_strided_slice_output_shape(input.tensor_shape(), starts, ends, strides, begin_mask, end_mask, + shrink_axis_mask); } /** Calculate the slice output shape of a tensor @@ -1016,60 +1186,72 @@ inline TensorShape compute_strided_slice_shape(const ITensorInfo &input, * * @return the calculated shape */ -inline TensorShape compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) +inline TensorShape +compute_slice_shape(const TensorShape &input_shape, const Coordinates &starts, const Coordinates &ends) { using namespace arm_compute::helpers::tensor_transform; - return compute_strided_slice_output_shape(input_shape, - starts, ends, BiStrides(), - 0, construct_slice_end_mask(ends), 0); + return compute_strided_slice_output_shape(input_shape, starts, ends, BiStrides(), 0, construct_slice_end_mask(ends), + 0); } /** Calculate the batch to space output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] block_x Block shape x value - * @param[in] block_y Block shape y value + * @param[in] data_layout Data layout + * @param[in] input Input tensor shape + * @param[in] block_x Block shape x value + * @param[in] block_y Block shape y value + * @param[in] crop_info Information 
about how the output shape is cropped after batch to space is performed * * @return the calculated shape */ -inline TensorShape compute_batch_to_space_shape(const ITensorInfo *input, const int block_x, const int block_y) +inline TensorShape compute_batch_to_space_shape( + DataLayout data_layout, const TensorShape &input, int block_x, int block_y, const CropInfo &crop_info = CropInfo{}) { - ARM_COMPUTE_ERROR_ON(block_x <= 0 || block_y <= 0); + ARM_COMPUTE_ERROR_ON(block_x < 1 || block_y < 1); - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape output_shape{input}; + + unsigned int new_width = input[idx_width] * static_cast<unsigned int>(block_x); + unsigned int new_height = input[idx_height] * static_cast<unsigned int>(block_y); + const unsigned int width_crop = crop_info.left + crop_info.right; + const unsigned int height_crop = crop_info.top + crop_info.bottom; + ARM_COMPUTE_ERROR_ON(new_width <= width_crop); + ARM_COMPUTE_ERROR_ON(new_height <= height_crop); + new_width -= width_crop; + new_height -= height_crop; - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y); - output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y)); + output_shape.set(idx_width, new_width); + 
output_shape.set(idx_height, new_height); + output_shape.set(idx_batch, input[idx_batch] / (block_x * block_y)); return output_shape; } /** Calculate the depth to space output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] block Block shape value + * @param[in] input_shape Input tensor shape + * @param[in] data_layout Operation data layout + * @param[in] block Block shape value * * @return the calculated shape */ -inline TensorShape compute_depth_to_space_shape(const ITensorInfo *input, int block) +inline TensorShape compute_depth_to_space_shape(const TensorShape &input_shape, DataLayout data_layout, int block) { ARM_COMPUTE_ERROR_ON(block < 2); - const DataLayout data_layout = input->data_layout(); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(idx_width, input->dimension(idx_width) * block); - output_shape.set(idx_height, input->dimension(idx_height) * block); - output_shape.set(idx_channel, input->dimension(idx_channel) / (block * block)); + TensorShape output_shape{input_shape}; + output_shape.set(idx_width, input_shape[idx_width] * block); + output_shape.set(idx_height, input_shape[idx_height] * block); + output_shape.set(idx_channel, input_shape[idx_channel] / (block * block)); return output_shape; } @@ -1087,10 +1269,10 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax TensorShape 
empty_shape; empty_shape.set(0, 0); - TensorShape out_shape{ input->tensor_shape() }; + TensorShape out_shape{input->tensor_shape()}; // Return empty shape if axis is invalid - if(axis > input->tensor_shape().num_dimensions()) + if (axis > input->tensor_shape().num_dimensions()) { return empty_shape; } @@ -1098,7 +1280,7 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax size_t axis_size = out_shape[axis]; // Return empty shape if num_split is not valid - if(axis_size % num_splits) + if (axis_size % num_splits) { return empty_shape; } @@ -1117,18 +1299,22 @@ inline TensorShape compute_split_shape(const ITensorInfo *input, unsigned int ax * * @return the calculated shape */ -inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const int block_x, const int block_y, const Size2D &padding_left, const Size2D &padding_right) +inline TensorShape compute_space_to_batch_shape( + const ITensorInfo *input, int block_x, int block_y, const Size2D &padding_left, const Size2D &padding_right) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_x + padding_left.x() + padding_right.x()); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_y + padding_left.y() + padding_right.y()); - output_shape.set(idx_batch, input->tensor_shape()[idx_batch] / (block_x * block_y)); + ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) % block_x != 0); + ARM_COMPUTE_ERROR_ON((input->tensor_shape()[idx_height] + 
padding_left.y() + padding_right.y()) % block_y != 0); + + output_shape.set(idx_width, (input->tensor_shape()[idx_width] + padding_left.x() + padding_right.x()) / block_x); + output_shape.set(idx_height, (input->tensor_shape()[idx_height] + padding_left.y() + padding_right.y()) / block_y); + output_shape.set(idx_batch, input->tensor_shape()[idx_batch] * block_x * block_y); return output_shape; } @@ -1142,16 +1328,16 @@ inline TensorShape compute_space_to_batch_shape(const ITensorInfo *input, const */ inline TensorShape compute_space_to_depth_shape(const ITensorInfo *input, int32_t block_shape) { - TensorShape output_shape{ input->tensor_shape() }; + TensorShape output_shape{input->tensor_shape()}; const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - output_shape.set(idx_width, input->tensor_shape()[idx_width] * block_shape); - output_shape.set(idx_height, input->tensor_shape()[idx_height] * block_shape); - output_shape.set(idx_depth, input->tensor_shape()[idx_depth] / (block_shape * block_shape)); + output_shape.set(idx_width, input->tensor_shape()[idx_width] / block_shape); + output_shape.set(idx_height, input->tensor_shape()[idx_height] / block_shape); + output_shape.set(idx_depth, input->tensor_shape()[idx_depth] * (block_shape * block_shape)); return output_shape; } @@ -1187,7 +1373,7 @@ inline TensorShape compute_prior_box_shape(const ITensorInfo &input, const Prior inline TensorShape compute_padded_shape(const TensorShape &input_shape, const PaddingList &padding) { TensorShape padded_shape = input_shape; - for(size_t dim = 0; dim < padding.size(); ++dim) + for (size_t dim = 0; dim < padding.size(); ++dim) { const auto &padding_pair = padding[dim]; const uint32_t 
shape_on_index = (padded_shape.num_dimensions() <= dim) ? 1 : input_shape[dim]; @@ -1206,7 +1392,7 @@ inline TensorShape compute_padded_shape(const TensorShape &input_shape, const Pa inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Multiples &multiples) { TensorShape tiled_shape = input_shape; - for(size_t dim = 0; dim < multiples.size(); ++dim) + for (size_t dim = 0; dim < multiples.size(); ++dim) { tiled_shape.set(dim, input_shape[dim] * multiples[dim]); } @@ -1223,9 +1409,9 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul */ inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true) { - TensorShape output_shape{ input }; + TensorShape output_shape{input}; - if(!keep_dims) + if (!keep_dims) { output_shape.remove_dimension(axis); } @@ -1318,14 +1504,14 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si #if defined(ARM_COMPUTE_ASSERTS_ENABLED) // All dimensions must match except the axis one - for(unsigned int i = 0; i < MAX_DIMS; ++i) + for (unsigned int i = 0; i < MAX_DIMS; ++i) { - if(i == axis) + if (i == axis) { continue; } - for(const auto &tensor : input) + for (const auto &tensor : input) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); const TensorShape shape = extract_shape(tensor); @@ -1336,7 +1522,7 @@ inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, si // Calculate output shape size_t new_size = 0; - for(const auto &tensor : input) + for (const auto &tensor : input) { const TensorShape shape = extract_shape(tensor); new_size += shape[axis]; @@ -1359,14 +1545,14 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, ARM_COMPUTE_ERROR_ON(axis > a.num_dimensions()); ARM_COMPUTE_ERROR_ON(a.num_dimensions() > 4); - TensorShape shape_out{ a.tensor_shape() }; + TensorShape shape_out{a.tensor_shape()}; shape_out.set(axis, num_tensors); unsigned int i_shift = 0; - 
for(unsigned int i = 0; i < a.num_dimensions(); ++i) + for (unsigned int i = 0; i < a.num_dimensions(); ++i) { - if(i == axis) + if (i == axis) { i_shift++; } @@ -1376,18 +1562,177 @@ inline TensorShape compute_stack_shape(const ITensorInfo &a, unsigned int axis, return shape_out; } -inline TensorShape compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) +/** Calculate the output shape of 3d Convolution + * + * @param[in] src Input tensor shape + * @param[in] weights Weights tensor shape + * @param[in] conv3d_info 3d Convolution Parameters object + * + * @return the calculated shape + */ +inline TensorShape +compute_conv3d_shape(const TensorShape &src, const TensorShape &weights, const Conv3dInfo &conv3d_info) +{ + // Weight tensor shape indices (D H W Cin Cout) + constexpr unsigned int weights_depth_dim = 4u; + constexpr unsigned int weights_height_dim = 3u; + constexpr unsigned int weights_width_dim = 2u; + constexpr unsigned int weights_CHout_dim = 0u; + + // Source/Destination Tensor shape indices (N D H W C) + constexpr unsigned int batch_dim = 4u; + constexpr unsigned int depth_dim = 3u; + constexpr unsigned int height_dim = 2u; + constexpr unsigned int width_dim = 1u; + constexpr unsigned int channel_dim = 0u; + + TensorShape output_shape{src}; + const size_t pad_left = conv3d_info.padding.left; + const size_t pad_right = conv3d_info.padding.right; + const size_t pad_top = conv3d_info.padding.top; + const size_t pad_bottom = conv3d_info.padding.bottom; + const size_t pad_front = conv3d_info.padding.front; + const size_t pad_back = conv3d_info.padding.back; + const size_t dilation_x = conv3d_info.dilation.width; + const size_t dilation_y = conv3d_info.dilation.height; + const size_t dilation_z = conv3d_info.dilation.depth; + const size_t stride_x = conv3d_info.stride.x(); + const size_t stride_y = conv3d_info.stride.y(); + const size_t stride_z = conv3d_info.stride.z(); + + int output_width_size = 0; + 
int output_height_size = 0; + int output_depth_size = 0; + + switch (conv3d_info.round_type) + { + case DimensionRoundingType::FLOOR: + output_width_size = + static_cast<int>(std::floor((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::floor((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::floor((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); + break; + case DimensionRoundingType::CEIL: + output_width_size = + static_cast<int>(std::ceil((static_cast<float>(src[width_dim] + pad_left + pad_right - + (dilation_x * (weights[weights_width_dim] - 1) + 1)) / + stride_x) + + 1)); + output_height_size = + static_cast<int>(std::ceil((static_cast<float>(src[height_dim] + pad_top + pad_bottom - + (dilation_y * (weights[weights_height_dim] - 1) + 1)) / + stride_y) + + 1)); + output_depth_size = + static_cast<int>(std::ceil((static_cast<float>(src[depth_dim] + pad_front + pad_back - + (dilation_z * (weights[weights_depth_dim] - 1) + 1)) / + stride_z) + + 1)); + break; + default: + ARM_COMPUTE_ERROR("Unsupported rounding type"); + } + + output_shape.set(batch_dim, src[batch_dim]); + output_shape.set(width_dim, output_width_size); + output_shape.set(height_dim, output_height_size); + output_shape.set(depth_dim, output_depth_size); + output_shape.set(channel_dim, weights[weights_CHout_dim]); + return output_shape; +} + +/** Calculate the output pool3d shape of a tensor + * + * @param[in] src Input tensor info + * @param[in] pool3d_info Pooling layer info + * + * @return the calculated shape + */ +inline TensorShape compute_pool3d_shape(const TensorShape &src, Pooling3dLayerInfo pool3d_info) +{ + TensorShape output_shape{src}; 
+ + const auto data_layout = DataLayout::NDHWC; + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int idx_depth = get_data_layout_dimension_index(data_layout, DataLayoutDimension::DEPTH); + const int pool_size_width = pool3d_info.is_global_pooling ? src[idx_width] : pool3d_info.pool_size.width; + const int pool_size_height = pool3d_info.is_global_pooling ? src[idx_height] : pool3d_info.pool_size.height; + const int pool_size_depth = pool3d_info.is_global_pooling ? src[idx_depth] : pool3d_info.pool_size.depth; + int output_width = 0; + int output_height = 0; + int output_depth = 0; + + std::tie(output_width, output_height, output_depth) = + scaled_3d_dimensions_signed(src[idx_width], src[idx_height], src[idx_depth], pool_size_width, pool_size_height, + pool_size_depth, pool3d_info); + + ARM_COMPUTE_ERROR_ON_MSG((output_width < 1 || output_height < 1 || output_depth < 1), + "Calculated output dimension size is invalid"); + + output_shape.set(idx_width, static_cast<size_t>(output_width)); + output_shape.set(idx_height, static_cast<size_t>(output_height)); + output_shape.set(idx_depth, static_cast<size_t>(output_depth)); + + return output_shape; +} + +/** Calculate the gather output shape of a tensor + * + * @param[in] input_shape Input tensor shape + * @param[in] indices_shape Indices tensor shape. 
Only supports for 2d and 3d indices + * @param[in] actual_axis Axis to be used in the computation + * + * @note Let input_shape be (X,Y,Z) and indices shape (W,O,P) and axis 1 + * the new shape is computed by replacing the axis in the input shape with + * the indice shape so the output shape will be (X,W,O,P,Z) + * + * @return the calculated shape + */ +inline TensorShape +compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_shape, uint32_t actual_axis) { - ARM_COMPUTE_ERROR_ON(indices_shape.num_dimensions() > 1); - ARM_COMPUTE_ERROR_ON(input_shape.num_dimensions() > 4); - ARM_COMPUTE_ERROR_ON(actual_axis >= input_shape.num_dimensions()); + const auto input_num_dims = input_shape.num_dimensions(); + const auto indices_num_dims = indices_shape.num_dimensions(); + + ARM_COMPUTE_ERROR_ON(actual_axis >= input_num_dims); + ARM_COMPUTE_ERROR_ON(input_num_dims + indices_num_dims - 1 > Coordinates::num_max_dimensions); + + TensorShape output_shape; + size_t dim_no = 0; + + for (; dim_no < actual_axis; ++dim_no) + { + output_shape.set(dim_no, input_shape[dim_no]); + } + + for (; dim_no < actual_axis + indices_num_dims; ++dim_no) + { + output_shape.set(dim_no, indices_shape[dim_no - actual_axis]); + } + + for (; dim_no < input_num_dims + indices_num_dims - 1; ++dim_no) + { + output_shape.set(dim_no, input_shape[dim_no + 1 - indices_num_dims]); + } - TensorShape output_shape = input_shape; - output_shape[actual_axis] = indices_shape[0]; + ARM_COMPUTE_ERROR_ON(input_shape.total_size() * indices_shape.total_size() != + output_shape.total_size() * input_shape[actual_axis]); return output_shape; } } // namespace shape_calculator } // namespace misc } // namespace arm_compute -#endif /* ARM_COMPUTE_MISC_SHAPE_CALCULATOR_H */ +#endif // ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h index 1cbdbfe16f..944fcb95f9 100644 --- a/arm_compute/core/utils/misc/Traits.h +++ 
b/arm_compute/core/utils/misc/Traits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,8 @@ #ifndef ARM_COMPUTE_UTILS_TRAITS_TRAITS_H #define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H +#include "arm_compute/core/Types.h" + #include <type_traits> namespace arm_compute diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index b2bb63f5c8..22f10d74cc 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,11 @@ #ifndef ARM_COMPUTE_MISC_UTILITY_H #define ARM_COMPUTE_MISC_UTILITY_H +#include "arm_compute/core/Error.h" + #include <algorithm> #include <array> +#include <cstdint> #include <limits> #include <numeric> #include <vector> @@ -41,7 +44,7 @@ struct index_sequence }; template <std::size_t N, std::size_t... S> -struct index_sequence_generator : index_sequence_generator < N - 1, N - 1, S... > +struct index_sequence_generator : index_sequence_generator<N - 1, N - 1, S...> { }; @@ -55,17 +58,17 @@ template <std::size_t N> using index_sequence_t = typename index_sequence_generator<N>::type; template <typename T, std::size_t N, T val, T... vals> -struct generate_array : generate_array < T, N - 1, val, val, vals... > +struct generate_array : generate_array<T, N - 1, val, val, vals...> { }; template <typename T, T val, T... vals> struct generate_array<T, 0, val, vals...> { - static constexpr std::array<T, sizeof...(vals)> value{ vals... }; + static constexpr std::array<T, sizeof...(vals)> value{vals...}; }; -template <typename T, T val, T... vals> +template <typename T, T val, T... vals> constexpr std::array<T, sizeof...(vals)> generate_array<T, 0, val, vals...>::value; /** @endcond */ @@ -76,7 +79,7 @@ template <std::size_t... 
S, typename T = std::array<typename std::iterator_traits<Iterator>::value_type, sizeof...(S)>> T make_array(Iterator first, index_sequence<S...>) { - return T{ { first[S]... } }; + return T{{first[S]...}}; } } // namespace detail @@ -84,7 +87,7 @@ template <std::size_t N, typename Iterator> std::array<typename std::iterator_traits<Iterator>::value_type, N> make_array(Iterator first, Iterator last) { ARM_COMPUTE_UNUSED(last); - return detail::make_array(first, index_sequence_t<N> {}); + return detail::make_array(first, index_sequence_t<N>{}); } /** Performs clamping among a lower and upper value. @@ -116,7 +119,7 @@ inline void for_each(F &&) * @param[in] args Remaining arguments */ template <typename F, typename T, typename... Ts> -inline void for_each(F &&func, T &&arg, Ts &&... args) +inline void for_each(F &&func, T &&arg, Ts &&...args) { func(std::forward<T>(arg)); for_each(std::forward<F>(func), std::forward<Ts>(args)...); @@ -140,9 +143,11 @@ inline T &&foldl(F &&, T &&value) * @param[in] values Remaining arguments */ template <typename F, typename T, typename U, typename... Us> -inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) +inline auto foldl(F &&func, T &&initial, U &&value, Us &&...values) + -> decltype(func(std::forward<T>(initial), std::forward<U>(value))) { - return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...); + return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), + std::forward<Us>(values)...); } /** Perform an index sort of a given vector. 
@@ -157,11 +162,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) std::vector<size_t> idx(v.size()); std::iota(idx.begin(), idx.end(), 0); - std::sort(idx.begin(), idx.end(), - [&v](size_t i1, size_t i2) - { - return v[i1] < v[i2]; - }); + std::sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); return idx; } @@ -175,7 +176,7 @@ std::vector<size_t> sort_indices(const std::vector<T> &v) */ inline bool endswith(const std::string &str, const std::string &suffix) { - if(str.size() < suffix.size()) + if (str.size() < suffix.size()) { return false; } @@ -202,12 +203,28 @@ inline bool check_aligned(void *ptr, const size_t alignment) */ inline std::string tolower(std::string string) { - std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) - { - return std::tolower(c); - }); + std::transform(string.begin(), string.end(), string.begin(), [](unsigned char c) { return std::tolower(c); }); return string; } + +/** Get environment variable as a string + * + * @note Return empty string on bare-metal + * + * @param[in] env_name Name of the Environment variable to retrieve + * + * @return Environment variable content, or empty string if the variable is undefined or on bare-metal + */ +inline std::string getenv(const std::string &env_name) +{ +#ifdef BARE_METAL + ARM_COMPUTE_UNUSED(env_name); + return std::string{}; +#else // BARE_METAL + const auto env_chr = std::getenv(env_name.c_str()); + return env_chr == nullptr ? std::string{} : std::string{env_chr}; +#endif // BARE_METAL +} } // namespace utility } // namespace arm_compute #endif /* ARM_COMPUTE_MISC_UTILITY_H */ diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h index 4ef49476b2..2324fe1838 100644 --- a/arm_compute/core/utils/quantization/AsymmHelpers.h +++ b/arm_compute/core/utils/quantization/AsymmHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. 
+ * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,10 @@ namespace quantization * * @return a status */ -Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplier, int32_t *shift, bool ignore_epsilon = false); +Status calculate_quantized_multiplier(float multiplier, + int32_t *quant_multiplier, + int32_t *shift, + bool ignore_epsilon = false); /** Calculate quantized representation of multiplier with value less than one. * * @param[in] multiplier Real multiplier. @@ -51,7 +54,10 @@ Status calculate_quantized_multiplier(float multiplier, int32_t *quant_multiplie * * @return a status */ -Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *quant_multiplier, int32_t *right_shift, bool ignore_epsilon = false); +Status calculate_quantized_multiplier_less_than_one(float multiplier, + int32_t *quant_multiplier, + int32_t *right_shift, + bool ignore_epsilon = false); /** Calculate quantized representation of multiplier having value greater than one. * * @param[in] multiplier Real multiplier. 
@@ -60,7 +66,8 @@ Status calculate_quantized_multiplier_less_than_one(float multiplier, int32_t *q * * @return a status */ -Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); +Status +calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t *quantized_multiplier, int32_t *left_shift); /** Calculate quantized representation of per-channel multipliers * @@ -71,9 +78,9 @@ Status calculate_quantized_multiplier_greater_than_one(float multiplier, int32_t * * @return a status */ -Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, - const QuantizationInfo &wq_info, - const QuantizationInfo &oq_info, +Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, + const QuantizationInfo &wq_info, + const QuantizationInfo &oq_info, GEMMLowpOutputStageInfo &stage_info); /** Get minimum and maximum values for the input quantized data type @@ -81,6 +88,7 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, * @return min and max values for the quantized data type */ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_type); + /** Compute quantized per-channel multipliers and shifts. As many multipliers * and shifts as output channels are computed. If weights are not quantized * per-channel, multipliers and shifts will end up being the same for each @@ -89,16 +97,12 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty * @param[in] input Input tensor info. * @param[in] weights Weights tensor info. * @param[in] output Output tensor info. - * @param[in] idx_ofms Dimension index to get OFMs from the weights tensor. * @param[out] output_multipliers_ptr Pointer to the buffer where to store per-channel multipliers. * @param[out] output_shifts_ptr Pointer to the buffer where to store per-channel shifts. 
- * - * @return min and max values for the quantized data type */ void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, - unsigned int idx_ofms, int32_t *output_multipliers_ptr, int32_t *output_shifts_ptr); @@ -150,7 +154,10 @@ int32_t saturating_rounding_multiply_by_pow2(int32_t exponent, int32_t v); * @param[out] output_shift Shift for inverse square root * */ -void get_invsqrt_quantized_multiplier_exp(int32_t input, int32_t reverse_shift, int32_t &output_inv_sqrt, int32_t &output_shift); +void get_invsqrt_quantized_multiplier_exp(int32_t input, + int32_t reverse_shift, + int32_t &output_inv_sqrt, + int32_t &output_shift); } // namespace quantization } // namespace arm_compute |