aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core/CL
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2017-09-04 18:44:23 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 13:03:09 +0100
commit6ff3b19ee6120edf015fad8caab2991faa3070af (patch)
treea7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /arm_compute/core/CL
downloadComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'arm_compute/core/CL')
-rw-r--r--arm_compute/core/CL/CLHelpers.h105
-rw-r--r--arm_compute/core/CL/CLKernelLibrary.h248
-rw-r--r--arm_compute/core/CL/CLKernels.h90
-rw-r--r--arm_compute/core/CL/CLTypes.h41
-rw-r--r--arm_compute/core/CL/ICLArray.h118
-rw-r--r--arm_compute/core/CL/ICLDistribution1D.h102
-rw-r--r--arm_compute/core/CL/ICLHOG.h113
-rw-r--r--arm_compute/core/CL/ICLKernel.h157
-rw-r--r--arm_compute/core/CL/ICLLut.h94
-rw-r--r--arm_compute/core/CL/ICLMultiHOG.h56
-rw-r--r--arm_compute/core/CL/ICLMultiImage.h58
-rw-r--r--arm_compute/core/CL/ICLSimple2DKernel.h41
-rw-r--r--arm_compute/core/CL/ICLSimple3DKernel.h43
-rw-r--r--arm_compute/core/CL/ICLSimpleKernel.h66
-rw-r--r--arm_compute/core/CL/ICLTensor.h106
-rw-r--r--arm_compute/core/CL/OpenCL.h43
-rw-r--r--arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h71
-rw-r--r--arm_compute/core/CL/kernels/CLAccumulateKernel.h91
-rw-r--r--arm_compute/core/CL/kernels/CLActivationLayerKernel.h46
-rw-r--r--arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h72
-rw-r--r--arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h74
-rw-r--r--arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h77
-rw-r--r--arm_compute/core/CL/kernels/CLBitwiseAndKernel.h68
-rw-r--r--arm_compute/core/CL/kernels/CLBitwiseNotKernel.h49
-rw-r--r--arm_compute/core/CL/kernels/CLBitwiseOrKernel.h68
-rw-r--r--arm_compute/core/CL/kernels/CLBitwiseXorKernel.h68
-rw-r--r--arm_compute/core/CL/kernels/CLBox3x3Kernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLCannyEdgeKernel.h147
-rw-r--r--arm_compute/core/CL/kernels/CLChannelCombineKernel.h83
-rw-r--r--arm_compute/core/CL/kernels/CLChannelExtractKernel.h79
-rw-r--r--arm_compute/core/CL/kernels/CLCol2ImKernel.h86
-rw-r--r--arm_compute/core/CL/kernels/CLColorConvertKernel.h90
-rw-r--r--arm_compute/core/CL/kernels/CLConvolutionKernel.h182
-rw-r--r--arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h76
-rw-r--r--arm_compute/core/CL/kernels/CLDepthConvertKernel.h61
-rw-r--r--arm_compute/core/CL/kernels/CLDerivativeKernel.h72
-rw-r--r--arm_compute/core/CL/kernels/CLDilateKernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLErodeKernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLFastCornersKernel.h114
-rw-r--r--arm_compute/core/CL/kernels/CLFillBorderKernel.h77
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h80
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h81
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h63
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h70
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h73
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h84
-rw-r--r--arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h67
-rw-r--r--arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h100
-rw-r--r--arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h105
-rw-r--r--arm_compute/core/CL/kernels/CLHOGDetectorKernel.h82
-rw-r--r--arm_compute/core/CL/kernels/CLHarrisCornersKernel.h85
-rw-r--r--arm_compute/core/CL/kernels/CLHistogramKernel.h98
-rw-r--r--arm_compute/core/CL/kernels/CLIm2ColKernel.h111
-rw-r--r--arm_compute/core/CL/kernels/CLIntegralImageKernel.h73
-rw-r--r--arm_compute/core/CL/kernels/CLLKTrackerKernel.h183
-rw-r--r--arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h68
-rw-r--r--arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h77
-rw-r--r--arm_compute/core/CL/kernels/CLMeanStdDevKernel.h74
-rw-r--r--arm_compute/core/CL/kernels/CLMedian3x3Kernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h104
-rw-r--r--arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h63
-rw-r--r--arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h52
-rw-r--r--arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h71
-rw-r--r--arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h73
-rw-r--r--arm_compute/core/CL/kernels/CLPoolingLayerKernel.h69
-rw-r--r--arm_compute/core/CL/kernels/CLRemapKernel.h70
-rw-r--r--arm_compute/core/CL/kernels/CLScaleKernel.h55
-rw-r--r--arm_compute/core/CL/kernels/CLScharr3x3Kernel.h86
-rw-r--r--arm_compute/core/CL/kernels/CLSobel3x3Kernel.h72
-rw-r--r--arm_compute/core/CL/kernels/CLSobel5x5Kernel.h116
-rw-r--r--arm_compute/core/CL/kernels/CLSobel7x7Kernel.h116
-rw-r--r--arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h109
-rw-r--r--arm_compute/core/CL/kernels/CLTableLookupKernel.h47
-rw-r--r--arm_compute/core/CL/kernels/CLThresholdKernel.h56
-rw-r--r--arm_compute/core/CL/kernels/CLTransposeKernel.h49
-rw-r--r--arm_compute/core/CL/kernels/CLWarpAffineKernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h51
-rw-r--r--arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h114
79 files changed, 6485 insertions, 0 deletions
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
new file mode 100644
index 0000000000..26253e3f38
--- /dev/null
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHELPERS_H__
+#define __ARM_COMPUTE_CLHELPERS_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Helpers.h"
+
+#include <string>
+
+namespace arm_compute
+{
+enum class DataType;
+enum class GPUTarget;
+
+/** Enable operation operations on GPUTarget enumerations */
+template <>
+struct enable_bitwise_ops<arm_compute::GPUTarget>
+{
+ static constexpr bool value = true;
+};
+
+/** Max vector width of an OpenCL vector */
+static constexpr const unsigned int max_cl_vector_width = 16;
+
+/** Translates a tensor data type to the appropriate OpenCL type.
+ *
+ * @param[in] dt @ref DataType to be translated to OpenCL type.
+ *
+ * @return The string specifying the OpenCL type to be used.
+ */
+std::string get_cl_type_from_data_type(const DataType &dt);
+
+/** Translates a given gpu device target to string.
+ *
+ * @param[in] target Given gpu target.
+ *
+ * @return The string describing the target.
+ */
+const std::string &string_from_target(GPUTarget target);
+
+/** Helper function to create and return a unique_ptr pointed to a CL kernel object
+ * It also calls the kernel's configuration.
+ *
+ * @param[in] args All the arguments that need pass to kernel's configuration.
+ *
+ * @return A unique pointer pointed to a CL kernel object
+ */
+template <typename Kernel, typename... T>
+std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
+{
+ std::unique_ptr<Kernel> k = arm_compute::cpp14::make_unique<Kernel>();
+ k->configure(std::forward<T>(args)...);
+ return k;
+}
+
+/** Helper function to create and return a unique_ptr pointed to a CL kernel object
+ *
+ * @return A unique pointer pointed to a CL kernel object
+ */
+template <typename Kernel>
+std::unique_ptr<Kernel> create_kernel()
+{
+ std::unique_ptr<Kernel> k = arm_compute::cpp14::make_unique<Kernel>();
+ return k;
+}
+
+/** Helper function to get the GPU target from CL device
+ *
+ * @param[in] device A CL device
+ *
+ * @return the GPU target
+ */
+GPUTarget get_target_from_device(cl::Device &device);
+
+/** Helper function to get the GPU arch
+ *
+ * @param[in] target GPU target
+ *
+ * @return the GPU target which shows the arch
+ */
+GPUTarget get_arch_from_target(GPUTarget target);
+}
+#endif
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
new file mode 100644
index 0000000000..c29610c252
--- /dev/null
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_H__
+#define __ARM_COMPUTE_CLKERNELLIBRARY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+namespace arm_compute
+{
+/** Program class */
+class Program
+{
+public:
+ /** Default constructor. */
+ Program();
+ /** Construct program from source file.
+ *
+ * @param[in] context CL context used to create the program.
+ * @param[in] name Program name.
+ * @param[in] source Program source.
+ */
+ Program(cl::Context context, std::string name, std::string source);
+ /** Construct program from binary file.
+ *
+ * @param[in] context CL context used to create the program.
+ * @param[in] device CL device for which the programs are created.
+ * @param[in] name Program name.
+ * @param[in] binary Program binary.
+ */
+ Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary);
+ /** Default Copy Constructor. */
+ Program(const Program &) = default;
+ /** Default Move Constructor. */
+ Program(Program &&) = default;
+ /** Default copy assignment operator. */
+ Program &operator=(const Program &) = default;
+ /** Default move assignment operator. */
+ Program &operator=(Program &&) = default;
+ /**Returns program name.
+ *
+ * @return Program's name.
+ */
+ std::string name() const
+ {
+ return _name;
+ }
+ /** User-defined conversion to the underlying CL program.
+ *
+ * @return The CL program object.
+ */
+ explicit operator cl::Program() const;
+
+ static bool build(const cl::Program &program, const std::string &build_options = "");
+ /** Build the underlying CL program.
+ *
+ * @param[in] build_options Options used to build the CL program.
+ *
+ * @return A reference to itself.
+ */
+ cl::Program build(const std::string &build_options = "") const;
+
+private:
+ cl::Context _context; /**< Underlying CL context. */
+ cl::Device _device; /**< CL device for which the programs are created. */
+ bool _is_binary; /**< Create program from binary? */
+ std::string _name; /**< Program name. */
+ std::string _source; /**< Source code for the program. */
+ std::vector<unsigned char> _binary; /**< Binary from which to create the program. */
+};
+
+/** Kernel class */
+class Kernel
+{
+public:
+ /** Default Constructor. */
+ Kernel();
+ /** Default Copy Constructor. */
+ Kernel(const Kernel &) = default;
+ /** Default Move Constructor. */
+ Kernel(Kernel &&) = default;
+ /** Default copy assignment operator. */
+ Kernel &operator=(const Kernel &) = default;
+ /** Default move assignment operator. */
+ Kernel &operator=(Kernel &&) = default;
+ /** Constructor.
+ *
+ * @param[in] name Kernel name.
+ * @param[in] program Built program.
+ */
+ Kernel(std::string name, const cl::Program &program);
+ /** Returns kernel name.
+ *
+ * @return Kernel's name.
+ */
+ std::string name() const
+ {
+ return _name;
+ }
+ /** Returns OpenCL kernel.
+ *
+ * @return OpenCL Kernel.
+ */
+ explicit operator cl::Kernel() const
+ {
+ return _kernel;
+ }
+
+private:
+ std::string _name; /**< Kernel name */
+ cl::Kernel _kernel; /**< OpenCL Kernel */
+};
+
+/** CLKernelLibrary class */
+class CLKernelLibrary
+{
+ using StringSet = std::set<std::string>;
+
+private:
+ /** Default Constructor. */
+ CLKernelLibrary();
+
+public:
+ /** Prevent instances of this class from being copied. */
+ CLKernelLibrary(const CLKernelLibrary &) = delete;
+ /** Prevent instances of this class from being copied. */
+ const CLKernelLibrary &operator=(const CLKernelLibrary &) = delete;
+ /** Access the KernelLibrary singleton.
+ * @return The KernelLibrary instance.
+ */
+ static CLKernelLibrary &get();
+ /** Initialises the kernel library.
+ *
+ * @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded.
+ * @param[in] context (Optional) CL context used to create programs.
+ * @param[in] device (Optional) CL device for which the programs are created.
+ */
+ void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
+ {
+ _kernel_path = std::move(kernel_path);
+ _context = std::move(context);
+ _device = std::move(device);
+ }
+ /** Sets the path that the kernels reside in.
+ *
+ * @param[in] kernel_path Path of the kernel.
+ */
+ void set_kernel_path(const std::string &kernel_path)
+ {
+ _kernel_path = kernel_path;
+ };
+ /** Sets the CL context used to create programs.
+ *
+ * @note Setting the context also resets the device to the
+ * first one available in the new context.
+ *
+ * @param[in] context A CL context.
+ */
+ void set_context(cl::Context context)
+ {
+ _context = std::move(context);
+
+ const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+
+ if(cl_devices.empty())
+ {
+ _device = cl::Device();
+ }
+ else
+ {
+ _device = cl_devices[0];
+ }
+ };
+ /** Sets the CL device for which the programs are created.
+ *
+ * @param[in] device A CL device.
+ */
+ void set_device(cl::Device device)
+ {
+ _device = std::move(device);
+ };
+ /** Creates a kernel from the kernel library.
+ *
+ * @param[in] kernel_name Kernel name.
+ * @param[in] build_options_set Kernel build options as a set.
+ *
+ * @return The created kernel.
+ */
+ Kernel create_kernel(const std::string &kernel_name, const StringSet &build_options_set = {}) const;
+ /** Serializes and saves programs to a binary.
+ *
+ */
+ void save_binary();
+ /** Load serialized binary with all the programs.
+ *
+ */
+ void load_binary();
+
+private:
+ /** Load program and its dependencies.
+ *
+ * @param[in] program_name Name of the program to load.
+ */
+ const Program &load_program(const std::string &program_name) const;
+ /** Concatenates contents of a set into a single string.
+ *
+ * @param[in] s Input set to concatenate.
+ *
+ * @return Concatenated string.
+ */
+ std::string stringify_set(const StringSet &s) const;
+
+ cl::Context _context; /**< Underlying CL context. */
+ cl::Device _device; /**< Underlying CL device. */
+ std::string _kernel_path; /**< Path to the kernels folder. */
+ mutable std::map<std::string, const Program> _programs_map; /**< Map with all already loaded program data. */
+ mutable std::map<std::string, cl::Program> _built_programs_map; /**< Map with all already built program data. */
+ static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
+ static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
+ Used for compile-time kernel inclusion. >*/
+};
+}
+#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
new file mode 100644
index 0000000000..0e9f356e52
--- /dev/null
+++ b/arm_compute/core/CL/CLKernels.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLKERNELS_H__
+#define __ARM_COMPUTE_CLKERNELS_H__
+
+/* Header regrouping all the CL kernels */
+#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
+#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
+#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
+#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h"
+#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
+#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
+#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
+#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
+#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
+#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
+#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
+#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
+#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
+#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
+#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
+#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
+#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+
+#endif /* __ARM_COMPUTE_CLKERNELS_H__ */
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
new file mode 100644
index 0000000000..c5643d8939
--- /dev/null
+++ b/arm_compute/core/CL/CLTypes.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CL_TYPES_H__
+#define __ARM_COMPUTE_CL_TYPES_H__
+
+namespace arm_compute
+{
+/** Available GPU Targets */
+enum class GPUTarget
+{
+ GPU_ARCH_MASK = 0xF00,
+ MIDGARD = 0x100,
+ BIFROST = 0x200,
+ T600 = 0x110,
+ T700 = 0x120,
+ T800 = 0x130,
+ G70 = 0x210
+};
+}
+#endif /* __ARM_COMPUTE_CL_TYPES_H__ */
diff --git a/arm_compute/core/CL/ICLArray.h b/arm_compute/core/CL/ICLArray.h
new file mode 100644
index 0000000000..1b676ed5a3
--- /dev/null
+++ b/arm_compute/core/CL/ICLArray.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLARRAY_H__
+#define __ARM_COMPUTE_ICLARRAY_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/IArray.h"
+#include "arm_compute/core/ITensor.h"
+
+namespace arm_compute
+{
+/** Interface for OpenCL Array */
+template <class T>
+class ICLArray : public IArray<T>
+{
+public:
+ /* Constructor */
+ explicit ICLArray(size_t max_num_values)
+ : IArray<T>(max_num_values), _mapping(nullptr)
+ {
+ }
+
+ ICLArray(const ICLArray &) = delete;
+ ICLArray &operator=(const ICLArray &) = delete;
+ virtual ~ICLArray() = default;
+ /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the array's data.
+ *
+ * @return A reference to an OpenCL buffer containing the array's data.
+ */
+ virtual const cl::Buffer &cl_buffer() const = 0;
+ /** Enqueue a map operation of the allocated buffer on the given queue.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ *
+ * @return The mapping address.
+ */
+ void map(cl::CommandQueue &q, bool blocking = true)
+ {
+ _mapping = do_map(q, blocking);
+ }
+ /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ void unmap(cl::CommandQueue &q)
+ {
+ do_unmap(q, _mapping);
+ _mapping = nullptr;
+ }
+
+ // Inherited methods overridden:
+ T *buffer() const override
+ {
+ return reinterpret_cast<T *>(_mapping);
+ }
+
+protected:
+ /** Method to be implemented by the child class to map the OpenCL buffer
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+ /** Method to be implemented by the child class to unmap the OpenCL buffer
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] mapping Pointer to the buffer to be unmapped.
+ */
+ virtual void do_unmap(cl::CommandQueue &q, uint8_t *mapping) = 0;
+
+private:
+ uint8_t *_mapping;
+};
+
+using ICLKeyPointArray = ICLArray<KeyPoint>;
+using ICLCoordinates2DArray = ICLArray<Coordinates2D>;
+using ICLDetectionWindowArray = ICLArray<DetectionWindow>;
+using ICLSize2DArray = ICLArray<Size2D>;
+using ICLUInt8Array = ICLArray<cl_uchar>;
+using ICLUInt16Array = ICLArray<cl_ushort>;
+using ICLUInt32Array = ICLArray<cl_uint>;
+using ICLInt16Array = ICLArray<cl_short>;
+using ICLInt32Array = ICLArray<cl_int>;
+using ICLFloatArray = ICLArray<cl_float>;
+}
+#endif /*__ARM_COMPUTE_ICLARRAY_H__*/
diff --git a/arm_compute/core/CL/ICLDistribution1D.h b/arm_compute/core/CL/ICLDistribution1D.h
new file mode 100644
index 0000000000..8fbbbbf548
--- /dev/null
+++ b/arm_compute/core/CL/ICLDistribution1D.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLDISTRIBUTION1D_H__
+#define __ARM_COMPUTE_ICLDISTRIBUTION1D_H__
+
+#include "arm_compute/core/IDistribution1D.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** ICLDistribution1D interface class */
+class ICLDistribution1D : public IDistribution1D
+{
+public:
+ /** Constructor: Creates a 1D CLDistribution of a consecutive interval [offset, offset + range - 1]
+ * defined by a start offset and valid range, divided equally into num_bins parts.
+ *
+ * @param[in] num_bins The number of bins the distribution is divided in.
+ * @param[in] offset The start of the values to use.
+ * @param[in] range The total number of the consecutive values of the distribution interval.
+ */
+ ICLDistribution1D(size_t num_bins, int32_t offset, uint32_t range);
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ ICLDistribution1D(const ICLDistribution1D &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ const ICLDistribution1D &operator=(const ICLDistribution1D &) = delete;
+ /** Enqueue a map operation of the allocated buffer on the given queue.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ void map(cl::CommandQueue &q, bool blocking = true);
+ /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ void unmap(cl::CommandQueue &q);
+ /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the distribution's data.
+ *
+ * @return A reference to an OpenCL buffer containing the distribution's data.
+ */
+ virtual cl::Buffer &cl_buffer() = 0;
+ // Inherited methods overridden:
+ uint32_t *buffer() const override;
+
+protected:
+ /** Method to be implemented by the child class to map the OpenCL buffer
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ virtual uint32_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+ /** Method to be implemented by the child class to unmap the OpenCL buffer
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ virtual void do_unmap(cl::CommandQueue &q) = 0;
+
+protected:
+ uint32_t *_mapping; /**< The distribution data. */
+};
+}
+#endif /* __ARM_COMPUTE_ICLDISTRIBUTION1D_H__ */
diff --git a/arm_compute/core/CL/ICLHOG.h b/arm_compute/core/CL/ICLHOG.h
new file mode 100644
index 0000000000..a3d2fb4a57
--- /dev/null
+++ b/arm_compute/core/CL/ICLHOG.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLHOG_H__
+#define __ARM_COMPUTE_ICLHOG_H__
+
+#include "arm_compute/core/IHOG.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** Interface for OpenCL HOG data-object */
+class ICLHOG : public IHOG
+{
+public:
+ /** Default constructor */
+ ICLHOG();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ ICLHOG(const ICLHOG &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ ICLHOG &operator=(const ICLHOG &) = delete;
+ /** Allow instances of this class to be moved */
+ ICLHOG(ICLHOG &&) = default;
+ /** Allow instances of this class to be moved */
+ ICLHOG &operator=(ICLHOG &&) = default;
+ /** Default destructor */
+ virtual ~ICLHOG() = default;
+
+ /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the hog's descriptor
+ *
+ * @return A reference to an OpenCL buffer containing the hog's descriptor
+ */
+ virtual const cl::Buffer &cl_buffer() const = 0;
+
+ /** Enqueue a map operation of the allocated buffer on the given queue.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ *
+ * @return The mapping address.
+ */
+ void map(cl::CommandQueue &q, bool blocking = true);
+
+ /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ void unmap(cl::CommandQueue &q);
+
+ /** Interface to be implemented by the child class to free the allocated cl buffer.
+ *
+ * @warning The buffer must have been allocated previously. Otherwise calling the function will fail.
+ */
+ virtual void free() = 0;
+
+ // Inherited methods overridden:
+ float *descriptor() const override;
+
+protected:
+ /** Method to be implemented by the child class to map the OpenCL buffer
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+ /** Method to be implemented by the child class to unmap the OpenCL buffer
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ virtual void do_unmap(cl::CommandQueue &q) = 0;
+
+private:
+ uint8_t *_mapping;
+};
+}
+#endif /*__ARM_COMPUTE_ICLHOG_H__ */
diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h
new file mode 100644
index 0000000000..72c963d11b
--- /dev/null
+++ b/arm_compute/core/CL/ICLKernel.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLKERNEL_H__
+#define __ARM_COMPUTE_ICLKERNEL_H__
+
+#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/IKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+class Window;
+
+/** Common interface for all the OpenCL kernels */
+class ICLKernel : public IKernel
+{
+public:
+ /** Constructor */
+ ICLKernel();
+ /** Returns a reference to the OpenCL kernel of this object.
+ *
+ * @return A reference to the OpenCL kernel of this object.
+ */
+ cl::Kernel &kernel();
+ /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Returns the number of arguments enqueued per 1D tensor object.
+ *
+ * @return The number of arguments enqueues per 1D tensor object.
+ */
+ unsigned int num_arguments_per_1D_tensor() const;
+ /** Returns the number of arguments enqueued per 2D tensor object.
+ *
+ * @return The number of arguments enqueues per 2D tensor object.
+ */
+ unsigned int num_arguments_per_2D_tensor() const;
+ /** Returns the number of arguments enqueued per 3D tensor object.
+ *
+ * @return The number of arguments enqueues per 3D tensor object.
+ */
+ unsigned int num_arguments_per_3D_tensor() const;
+ /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
+ *
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ */
+ virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
+ /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] value Value to set as an argument of the object's kernel.
+ */
+ template <typename T>
+ void add_argument(unsigned int &idx, T value)
+ {
+ _kernel.setArg(idx++, value);
+ }
+
+ /** Set the targeted GPU architecture
+ *
+ * @param[in] target The targeted GPU architecture
+ */
+ void set_target(GPUTarget target);
+
+ /** Set the targeted GPU architecture according to the CL device
+ *
+ * @param[in] device A CL device
+ */
+ void set_target(cl::Device &device);
+
+ /** Get the targeted GPU architecture
+ *
+ * @return The targeted GPU architecture.
+ */
+ GPUTarget get_target() const;
+
+private:
+ /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <unsigned int dimension_size>
+ void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+ /** Returns the number of arguments enqueued per tensor object.
+ *
+ * @return The number of arguments enqueued per tensor object.
+ */
+ template <unsigned int dimension_size>
+ unsigned int num_arguments_per_tensor() const;
+
+protected:
+ cl::Kernel _kernel; /**< OpenCL kernel to run */
+ cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+ GPUTarget _target; /**< The targeted GPU */
+};
+
+/** Add the kernel to the command queue with the given window.
+ *
+ * @note Depending on the size of the window, this might translate into several jobs being enqueued.
+ *
+ * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
+ *
+ * @param[in,out] queue OpenCL command queue.
+ * @param[in] kernel Kernel to enqueue
+ * @param[in] window Window the kernel has to process.
+ * @param[in] lws_hint Local workgroup size requested, by default (128,1)
+ *
+ * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
+ */
+void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = cl::Range_128_1);
+}
+#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */
diff --git a/arm_compute/core/CL/ICLLut.h b/arm_compute/core/CL/ICLLut.h
new file mode 100644
index 0000000000..2016ebb5c3
--- /dev/null
+++ b/arm_compute/core/CL/ICLLut.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLLUT_H__
+#define __ARM_COMPUTE_ICLLUT_H__
+
+#include "arm_compute/core/ILut.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** Interface for OpenCL LUT */
+class ICLLut : public ILut
+{
+public:
+ ICLLut();
+ ICLLut(const ICLLut &) = delete;
+ ICLLut &operator=(const ICLLut &) = delete;
+
+ /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the lut's data.
+ *
+ * @return A reference to an OpenCL buffer containing the lut's data.
+ */
+ virtual const cl::Buffer &cl_buffer() const = 0;
+ /** Enqueue a map operation of the allocated buffer on the given queue.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ void map(cl::CommandQueue &q, bool blocking = true);
+ /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ void unmap(cl::CommandQueue &q);
+
+ // Inherited methods overridden:
+ uint8_t *buffer() const override;
+
+protected:
+ /** Method to be implemented by the child class to map the OpenCL buffer
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+ /** Method to be implemented by the child class to unmap the OpenCL buffer
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ virtual void do_unmap(cl::CommandQueue &q) = 0;
+
+private:
+ uint8_t *_mapping;
+};
+}
+#endif /*__ARM_COMPUTE_ICLLUT_H__ */
diff --git a/arm_compute/core/CL/ICLMultiHOG.h b/arm_compute/core/CL/ICLMultiHOG.h
new file mode 100644
index 0000000000..9f3c775230
--- /dev/null
+++ b/arm_compute/core/CL/ICLMultiHOG.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLMULTIHOG_H__
+#define __ARM_COMPUTE_ICLMULTIHOG_H__
+
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/IMultiHOG.h"
+
+namespace arm_compute
+{
+/** Interface for storing multiple HOG data-objects */
+class ICLMultiHOG : public IMultiHOG
+{
+public:
+ /** Return a pointer to the requested OpenCL HOG model
+ *
+ * @param[in] index The index of the wanted OpenCL HOG model.
+ *
+ * @return A pointer pointed to the HOG model
+ */
+ virtual ICLHOG *cl_model(size_t index) = 0;
+ /** Return a constant pointer to the requested OpenCL HOG model
+ *
+ * @param[in] index The index of the wanted OpenCL HOG model.
+ *
+ * @return A constant pointer pointed to the OpenCL HOG model
+ */
+ virtual const ICLHOG *cl_model(size_t index) const = 0;
+
+ // Inherited methods overridden:
+ IHOG *model(size_t index) override;
+ const IHOG *model(size_t index) const override;
+};
+}
+#endif /*__ARM_COMPUTE_ICLMULTIHOG_H__ */
diff --git a/arm_compute/core/CL/ICLMultiImage.h b/arm_compute/core/CL/ICLMultiImage.h
new file mode 100644
index 0000000000..e8705b1824
--- /dev/null
+++ b/arm_compute/core/CL/ICLMultiImage.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLMULTIIMAGE_H__
+#define __ARM_COMPUTE_ICLMULTIIMAGE_H__
+
+#include "arm_compute/core/IMultiImage.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for OpenCL multi-planar images */
+class ICLMultiImage : public IMultiImage
+{
+public:
+ /** Return a pointer to the requested OpenCL plane of the image.
+ *
+ * @param[in] index The index of the wanted planed.
+ *
+ * @return A pointer pointed to the OpenCL plane
+ */
+ virtual ICLImage *cl_plane(unsigned int index) = 0;
+ /** Return a constant pointer to the requested OpenCL plane of the image.
+ *
+ * @param[in] index The index of the wanted planed.
+ *
+ * @return A constant pointer pointed to the OpenCL plane
+ */
+ virtual const ICLImage *cl_plane(unsigned int index) const = 0;
+
+ // Inherited methods overridden:
+ IImage *plane(unsigned int index) override;
+ const IImage *plane(unsigned int index) const override;
+};
+}
+#endif /*__ARM_COMPUTE_ICLMULTIIMAGE_H__ */
diff --git a/arm_compute/core/CL/ICLSimple2DKernel.h b/arm_compute/core/CL/ICLSimple2DKernel.h
new file mode 100644
index 0000000000..a1366fb211
--- /dev/null
+++ b/arm_compute/core/CL/ICLSimple2DKernel.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__
+#define __ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimpleKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */
+class ICLSimple2DKernel : public ICLSimpleKernel
+{
+public:
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /*__ARM_COMPUTE_ICLSIMPLE2DKERNEL_H__ */
diff --git a/arm_compute/core/CL/ICLSimple3DKernel.h b/arm_compute/core/CL/ICLSimple3DKernel.h
new file mode 100644
index 0000000000..5e981027de
--- /dev/null
+++ b/arm_compute/core/CL/ICLSimple3DKernel.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__
+#define __ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output.
+ * Both input tensor and output tensor must have at least 3 dimensions.
+ */
+class ICLSimple3DKernel : public ICLSimple2DKernel
+{
+public:
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /*__ARM_COMPUTE_ICLSIMPLE3DKERNEL_H__ */
diff --git a/arm_compute/core/CL/ICLSimpleKernel.h b/arm_compute/core/CL/ICLSimpleKernel.h
new file mode 100644
index 0000000000..e9fdb7fb8b
--- /dev/null
+++ b/arm_compute/core/CL/ICLSimpleKernel.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLSIMPLEKERNEL_H__
+#define __ARM_COMPUTE_ICLSIMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+
+namespace arm_compute
+{
+/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */
+class ICLSimpleKernel : public ICLKernel
+{
+public:
+ /** Constructor. */
+ ICLSimpleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ ICLSimpleKernel(const ICLSimpleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete;
+ /** Allow instances of this class to be moved. */
+ ICLSimpleKernel(ICLSimpleKernel &&) = default;
+ /** Allow instances of this class to be moved. */
+ ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default;
+ /** Default destructor */
+ ~ICLSimpleKernel() = default;
+
+ /** Configure the kernel
+ *
+ * @param[in] input Source tensor.
+ * @param[out] output Destination tensor.
+ * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
+ * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] border_size (Optional) Size of the border.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
+
+protected:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+}
+
+#endif /*__ARM_COMPUTE_ICLSIMPLEKERNEL_H__ */
diff --git a/arm_compute/core/CL/ICLTensor.h b/arm_compute/core/CL/ICLTensor.h
new file mode 100644
index 0000000000..abc0131379
--- /dev/null
+++ b/arm_compute/core/CL/ICLTensor.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ICLTENSOR_H__
+#define __ARM_COMPUTE_ICLTENSOR_H__
+
+#include "arm_compute/core/ITensor.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+class CommandQueue;
+}
+
+namespace arm_compute
+{
+/** Interface for OpenCL tensor */
+class ICLTensor : public ITensor
+{
+public:
+ ICLTensor();
+ ICLTensor(const ICLTensor &) = delete;
+ ICLTensor &operator=(const ICLTensor &) = delete;
+ ICLTensor(ICLTensor &&) = default;
+ ICLTensor &operator=(ICLTensor &&) = default;
+ virtual ~ICLTensor() = default;
+
+ /** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data.
+ *
+ * @return A reference to an OpenCL buffer containing the image's data.
+ */
+ virtual const cl::Buffer &cl_buffer() const = 0;
+ /** Enqueue a map operation of the allocated buffer on the given queue.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ *
+ * @return The mapping address.
+ */
+ void map(cl::CommandQueue &q, bool blocking = true);
+ /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ void unmap(cl::CommandQueue &q);
+ /** Clear the contents of the tensor synchronously.
+ *
+ * @param[in,out] q The CL command queue to use for the clear operation.
+ */
+ void clear(cl::CommandQueue &q);
+
+ // Inherited methods overridden:
+ uint8_t *buffer() const override;
+
+protected:
+ /** Method to be implemented by the child class to map the OpenCL buffer
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ * @param[in] blocking If true, then the mapping will be ready to use by the time
+ * this method returns, else it is the caller's responsibility
+ * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+ */
+ virtual uint8_t *do_map(cl::CommandQueue &q, bool blocking) = 0;
+ /** Method to be implemented by the child class to unmap the OpenCL buffer
+ *
+ * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+ * the memory is accessed by the device.
+ *
+ * @param[in,out] q The CL command queue to use for the mapping operation.
+ */
+ virtual void do_unmap(cl::CommandQueue &q) = 0;
+
+private:
+ uint8_t *_mapping;
+};
+
+using ICLImage = ICLTensor;
+}
+#endif /*__ARM_COMPUTE_ICLTENSOR_H__ */
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
new file mode 100644
index 0000000000..2fae35c974
--- /dev/null
+++ b/arm_compute/core/CL/OpenCL.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_OPENCL_H__
+#define __ARM_COMPUTE_OPENCL_H__
+
+/* Configure the Khronos C++ wrapper to target OpenCL 1.2: */
+#define CL_HPP_ENABLE_EXCEPTIONS
+#define CL_HPP_CL_1_2_DEFAULT_BUILD
+#define CL_HPP_TARGET_OPENCL_VERSION 110
+#define CL_HPP_MINIMUM_OPENCL_VERSION 110
+#include <CL/cl2.hpp>
+
+namespace cl
+{
+static const NDRange Range_128_1 = NDRange(128, 1);
+}
+
+namespace arm_compute
+{
+bool opencl_is_available();
+}
+#endif /* __ARM_COMPUTE_OPENCL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
new file mode 100644
index 0000000000..e8bd6aac7f
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__
+#define __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the absolute difference kernel.
+ *
+ * Absolute difference is computed by:
+ * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
+ */
+class CLAbsoluteDifferenceKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLAbsoluteDifferenceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete;
+ /** Allow instances of this class to be moved. */
+ CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default;
+ /** Allow instances of this class to be moved. */
+ CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default;
+ /** Default destructor */
+ ~CLAbsoluteDifferenceKernel() = default;
+
+ /** Set the inputs and output images.
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8/S16.
+ * @param[in] input2 Source tensor. Data types supported: U8/S16.
+ * @param[out] output Destination tensor. Data types supported: U8/S16.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1. */
+ const ICLTensor *_input2; /**< Source tensor 2. */
+ ICLTensor *_output; /**< Destination tensor. */
+};
+}
+#endif /* __ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLAccumulateKernel.h b/arm_compute/core/CL/kernels/CLAccumulateKernel.h
new file mode 100644
index 0000000000..5c8ffdb404
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLAccumulateKernel.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLACCUMULATEKERNEL_H__
+#define __ARM_COMPUTE_CLACCUMULATEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the accumulate kernel.
+ *
+ * Accumulation is computed by:
+ * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
+ */
+class CLAccumulateKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and accumulation tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] accum Destination tensor. Data types supported: S16.
+ */
+ void configure(const ICLTensor *input, ICLTensor *accum);
+};
+
+/** Interface for the accumulate weighted kernel.
+ *
+ * Weighted accumulation is computed:
+ * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
+ *
+ * Where @f$ 0 \le \alpha \le 1 @f$
+ * Conceptually, the rounding for this is defined as:
+ * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
+*/
+class CLAccumulateWeightedKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and accumulation images, and the scale value.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+};
+
+/** Interface for the accumulate squared kernel.
+ *
+ * The accumulation of squares is computed:
+ * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
+ *
+ * Where @f$ 0 \le shift \le 15 @f$
+*/
+class CLAccumulateSquaredKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and accumulation tensors and the shift value.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+ */
+ void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+};
+}
+#endif /*__ARM_COMPUTE_CLACCUMULATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
new file mode 100644
index 0000000000..490e70544b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple3DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the activation layer kernel. */
+class CLActivationLayerKernel : public ICLSimple3DKernel
+{
+public:
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16.
+ * @param[out] output Destination tensor. Data type should match the input data type.
+ * @param[in] act_info Activation layer information.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+};
+}
+#endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
new file mode 100644
index 0000000000..7d736cdf44
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__
+#define __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the arithmetic addition kernel
+ *
+ * Arithmetic addition is computed by:
+ * @f[ output(x,y) = input1(x,y) + input2(x,y) @f]
+ */
+class CLArithmeticAdditionKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLArithmeticAdditionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArithmeticAdditionKernel(const CLArithmeticAdditionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArithmeticAdditionKernel &operator=(const CLArithmeticAdditionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLArithmeticAdditionKernel(CLArithmeticAdditionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLArithmeticAdditionKernel &operator=(CLArithmeticAdditionKernel &&) = default;
+ /** Default destructor */
+ ~CLArithmeticAdditionKernel() = default;
+ /** Initialise the kernel's inputs, output and convertion policy.
+ *
+ * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32.
+ * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32.
+ * @param[in] policy Policy to use to handle overflow.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+}
+#endif /* __ARM_COMPUTE_CLARITHMETICADDITIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
new file mode 100644
index 0000000000..afecf6ed7d
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__
+#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the arithmetic subtraction kernel
+ *
+ * Arithmetic subtraction is computed by:
+ * @f[ output(x,y) = input1(x,y) - input2(x,y) @f]
+ */
+class CLArithmeticSubtractionKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLArithmeticSubtractionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArithmeticSubtractionKernel(const CLArithmeticSubtractionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArithmeticSubtractionKernel &operator=(const CLArithmeticSubtractionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLArithmeticSubtractionKernel(CLArithmeticSubtractionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLArithmeticSubtractionKernel &operator=(CLArithmeticSubtractionKernel &&) = default;
+ /** Default destructor */
+ ~CLArithmeticSubtractionKernel() = default;
+
+ /** Initialise the kernel's inputs, output and convertion policy.
+ *
+ * @param[in] input1 First tensor input. Data types supported: U8, S16, F16, F32.
+ * @param[in] input2 Second tensor input. Data types supported: U8, S16, F16, F32.
+ * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16, F16, F32.
+ * @param[in] policy Policy to use to handle overflow.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+}
+#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
new file mode 100644
index 0000000000..088853841b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the BatchNormalization layer kernel.
+ */
+class CLBatchNormalizationLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLBatchNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator. */
+ CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLBatchNormalizationLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. Data types supported: F32.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * The rest are optional and used for representing batches.
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] epsilon Small value to avoid division with zero.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_mean;
+ const ICLTensor *_var;
+ const ICLTensor *_beta;
+ const ICLTensor *_gamma;
+ float _epsilon;
+};
+}
+#endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
new file mode 100644
index 0000000000..624c422abc
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBitwiseAndKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISEANDKERNEL_H__
+#define __ARM_COMPUTE_CLBITWISEANDKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise AND operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
+ */
+class CLBitwiseAndKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLBitwiseAndKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default;
+ /** Set the inputs and output images
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISEANDKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
new file mode 100644
index 0000000000..c9026022e1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBitwiseNotKernel.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISENOTKERNEL_H__
+#define __ARM_COMPUTE_CLBITWISENOTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise NOT operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = \lnot input(x,y) @f]
+ */
+class CLBitwiseNotKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the inputs and output images.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISENOTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
new file mode 100644
index 0000000000..fe8710fbc1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBitwiseOrKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISEORKERNEL_H__
+#define __ARM_COMPUTE_CLBITWISEORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise OR operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
+ */
+class CLBitwiseOrKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLBitwiseOrKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default;
+ /** Set the inputs and output images
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISEORKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
new file mode 100644
index 0000000000..f4e0b4df60
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBitwiseXorKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBITWISEXORKERNEL_H__
+#define __ARM_COMPUTE_CLBITWISEXORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise XOR operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
+ */
+class CLBitwiseXorKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLBitwiseXorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default;
+ /** Set the inputs and output images
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+}
+#endif /* __ARM_COMPUTE_CLBITWISEXORKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBox3x3Kernel.h b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
new file mode 100644
index 0000000000..0960f7487a
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLBox3x3Kernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLBOX3X3KERNEL_H__
+#define __ARM_COMPUTE_CLBOX3X3KERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the box 3x3 filter kernel.
+ *
+ */
+class CLBox3x3Kernel : public ICLSimple2DKernel
+{
+public:
+ /**Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ //Inherited methods overriden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLBOX3X3KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
new file mode 100644
index 0000000000..5ca3e03412
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__
+#define __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform Gradient computation.
+ */
+class CLGradientKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGradientKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLGradientKernel(const CLGradientKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLGradientKernel &operator=(const CLGradientKernel &) = delete;
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @note gx, gy and mag must all be the same size (either 16 or 32).
+ *
+ * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
+ * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
+ * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
+ * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
+ * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
+ */
+ void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_gx; /**< Source tensor - Gx component */
+ const ICLTensor *_gy; /**< Source tensor - Gy component */
+ ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
+ ICLTensor *_phase; /**< Destination tensor - Quantized phase */
+};
+
+/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge.
+ *
+ * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
+ * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed.
+ *
+ * @note Hysteresis is computed in @ref CLEdgeTraceKernel
+ */
+class CLEdgeNonMaxSuppressionKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLEdgeNonMaxSuppressionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete;
+ /** Initialise the kernel's sources, destination and border mode.
+ *
+ * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
+ * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U16/U32.
+ * @param[in] lower_thr Lower threshold.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
+ const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
+ ICLTensor *_output; /**< Destination tensor. */
+};
+
+/** OpenCL kernel to perform Edge tracing.
+ */
+class CLEdgeTraceKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLEdgeTraceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] upper_thr Upper threshold used for the hysteresis
+ * @param[in] lower_thr Lower threshold used for the hysteresis
+ * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
+ * Expected to be initialized to 0 before each run.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
+ ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor. */
+ ICLTensor *_output; /**< Destination tensor. */
+ int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */
+ int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */
+ ICLTensor *_visited; /**< Marks visited elements */
+ ICLTensor *_recorded; /**< Marks recorded elements */
+ ICLTensor *_l1_stack; /**< L1 hysteris stack */
+ ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */
+};
+}
+#endif /* __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
new file mode 100644
index 0000000000..3e718a2f1a
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__
+#define __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <array>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the channel combine kernel */
+class CLChannelCombineKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLChannelCombineKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelCombineKernel(const CLChannelCombineKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLChannelCombineKernel(CLChannelCombineKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default;
+ /** Default destructor */
+ ~CLChannelCombineKernel() = default;
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
+ * @param[out] output The single planar output tensor.
+ */
+ void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[out] output The multi planar output tensor.
+ */
+ void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ std::array<const ICLTensor *, 4> _planes;
+ ICLTensor *_output;
+ ICLMultiImage *_output_multi;
+ std::array<uint32_t, 3> _x_subsampling;
+ std::array<uint32_t, 3> _y_subsampling;
+};
+}
+#endif /* __ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLChannelExtractKernel.h b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
new file mode 100644
index 0000000000..3e9e699a50
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLChannelExtractKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__
+#define __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the channel extract kernel */
+class CLChannelExtractKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLChannelExtractKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelExtractKernel(const CLChannelExtractKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLChannelExtractKernel(CLChannelExtractKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default;
+ /** Default destructor */
+ ~CLChannelExtractKernel() = default;
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor.
+ * @param[in] channel Channel to extract.
+ * @param[out] output Destination tensor. Must be of U8 format.
+ */
+ void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image.
+ * @param[in] channel Channel to extract.
+ * @param[out] output Single-planar 2D destination image. Must be of U8 format.
+ */
+ void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ uint32_t _num_elems_processed_per_iteration;
+ uint32_t _subsampling;
+};
+}
+#endif /* __ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
new file mode 100644
index 0000000000..9d445e3004
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCOL2IMKERNEL_H__
+#define __ARM_COMPUTE_CLCOL2IMKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the col2im reshaping kernel.
+ *
+ * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLCol2ImKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCol2ImKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCol2ImKernel(const CLCol2ImKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCol2ImKernel(CLCol2ImKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default;
+ /** Default destructor */
+ ~CLCol2ImKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Data types supported: F16, F32
+ * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input
+ * @param[in] convolved_dims Output convolved dimensions.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ std::pair<unsigned int, unsigned int> _convolved_dims;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLCOL2IMKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
new file mode 100644
index 0000000000..a88e2dcdf3
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__
+#define __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the color convert kernel.
+ *
+ */
+class CLColorConvertKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLColorConvertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLColorConvertKernel(const CLColorConvertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLColorConvertKernel(CLColorConvertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default;
+ /** Default destructor. */
+ ~CLColorConvertKernel() = default;
+
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor
+ * @param[out] output Destination tensor
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input multi-planar source image
+ * @param[out] output single-planar destination image
+ */
+ void configure(const ICLMultiImage *input, ICLImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input single-planar source image
+ * @param[out] output multi-planar destination image
+ */
+ void configure(const ICLImage *input, ICLMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input multi-planar source image
+ * @param[out] output multi-planar destination image
+ */
+ void configure(const ICLMultiImage *input, ICLMultiImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /*pointer to single planar tensor input */
+ ICLTensor *_output; /*pointer to single planar tensor output */
+ const ICLMultiImage *_multi_input; /*pointer to multi-planar input */
+ ICLMultiImage *_multi_output; /*pointer to multi-planar output */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLCOLORCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
new file mode 100644
index 0000000000..9c0908405a
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__
+#define __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/****************************************************************************************\
+ * Square Convolution *
+\****************************************************************************************/
+
+/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
+ * The client can supply a convolution matrix \f$ C_{m,n} \f$.
+ * @f{eqnarray}{
+ * k_0 &=& \frac{m}{2} \\
+ * l_0 &=& \frac{n}{2} \\
+ * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
+ * @f}
+ *
+ * @note The above equation for this function is similar to the default OpenCV Filter2D function,
+ * which actually computes a correlation and not a convolution.
+ * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
+ */
+template <unsigned int matrix_size>
+class CLConvolutionKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+
+/** Interface for the kernel which applies a 3x3 convolution to a tensor. */
+using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
+/** Interface for the kernel which applies a 5x5 convolution to a tensor. */
+using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
+/** Interface for the kernel which applies a 7x7 convolution to a tensor. */
+using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
+/** Interface for the kernel which applies a 9x9 convolution to a tensor. */
+using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
+
+/****************************************************************************************\
+ * Separable Square Convolution *
+\****************************************************************************************/
+
+/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */
+template <unsigned int matrix_size>
+class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
+{
+public:
+ /** Default Constructor */
+ CLSeparableConvolutionHorKernel();
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */
+using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
+/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */
+using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
+/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */
+using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
+
+/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */
+template <unsigned int matrix_size>
+class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: S16.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+
+/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */
+using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
+/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */
+using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
+/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */
+using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
+
+/****************************************************************************************\
+ * Rectangle Convolution *
+\****************************************************************************************/
+
+/** Kernel for the running convolution on a rectangle matrix.
+ *
+ * @note Supports combinations of 3,5,7 and 9.
+ */
+class CLConvolutionRectangleKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLConvolutionRectangleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] width Width of convolution matrix (Number of columns)
+ * @param[in] height Height of convolution matrix (Number of rows)
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size;
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
new file mode 100644
index 0000000000..eda4c66883
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class CLDepthConcatenateKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDepthConcatenateKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthConcatenateKernel(const CLDepthConcatenateKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthConcatenateKernel &operator=(const CLDepthConcatenateKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDepthConcatenateKernel(CLDepthConcatenateKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDepthConcatenateKernel &operator=(CLDepthConcatenateKernel &&) = default;
+ /** Default destructor */
+ ~CLDepthConcatenateKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor. Data types supported: F32.
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in,out] output Output tensor. Data types supported: F32.
+ *
+ * @note: The output tensor's low two dimensions can't be smaller than the input one's.
+ * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ *
+ */
+ void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ int _top_bottom;
+ int _left_right;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
new file mode 100644
index 0000000000..2c3b1b8b69
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth conversion kernel.
+ *
+ */
+class CLDepthConvertKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and output of the kernel.
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - U8 -> U16, S16, U32, S32
+ * - U16 -> U8, U32, S32
+ * - S16 -> U8, U32, S32
+ * - U32 -> U8, U16, S16
+ * - S32 -> U8, U16, S16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8, U16, S16, U32 or S32.
+ * @param[out] output The output tensor. Data types supported: U8, U16, S16, U32 or S32.
+ * @param[in] policy Conversion policy
+ * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+}
+
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDerivativeKernel.h b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
new file mode 100644
index 0000000000..17552aefbe
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDerivativeKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__
+#define __ARM_COMPUTE_CLDERIVATIVEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the derivative kernel. */
+class CLDerivativeKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDerivativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLDerivativeKernel(const CLDerivativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDerivativeKernel(CLDerivativeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default;
+ /** Default destructor */
+ ~CLDerivativeKernel() = default;
+ /** Initialise the kernel's sources, destination and border
+ *
+ * @note At least one of output_x or output_y must be set
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */
+ ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */
+ bool _run_derivative_x; /**< Do we need to run Derivative X ? */
+ bool _run_derivative_y; /**< Do we need to run Derivative Y ? */
+};
+}
+#endif /*__ARM_COMPUTE_CLDERIVATIVEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDilateKernel.h b/arm_compute/core/CL/kernels/CLDilateKernel.h
new file mode 100644
index 0000000000..a5d3beb02f
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDilateKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDILATEKERNEL_H__
+#define __ARM_COMPUTE_CLDILATEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the dilate kernel.
+ *
+ */
+class CLDilateKernel : public ICLSimple2DKernel
+{
+public:
+ /**Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLDILATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLErodeKernel.h b/arm_compute/core/CL/kernels/CLErodeKernel.h
new file mode 100644
index 0000000000..a43c925be6
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLErodeKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLERODEKERNEL_H__
+#define __ARM_COMPUTE_CLERODEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the erode kernel.
+ *
+ */
+class CLErodeKernel : public ICLSimple2DKernel
+{
+public:
+ /**Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLERODEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLFastCornersKernel.h b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
new file mode 100644
index 0000000000..9817b78ae0
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLFastCornersKernel.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__
+#define __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** CL kernel to perform fast corners */
+class CLFastCornersKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFastCornersKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFastCornersKernel(const CLFastCornersKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFastCornersKernel(CLFastCornersKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default;
+ /** Default destructor */
+ ~CLFastCornersKernel() = default;
+
+ /** Initialise the kernel.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Output image. Data types supported: U8.
+ * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+ * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise.
+ * @param[in] border_mode Strategy to use for borders.
+ */
+ void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
+
+ // Inherited methods overridden
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLImage *_input;
+ ICLImage *_output;
+};
+
+/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */
+class CLCopyToArrayKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCopyToArrayKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default;
+ /** Default destructor */
+ ~CLCopyToArrayKernel() = default;
+
+ /** Initialise the kernel.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[in] update_number Flag to indicate whether we need to update the number of corners
+ * @param[out] corners Array of keypoints to store the results.
+ * @param[out] num_buffers Number of keypoints to store the results.
+ */
+ void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input; /**< source image */
+ ICLKeyPointArray *_corners; /**< destination array */
+ cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */
+};
+}
+#endif /* __ARM_COMPUTE_CLFASTCORNERSKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
new file mode 100644
index 0000000000..797f86dae8
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLFILLBORDERKERNEL_H__
+#define __ARM_COMPUTE_CLFILLBORDERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the border of a kernel */
+class CLFillBorderKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFillBorderKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFillBorderKernel(const CLFillBorderKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFillBorderKernel(CLFillBorderKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default;
+ /** Default destructor */
+ ~CLFillBorderKernel() = default;
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in,out] tensor Tensor to process Data types supported: U8, S16, S32, F32.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+ /** Function to set the constant value on fill border kernel depending on type.
+ *
+ * @param[in] idx Index of the kernel argument to set.
+ * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ template <class T>
+ void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ bool is_parallelisable() const override;
+
+private:
+ ICLTensor *_tensor;
+};
+}
+#endif /*__ARM_COMPUTE_CLFILLBORDERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
new file mode 100644
index 0000000000..3ac7b3c4fa
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__
+#define __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which interleaves the elements of a matrix A in chunk of 4x4
+ *
+ * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
+ */
+class CLGEMMInterleave4x4Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMInterleave4x4Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMInterleave4x4Kernel(const CLGEMMInterleave4x4Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMInterleave4x4Kernel &operator=(const CLGEMMInterleave4x4Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMInterleave4x4Kernel(CLGEMMInterleave4x4Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLGEMMINTERLEAVE4X4KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..f84d0638da
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to compute low precision matrix multiplication kernel
+ *
+ * This kernel performs the following computation:
+ * -# Convert a values from uint8 to int32 and add a_offset to each of them.
+ * -# Convert b values from uint8 to int32 and add b_offset to each of them.
+ * -# Compute the int32 matrix product of the resulting a * b.
+ * -# Add output_offset to each entry of the result.
+ * -# Multiply each entry of the result and round to the nearest integer
+ * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8.
+ */
+class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMLowpMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyKernel(const CLGEMMLowpMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyKernel &operator=(const CLGEMMLowpMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyKernel(CLGEMMLowpMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyKernel &operator=(CLGEMMLowpMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel.
+ * These two kernels change the layout of the original matrices to be more cache-friendly.
+ *
+ * @param[in] input0 Input tensor containing the interleaved Matrix A. Data types supported: U8
+ * @param[in] input1 Input tensor containing the transposed Matrix B. Data types supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication, Data types supported: same as @p input0
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_offset Offset to be added to each element of the output matrix
+ * @param[in] output_mult_int Offset to be added to each element of the output matrix
+ * @param[in] shift Number of bits to shift right the result.
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 0000000000..ea1db9f831
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface to add a bias to each row of the input tensor
+ *
+ */
+class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixAccumulateBiasesKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
+ */
+ void configure(ICLTensor *accum, const ICLTensor *biases);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_accum;
+ const ICLTensor *_biases;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
new file mode 100644
index 0000000000..c808039567
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta.
+ * The matrices must have the same dimensions
+ *
+ * @note This kernel is computed if and only if beta != 0.0.
+ */
+class CLGEMMMatrixAdditionKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixAdditionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAdditionKernel(const CLGEMMMatrixAdditionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixAdditionKernel &operator=(const CLGEMMMatrixAdditionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAdditionKernel(CLGEMMMatrixAdditionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixAdditionKernel &operator=(CLGEMMMatrixAdditionKernel &&) = default;
+ /** Initialise the kernel's input, output and beta value
+ *
+ * @note The input and output tensors must have the same dimensions
+ *
+ * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32
+ * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input
+ * @param[in] beta Weight of matrix C
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, float beta);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLGEMMMATRIXADDITIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..07ea3c12ac
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha
+ *
+ * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref CLGEMMInterleave4x4Kernel" and @ref CLGEMMTranspose1xWKernel
+ * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class CLGEMMMatrixMultiplyKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's input, output and alpha
+ *
+ * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+ * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
new file mode 100644
index 0000000000..8d44a4c4fa
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__
+#define __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which transposes the elements of a matrix in chunks of 1x4 if the input data type is F32 or in chunks of 1x8 if the input data type is F16.
+ *
+ * Following an example of how the transposition1xW works when the input data type is F32
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * Following an example of how the transposition1xW works when the input data type is F16
+ *
+ * @f[
+ * \left( \begin{array}{cccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a7 \\
+ * a10 & a11 & a12 & a13 & a14 & a15 & a16 & 17 \\
+ * a20 & a21 & a22 & a23 & a24 & a25 & a26 & 27 \\
+ * a30 & a31 & a32 & a33 & a34 & a35 & a36 & 37 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\
+ * \end{array} \right)
+ * @f]
+ *
+ * @note If the input data type is F32, the output matrix will have the following shape: [ height * 4, width / 4 ]
+ * @note If the input data type is F16, the output matrix will have the following shape: [ height * 8, width / 8 ]
+ * @note If the input data type is U8, the output matrix will have the following shape: [ height * 16, width / 16 ]
+ *
+ */
+class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/F16/F32
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /* __ARM_COMPUTE_CLGEMMTRANSPOSE1XWKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
new file mode 100644
index 0000000000..028a10b421
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__
+#define __ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Gaussian 3x3 filter kernel.
+ *
+ */
+class CLGaussian3x3Kernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
new file mode 100644
index 0000000000..1484c06311
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__
+#define __ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__
+
+#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */
+class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel
+{
+public:
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+private:
+ //Make the configure method of the parent class private
+ using CLSeparableConvolution5x5HorKernel::configure;
+};
+
+/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */
+class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel
+{
+public:
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+private:
+ //Make the configure method of the parent class private
+ using CLSeparableConvolution5x5VertKernel::configure;
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
new file mode 100644
index 0000000000..6d79d0e718
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__
+#define __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimpleKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */
+class CLGaussianPyramidHorKernel : public ICLSimpleKernel
+{
+public:
+ /** Default constructor */
+ CLGaussianPyramidHorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default;
+ /** Default destructor */
+ ~CLGaussianPyramidHorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size;
+ int _l2_load_offset;
+};
+
+/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */
+class CLGaussianPyramidVertKernel : public ICLSimpleKernel
+{
+public:
+ /** Default constructor */
+ CLGaussianPyramidVertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default;
+ /** Default destructor */
+ ~CLGaussianPyramidVertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U16.
+ * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ int _t2_load_offset;
+};
+}
+#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
new file mode 100644
index 0000000000..45a5aac1bc
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__
+#define __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/Size2D.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** OpenCL kernel to perform HOG Orientation Binning */
+class CLHOGOrientationBinningKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHOGOrientationBinningKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default;
+ /** Default destructor */
+ ~CLHOGOrientationBinningKernel() = default;
+
+ /** Initialise the kernel's inputs, output and HOG's metadata
+ *
+ * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+ * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+ * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input_magnitude;
+ const ICLTensor *_input_phase;
+ ICLTensor *_output;
+ Size2D _cell_size;
+};
+
+/** OpenCL kernel to perform HOG block normalization */
+class CLHOGBlockNormalizationKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHOGBlockNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~CLHOGBlockNormalizationKernel() = default;
+
+ /** Initialise the kernel's input, output and HOG's metadata
+ *
+ * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ Size2D _num_cells_per_block_stride;
+};
+}
+#endif /* __ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
new file mode 100644
index 0000000000..47bd0549ee
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHOGDetectorKernel.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__
+#define __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/OpenCL.h"
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform HOG detector kernel using linear SVM */
+class CLHOGDetectorKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHOGDetectorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default;
+ /** Default destructor */
+ ~CLHOGDetectorKernel() = default;
+
+ /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+ *
+ * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
+ * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
+ * @param[in] num_detection_windows Number of detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be multiple of the hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
+ uint16_t idx_class = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue);
+
+private:
+ const ICLTensor *_input;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer *_num_detection_windows;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHOGDETECTORKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
new file mode 100644
index 0000000000..d8057df8d1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHarrisCornersKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__
+#define __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the harris score kernel.
+ *
+ * @note The implementation supports 3, 5, and 7 for the block_size.
+ */
+class CLHarrisScoreKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHarrisScoreKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default;
+ /** Default destructor */
+ ~CLHarrisScoreKernel() = default;
+
+ /** Setup the kernel parameters
+ *
+ * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
+ * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
+ * @param[out] output Destination image (harris score). Data types supported F32
+ * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
+ * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
+ * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+ * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+ int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+ bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+protected:
+ const ICLImage *_input1; /**< Source image - Gx component */
+ const ICLImage *_input2; /**< Source image - Gy component */
+ ICLImage *_output; /**< Source image - Harris score */
+ float _sensitivity; /**< Sensitivity value */
+ float _strength_thresh; /**< Threshold value */
+ float _norm_factor; /**< Normalization factor */
+ BorderSize _border_size; /**< Border size */
+};
+}
+#endif /* __ARM_COMPUTE_CLHARRISCORNERSKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLHistogramKernel.h b/arm_compute/core/CL/kernels/CLHistogramKernel.h
new file mode 100644
index 0000000000..b65e62d9a2
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLHistogramKernel.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__
+#define __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLDistribution1D;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16.
+ * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel
+ */
+class CLHistogramKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLHistogramKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramKernel(const CLHistogramKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramKernel &operator=(const CLHistogramKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHistogramKernel(CLHistogramKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHistogramKernel &operator=(CLHistogramKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(const ICLImage *input, ICLDistribution1D *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input;
+ ICLDistribution1D *_output;
+};
+
+/** Interface to run the histogram kernel to handle the leftover part of image
+ *
+ */
+class CLHistogramBorderKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLHistogramBorderKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(const ICLImage *input, ICLDistribution1D *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input;
+ ICLDistribution1D *_output;
+};
+}
+
+#endif /* __ARM_COMPUTE_CLHISTOGRAMKERNEL_H__*/
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
new file mode 100644
index 0000000000..d2224b53e1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLIM2COLKERNEL_H__
+#define __ARM_COMPUTE_CLIM2COLKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
+ * It is used to transform a convolution to a plain matrix multiplication.
+ *
+ * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * =
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLIm2ColKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLIm2ColKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLIm2ColKernel(const CLIm2ColKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLIm2ColKernel(CLIm2ColKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32
+ * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+ * while every dimension above represents a batch. Data types supported: Same as @p input
+ * @param[in] convolved_dims The convolved output dimensions.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input)
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ */
+ void run_reduced(const Window &window, cl::CommandQueue &queue);
+ /** run the generic convolution layer input reshape kernel
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ */
+ void run_generic(const Window &window, cl::CommandQueue &queue);
+
+ /** Common signature for the kernel to run */
+ using Im2ColFunction = void (CLIm2ColKernel::*)(const Window &, cl::CommandQueue &);
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ std::pair<unsigned int, unsigned int> _convolved_dims;
+ PadStrideInfo _conv_info;
+ int _kernel_size;
+ unsigned int _num_elems_processed_per_iteration;
+ Im2ColFunction _run_func;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLIntegralImageKernel.h b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
new file mode 100644
index 0000000000..0f53c2d2a8
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLIntegralImageKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__
+#define __ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to run the horizontal pass of the integral image kernel. */
+class CLIntegralImageHorKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output Destination tensor, Data types supported: U32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+
+/** Interface to run the vertical pass of the integral image kernel. */
+class CLIntegralImageVertKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLIntegralImageVertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in,out] in_out The input/output tensor. Data types supported: U32
+ */
+ void configure(ICLTensor *in_out);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_in_out;
+};
+}
+#endif /*__ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLLKTrackerKernel.h b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
new file mode 100644
index 0000000000..4d0dbed55d
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLLKTrackerKernel.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLKTRACKERKERNEL_H__
+#define __ARM_COMPUTE_CLLKTRACKERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Internal keypoint structure for Lucas-Kanade Optical Flow */
+struct CLLKInternalKeypoint
+{
+ float x{ 0.f }; /**< x coordinate of the keypoint */
+ float y{ 0.f }; /**< y coordinate of the keypoint */
+ float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
+ float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
+};
+
+/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
+struct CLCoefficientTable
+{
+ float A11; /**< iA11 * FLT_SCALE */
+ float A12; /**< iA11 * FLT_SCALE */
+ float A22; /**< iA11 * FLT_SCALE */
+ float min_eig; /**< Minimum eigenvalue */
+};
+
+/** Structure for storing ival, ixval and iyval for each point inside the window */
+struct CLOldValue
+{
+ int16_t ival; /**< ival extracts from old image */
+ int16_t ixval; /**< ixval extracts from scharr Gx image */
+ int16_t iyval; /**< iyval extracts from scharr Gy image */
+ int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
+};
+
+using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
+using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
+using ICLOldValArray = ICLArray<CLOldValue>;
+
+/** Interface to run the initialization step of LKTracker */
+class CLLKTrackerInitKernel : public ICLKernel
+{
+public:
+ /** Initialise the kernel input and output
+ *
+ * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
+ * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+ * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
+ * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
+ * @param[in] level The pyramid level
+ * @param[in] num_levels The number of pyramid levels
+ * @param[in] pyramid_scale Scale factor used for generating the pyramid
+ */
+ void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */
+class CLLKTrackerFinalizeKernel : public ICLKernel
+{
+public:
+ /** Initialise the kernel input and output
+ *
+ * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
+ */
+ void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */
+class CLLKTrackerStage0Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLKTrackerStage0Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default;
+ /** Initialise the kernel input and output
+ *
+ * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
+ * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16
+ * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16
+ * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
+ * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[out] old_ival Pointer to the array holding internal values
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ size_t window_dimension, size_t level);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_old_input;
+ const ICLTensor *_old_scharr_gx;
+ const ICLTensor *_old_scharr_gy;
+};
+
+/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */
+class CLLKTrackerStage1Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLKTrackerStage1Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default;
+ /** Initialise the kernel input and output
+ *
+ * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
+ * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[in] old_ival Pointer to the array holding internal values
+ * @param[in] termination The criteria to terminate the search of each keypoint.
+ * @param[in] epsilon The error for terminating the algorithm
+ * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_new_input;
+};
+}
+#endif /*__ARM_COMPUTE_CLLKTRACKERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..fda0327461
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor.
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLocallyConnectedMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's input, output and alpha
+ *
+ * @param[in] input0 First input tensor. Data types supported: F32
+ * @param[in] input1 Second input tensor. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+};
+}
+#endif /* __ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
new file mode 100644
index 0000000000..a8e1dcb361
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__
+#define __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Template interface for the kernel to compute magnitude and phase.
+ *
+ */
+class CLMagnitudePhaseKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLMagnitudePhaseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default;
+ /** Initialise the kernel's input, output.
+ *
+ * @note At least one of output1 or output2 must be set.
+ *
+ * @param[in] gx The input gradient X tensor. Data types supported: S16.
+ * @param[in] gy The input gradient Y tensor. Data types supported: S16.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16.
+ * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
+ * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
+ * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
+ */
+ void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+ MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_gx; /**< Input gradient X. */
+ const ICLTensor *_gy; /**< Input gradient Y. */
+ ICLTensor *_magnitude; /**< Output - Magnitude. */
+ ICLTensor *_phase; /**< Output - Phase. */
+ bool _run_mag; /**< Calculate magnitude ? */
+ bool _run_phase; /**< Calculate phase ? */
+};
+}
+
+#endif /* __ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
new file mode 100644
index 0000000000..9f30f76e1b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__
+#define __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
+class CLMeanStdDevKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMeanStdDevKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default;
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input image. Data types supported: U8.
+ * @param[out] mean Input average pixel value.
+ * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
+ * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
+ */
+ void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input;
+ float *_mean;
+ float *_stddev;
+ cl::Buffer *_global_sum;
+ cl::Buffer *_global_sum_squared;
+};
+}
+#endif /* __ARM_COMPUTE_CLMEANSTDDEVKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
new file mode 100644
index 0000000000..5af364b6c6
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLMedian3x3Kernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__
+#define __ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the median 3x3 filter kernel.
+ *
+ */
+class CLMedian3x3Kernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLMEDIAN3X3KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
new file mode 100644
index 0000000000..6a31f3cf18
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__
+#define __ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLKernel.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to perform min max search on an image.
+ */
+class CLMinMaxKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMinMaxKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMinMaxKernel(const CLMinMaxKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMinMaxKernel(CLMinMaxKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input Image. Data types supported: U8 or S16.
+ * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32.
+ */
+ void configure(const ICLImage *input, cl::Buffer *min_max);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Input image. */
+ cl::Buffer *_min_max; /**< Minimum/maximum value. */
+ std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
+};
+
+/** Interface for the kernel to find min max locations of an image.
+ */
+class CLMinMaxLocationKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLMinMaxLocationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
+ /** Initialise the kernel's input and outputs.
+ *
+ * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+ *
+ * @param[in] input Input image. Data types supported: U8 or S16.
+ * @param[in] min_max Buffer of 2 elements which contains the min value at position 0 and the max value at position 1. Data type supported: S32
+ * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+ * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
+ * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
+ */
+ void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+ ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input; /**< Input image. */
+ cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */
+};
+}
+#endif /*__ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
new file mode 100644
index 0000000000..0c59063bbc
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__
+#define __ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to apply a non-linear filter */
+class CLNonLinearFilterKernel : public ICLSimple2DKernel
+{
+public:
+ /** Default constructor */
+ CLNonLinearFilterKernel();
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] input Source tensor. Data types supported: U8
+ * @param[out] output Destination tensor. Data types supported: U8
+ * @param[in] function Non linear function to perform
+ * @param[in] mask_size Mask size. Supported sizes: 3, 5
+ * @param[in] pattern Mask pattern
+ * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+ unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+ bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size; /**< Border size */
+};
+}
+#endif /*__ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
new file mode 100644
index 0000000000..1719bbbb47
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__
+#define __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL
+ *
+ * @note Used by @ref CLFastCorners and @ref CLHarrisCorners
+ */
+class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
+ * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /* __ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
new file mode 100644
index 0000000000..ca9034b162
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class CLNormalizationLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator. */
+ CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16, F32.
+ * @param[in] squared_input Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * Data types should match the input type.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *squared_input, ICLTensor *output, NormalizationLayerInfo norm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_squared_input;
+ ICLTensor *_output;
+ BorderSize _border_size;
+};
+}
+#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
new file mode 100644
index 0000000000..6fbbe95219
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__
+#define __ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the pixelwise multiplication kernel.
+ *
+ */
+class CLPixelWiseMultiplicationKernel : public ICLKernel
+{
+public:
+ /** Default constructor.*/
+ CLPixelWiseMultiplicationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input1 An input tensor. Data types supported: U8, S16, F16, F32.
+ * @param[in] input2 An input tensor. Data types supported: U8, S16, F16, F32.
+ * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16, F16, F32.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
new file mode 100644
index 0000000000..546a40b15e
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the pooling layer kernel */
+class CLPoolingLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLPoolingLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ PoolingLayerInfo _pool_info;
+ BorderSize _border_size;
+};
+}
+#endif /*__ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLRemapKernel.h b/arm_compute/core/CL/kernels/CLRemapKernel.h
new file mode 100644
index 0000000000..7cebf2e817
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLRemapKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLREMAPKERNEL_H__
+#define __ARM_COMPUTE_CLREMAPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a remap on a tensor */
+class CLRemapKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLRemapKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLRemapKernel(const CLRemapKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLRemapKernel &operator=(const CLRemapKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLRemapKernel(CLRemapKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLRemapKernel &operator=(CLRemapKernel &&) = default;
+ /** Initialize the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] map_x Map for X coordinates. Data types supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] policy The interpolation type.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_map_x;
+ const ICLTensor *_map_y;
+};
+}
+#endif /*__ARM_COMPUTE_CLREMAPKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h
new file mode 100644
index 0000000000..e74a7cb82a
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLScaleKernel.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCALEKERNEL_H__
+#define __ARM_COMPUTE_CLSCALEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the warp affine kernel.*/
+class CLScaleKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's inputs, output and interpolation policy
+ *
+ * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
+ *
+ * @param[in] input Source tensor. Data types supported: U8, S16.
+ * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor).
+ * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] policy Interpolation type to use
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLSCALEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
new file mode 100644
index 0000000000..fe245cc351
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLScharr3x3Kernel.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__
+#define __ARM_COMPUTE_CLSCHARR3X3KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
+ *
+ * @f[
+ * \mathbf{G}_x=\begin{vmatrix}
+ * -3 & 0 & +3\\
+ * -10& 0 & +10\\
+ * -3 & 0 & +3
+ * \end{vmatrix}
+ * @f]
+ * @f[
+ * \mathbf{G}_y=\begin{vmatrix}
+ * -3 & -10 & -3\\
+ * 0 & 0 & 0\\
+ * +3 & +10 & +3
+ * \end{vmatrix}
+ * @f]
+ */
+class CLScharr3x3Kernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLScharr3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default;
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ bool _run_scharr_x; /**< Do we need to run Scharr X ? */
+ bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
+ const ICLTensor *_input; /**< Input image */
+ ICLTensor *_output_x; /**< Output image for scharr X */
+ ICLTensor *_output_y; /**< Output image for scharr Y */
+};
+}
+#endif /*__ARM_COMPUTE_CLSCHARR3X3KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
new file mode 100644
index 0000000000..9edeb6ceff
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLSobel3x3Kernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__
+#define __ARM_COMPUTE_CLSOBEL3X3KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */
+class CLSobel3x3Kernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default;
+ /** Default destructor */
+ ~CLSobel3x3Kernel() = default;
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< Output tensor for Sobel X */
+ ICLTensor *_output_y; /**< Output tensor for Sobel Y */
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL3X3KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
new file mode 100644
index 0000000000..e90f8f587e
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLSobel5x5Kernel.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__
+#define __ARM_COMPUTE_CLSOBEL5X5KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */
+class CLSobel5x5HorKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel5x5HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel5x5HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< X output of horizontal pass */
+ ICLTensor *_output_y; /**< Y output of horizontal pass */
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */
+class CLSobel5x5VertKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel5x5VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel5x5VertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set and the corresponding input.
+ *
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
+ const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
+ ICLTensor *_output_x; /**< X output of sobel */
+ ICLTensor *_output_y; /**< Y output of sobel */
+ bool _run_sobel_x; /**< Do we need to run sobel X? */
+ bool _run_sobel_y; /**< Do we need to run sobel Y? */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL5X5KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
new file mode 100644
index 0000000000..e5ef8444ee
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLSobel7x7Kernel.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__
+#define __ARM_COMPUTE_CLSOBEL7X7KERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */
+class CLSobel7x7HorKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel7x7HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel7x7HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< X output of horizontal pass */
+ ICLTensor *_output_y; /**< Y output of horizontal pass */
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */
+class CLSobel7x7VertKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel7x7VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel7x7VertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set and the corresponding input.
+ *
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
+ const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
+ ICLTensor *_output_x; /**< X output of sobel */
+ ICLTensor *_output_y; /**< Y output of sobel */
+ bool _run_sobel_x; /**< Do we need to run sobel X? */
+ bool _run_sobel_y; /**< Do we need to run sobel Y? */
+};
+}
+#endif /*__ARM_COMPUTE_CLSOBEL7X7KERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
new file mode 100644
index 0000000000..0806974ad6
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the identifying the max value of 1D Logits */
+class CLLogits1DMaxKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+ * @param[out] output Destination tensor. Matching input type and channel number.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+
+/** Interface for shifting the logits values around the max value and exponentiating the result */
+class CLLogits1DShiftExpSumKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLogits1DShiftExpSumKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+ * @param[in] max Max values tensor. Matching input type and channel number.
+ * @param[out] output Destination tensor. Matching input type and channel number.
+ * @param[out] sum Sum of 1D logits tensor. Matching input type and channel number.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_max;
+ ICLTensor *_output;
+ ICLTensor *_sum;
+};
+
+/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
+class CLLogits1DNormKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLogits1DNormKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16, F32. Number of channels must be 1.
+ * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Matching input type and channel number.
+ * @param[out] output Destination tensor. Matching input type and channel number.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_sum;
+ ICLTensor *_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLTableLookupKernel.h b/arm_compute/core/CL/kernels/CLTableLookupKernel.h
new file mode 100644
index 0000000000..477f58dc38
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLTableLookupKernel.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__
+#define __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+class ICLLut;
+
+/** Interface for the kernel to perform table lookup calculations. */
+class CLTableLookupKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, lut and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8, S16.
+ * @param[in] lut The input LUT. Data types supported: U8, S16.
+ * @param[out] output The output tensor. Data types supported: U8, S16.
+ */
+ void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLTABLELOOKUPKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h
new file mode 100644
index 0000000000..d7a6ae2cdb
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__
+#define __ARM_COMPUTE_CLTHRESHOLDKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the thresholding kernel.
+ *
+ */
+class CLThresholdKernel : public ICLSimple2DKernel
+{
+public:
+ /**Initialise the kernel's input, output and threshold parameters.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold.
+ * @param[in] false_value value to set when the condition is not respected.
+ * @param[in] true_value value to set when the condition is respected.
+ * @param[in] type Thresholding type. Either RANGE or BINARY.
+ * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+ uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
+};
+}
+#endif /*__ARM_COMPUTE_NETHRESHOLDKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLTransposeKernel.h b/arm_compute/core/CL/kernels/CLTransposeKernel.h
new file mode 100644
index 0000000000..9ad183f8f1
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLTransposeKernel.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__
+#define __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which transposes the elements of a matrix.
+ *
+ * [width, height, batch] -> [height, width, batch]
+ *
+ */
+class CLTransposeKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+};
+}
+#endif /* __ARM_COMPUTE_CLTRANSPOSEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLWarpAffineKernel.h b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
new file mode 100644
index 0000000000..05d6d0a8f7
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLWarpAffineKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__
+#define __ARM_COMPUTE_CLWARPAFFINEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the warp affine kernel.*/
+class CLWarpAffineKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in] matrix The perspective matrix. Must be 2x3 of type float.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+#endif /*__ARM_COMPUTE_CLWARPAFFINEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
new file mode 100644
index 0000000000..5c5013c599
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__
+#define __ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Interface for the warp perspective kernel.*/
+class CLWarpPerspectiveKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const float *matrix, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+}
+
+#endif /*__ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
new file mode 100644
index 0000000000..1dc8a8b80e
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__
+#define __ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class CLWeightsReshapeKernel : public ICLKernel
+{
+public:
+ /** Constructor.
+ *
+ * @param[in] is_shared Flag to indicate whether the weights are shared or not.
+ */
+ CLWeightsReshapeKernel(bool is_shared = false);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default;
+ /** Default destructor */
+ ~CLWeightsReshapeKernel() = default;
+
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: F16, F32
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+ * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
+ */
+ void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output);
+
+ // Inherited methods overridden:
+ virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
+
+protected:
+ bool _is_shared;
+ const ICLTensor *_input;
+ const ICLTensor *_biases;
+ ICLTensor *_output;
+};
+
+/** Interface for the weights reshape kernel used by convolution and fully connected layers.
+ *
+ * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
+ * In combination with the @ref CLIm2ColKernel can transform a convolution into a matrix multiplication.
+ *
+ * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLConvolutionLayerWeightsReshapeKernel : public CLWeightsReshapeKernel
+{
+public:
+ /** Default constructor */
+ CLConvolutionLayerWeightsReshapeKernel();
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface for the weights reshape kernel used by locally connected layers. */
+class CLLocallyConnectedLayerWeightsReshapeKernel : public CLWeightsReshapeKernel
+{
+public:
+ /** Default constructor */
+ CLLocallyConnectedLayerWeightsReshapeKernel();
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /*__ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H__ */