From ac4e873dad6aa6291fc36aff62047a896db04f6a Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Wed, 5 Jul 2017 17:02:25 +0100
Subject: COMPMID-417: Port DepthConcatenate to QS8/QS16 for NEON/CL.

Change-Id: I3dddae63043c7aa18d908a4fc8abacf3c64f98ca
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80081
Tested-by: Kaizen
Reviewed-by: Steven Niu
---
 arm_compute/core/CL/CLHelpers.h                         |  8 ++++++++
 arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h  |  4 ++--
 .../core/NEON/kernels/NEDepthConcatenateKernel.h        | 18 +++++++++++-------
 arm_compute/runtime/CL/functions/CLDepthConcatenate.h   |  9 +++++----
 .../runtime/NEON/functions/NEDepthConcatenate.h         |  9 +++++----
 5 files changed, 31 insertions(+), 17 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index 01980d9793..eeb3e7699d 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -53,6 +53,14 @@ static constexpr const unsigned int max_cl_vector_width = 16;
  */
 std::string get_cl_type_from_data_type(const DataType &dt);
 
+/** Translates fixed point tensor data type to the underlying OpenCL type.
+ *
+ * @param[in] dt @ref DataType to be translated to OpenCL type.
+ *
+ * @return The string specifying the underlying OpenCL type to be used.
+ */
+std::string get_underlying_cl_type_from_data_type(const DataType &dt);
+
 /** Translates a given gpu device target to string.
  *
  * @param[in] target Given gpu target.
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
index eda4c66883..e85e0ec232 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
@@ -52,9 +52,9 @@ public:
     ~CLDepthConcatenateKernel() = default;
     /** Initialise the kernel's inputs and output
      *
-     * @param[in]     input         Input tensor. Data types supported: F32.
+     * @param[in]     input         Input tensor. Data types supported: QS8/QS16/F16/F32.
      * @param[in]     depth_offset  The offset on the Z axis.
-     * @param[in,out] output        Output tensor. Data types supported: F32.
+     * @param[in,out] output        Output tensor. Data types supported: Same as @p input.
      *
      * @note: The output tensor's low two dimensions can't be smaller than the input one's.
      * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
index 7384cd1f02..b22d37bfe6 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
@@ -51,9 +51,9 @@ public:
     ~NEDepthConcatenateKernel() = default;
     /** Initialise the kernel's inputs and output
      *
-     * @param[in]     input         Input tensor. Data types supported: F32.
+     * @param[in]     input         Input tensor. Data types supported: QS8/QS16/F16/F32.
      * @param[in]     depth_offset  The offset on the Z axis.
-     * @param[in,out] output        Output tensor. Data types supported: F32.
+     * @param[in,out] output        Output tensor. Data types supported: Same as @p input.
      *
      * @note: The output tensor's low two dimensions can't be smaller than the input one's.
      * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
@@ -66,11 +66,15 @@ public:
     BorderSize border_size() const override;
 
 private:
-    const ITensor *_input;
-    ITensor       *_output;
-    int            _top_bottom;
-    int            _left_right;
-    unsigned int   _depth_offset;
+    using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, int depth_offset, const Window &window);
+
+private:
+    DepthConcatFunction *_func;
+    const ITensor       *_input;
+    ITensor             *_output;
+    int                  _top_bottom;
+    int                  _left_right;
+    unsigned int         _depth_offset;
 };
 }
 #endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
index 3199936b82..77997f6bd1 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
@@ -29,14 +29,15 @@
 #include "arm_compute/core/Window.h"
 #include "arm_compute/runtime/IFunction.h"
 
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+
 #include <memory>
 #include <vector>
 
 namespace arm_compute
 {
 class ICLTensor;
-class CLDepthConcatenateKernel;
-class CLFillBorderKernel;
 
 /** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
  *
@@ -51,8 +52,8 @@ public:
     CLDepthConcatenate();
     /** Initialise the kernel's inputs vector and output.
      *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F32.
-     * @param[out]    output        Output tensor. Data types supported: F32.
+     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
      */
     void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
 
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
index 02ff1227c7..cc65099575 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
@@ -26,14 +26,15 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
+#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h"
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+
 #include <memory>
 #include <vector>
 
 namespace arm_compute
 {
 class ITensor;
-class NEDepthConcatenateKernel;
-class NEFillBorderKernel;
 
 /** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
  *
@@ -48,8 +49,8 @@ public:
     NEDepthConcatenate();
    /** Initialise the kernel's inputs vector and output.
     *
-    * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F32.
-    * @param[out]    output        Output tensor. Data types supported: F32.
+    * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+    * @param[out]    output        Output tensor. Data types supported: Same as @p inputs_vector.
     */
    void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
 
--
cgit v1.2.1
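
Usage sketch (not part of the patch above): the headers in this change only declare the ported interfaces, so the snippet below is a minimal, hypothetical example of driving the newly supported QS8 fixed-point type through the NEON runtime function. It assumes the library's usual Tensor/TensorAllocator/TensorInfo flow; the tensor shapes and the fixed-point position of 4 are illustrative choices, not anything specified by this change.

// Minimal sketch, assuming the Tensor/TensorInfo API of this era of the
// library: concatenate two QS8 tensors along the Z axis with the NEON
// runtime function whose configure() signature is shown in the diff above.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src0{}, src1{}, dst{};

    // Illustrative shapes: two 16x16 inputs with 3 and 5 channels feeding a
    // 16x16x8 output; fixed-point position 4 is an arbitrary QS8 choice.
    src0.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::QS8, 4));
    src1.allocator()->init(TensorInfo(TensorShape(16U, 16U, 5U), 1, DataType::QS8, 4));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::QS8, 4));

    // Configure first, then allocate the backing memory.
    NEDepthConcatenate concat{};
    concat.configure({ &src0, &src1 }, &dst);

    src0.allocator()->allocate();
    src1.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src0 and src1 with QS8 data here ...

    concat.run(); // dst now holds src0 and src1 stacked along the Z axis
    return 0;
}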