From 164a2727d3bbce0e575d24b7db787c85e2e2c203 Mon Sep 17 00:00:00 2001 From: giuros01 Date: Tue, 20 Nov 2018 18:34:46 +0000 Subject: COMPMID-1717: CL: Implement Maximum, Minimum, SquaredDifference Change-Id: Ice653e48211053bd3cd20a693bd76de6b4efc370 Reviewed-on: https://review.mlplatform.org/270 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- arm_compute/runtime/CL/CLFunctions.h | 4 +- .../runtime/CL/functions/CLArithmeticAddition.h | 64 ------- .../runtime/CL/functions/CLArithmeticDivision.h | 62 ------- .../runtime/CL/functions/CLArithmeticSubtraction.h | 67 ------- .../runtime/CL/functions/CLElementwiseOperations.h | 206 +++++++++++++++++++++ .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 20 +- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 145 +++++++-------- .../runtime/CL/functions/CLLaplacianPyramid.h | 4 +- .../runtime/CL/functions/CLLaplacianReconstruct.h | 2 +- arm_compute/runtime/CL/functions/CLRNNLayer.h | 22 +-- arm_compute/runtime/CL/functions/CLReduceMean.h | 2 +- 11 files changed, 304 insertions(+), 294 deletions(-) delete mode 100644 arm_compute/runtime/CL/functions/CLArithmeticAddition.h delete mode 100644 arm_compute/runtime/CL/functions/CLArithmeticDivision.h delete mode 100644 arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h create mode 100644 arm_compute/runtime/CL/functions/CLElementwiseOperations.h (limited to 'arm_compute/runtime') diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index 780597ef07..e68e719a13 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -29,9 +29,6 @@ #include "arm_compute/runtime/CL/functions/CLAccumulate.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" -#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" -#include "arm_compute/runtime/CL/functions/CLArithmeticDivision.h" -#include 
"arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" #include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" #include "arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h" #include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" @@ -63,6 +60,7 @@ #include "arm_compute/runtime/CL/functions/CLDerivative.h" #include "arm_compute/runtime/CL/functions/CLDilate.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h" #include "arm_compute/runtime/CL/functions/CLErode.h" #include "arm_compute/runtime/CL/functions/CLFastCorners.h" diff --git a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h b/arm_compute/runtime/CL/functions/CLArithmeticAddition.h deleted file mode 100644 index 5aba60ad01..0000000000 --- a/arm_compute/runtime/CL/functions/CLArithmeticAddition.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_CLARITHMETICADDITION_H__ -#define __ARM_COMPUTE_CLARITHMETICADDITION_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLArithmeticAdditionKernel - * - * @note The tensor data type for the inputs must be U8/S16/F16/F32. - * @note The function performs an arithmetic addition between two tensors. - */ -class CLArithmeticAddition : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and convertion policy. - * - * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticAddition - * - * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. 
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); -}; -} -#endif /* __ARM_COMPUTE_CLARITHMETICADDITION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLArithmeticDivision.h b/arm_compute/runtime/CL/functions/CLArithmeticDivision.h deleted file mode 100644 index c91435cee9..0000000000 --- a/arm_compute/runtime/CL/functions/CLArithmeticDivision.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLARITHMETICDIVISION_H__ -#define __ARM_COMPUTE_CLARITHMETICDIVISION_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLArithmeticDivisionKernel - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an arithmetic division between two tensors. - */ -class CLArithmeticDivision : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's inputs, output. - * - * @param[in, out] input1 First tensor input. Data types supported: F16/F32. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] input2 Second tensor input. Same as @p input1. - * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticDivision - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); -}; -} -#endif /* __ARM_COMPUTE_CLARITHMETICDIVISION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h b/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h deleted file mode 100644 index 2940044ed9..0000000000 --- a/arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ -#define __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ - -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" - -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Basic function to run @ref CLArithmeticSubtractionKernel - * - * @note The tensor data type for the inputs must be U8/S16/F16/F32. - * @note The function performs an arithmetic subtraction between two tensors. - * - * This function calls the following kernels: - * -# @ref CLFillBorderKernel (In case of broadcasting, in the input being broadcasted) - * -# @ref CLArithmeticSubtractionKernel - */ -class CLArithmeticSubtraction : public ICLSimpleFunction -{ -public: - /** Initialise the kernel's inputs, output and convertion policy. - * - * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32. - * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32. - * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. - */ - void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticSubtraction - * - * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32. - * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), S16/F16/F32. - * @param[in] policy Policy to use to handle overflow. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); -}; -} -#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTION_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h new file mode 100644 index 0000000000..4a0911ec4e --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H__ +#define __ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for addition + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32. + * @note The function performs an arithmetic addition between two tensors. + */ +class CLArithmeticAddition : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32. 
+ * @param[in] policy Policy to use to handle overflow. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); +}; + +/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for subtraction + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32. + * @note The function performs an arithmetic subtraction between two tensors. + */ +class CLArithmeticSubtraction : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * @param[in] policy Policy to use to handle overflow. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32. 
+ * @param[in] policy Policy to use to handle overflow. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); +}; + +/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for division + * + * @note The tensor data type for the inputs must be F16/F32. + * @note The function performs an arithmetic division between two tensors. + */ +class CLArithmeticDivision : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output. + * + * @param[in, out] input1 First tensor input. Data types supported: F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Same as @p input1. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: Same as @p input1. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticDivision + * + * @param[in] input1 First tensor input info. Data types supported: F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. + * @param[in] output Output tensor info. Data types supported: Same as @p input1. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); +}; + +/** Basic function to run @ref CLArithmeticOperationKernel for max + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32. + * @note The function performs a max operation between two tensors. 
+ */ +class CLElementwiseMax : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output. + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); +}; + +/** Basic function to run @ref CLArithmeticOperationKernel for min + * + * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32. + * @note The function performs a min operation between two tensors. + */ +class CLElementwiseMin : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output. + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. 
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); +}; + +/** Basic function to run @ref CLArithmeticOperationKernel for squared difference + * + * @note The tensor data type for the inputs must be QASYMM8/U8/S16/F16/F32. + * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2) + */ +class CLElementwiseSquaredDiff : public ICLSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output. + * + * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[in, out] input2 Second tensor input. 
Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. + * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32. + */ + void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference + * + * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/F16/F32. + * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32. + * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32. + * + * @return a status + */ + static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLELEMENTWISEOPERATIONS_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index fbf0c08b36..1468b156eb 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -26,8 +26,8 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h" +#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" @@ -90,7 +90,7 @@ private: * -# @ref CLGEMM (if the data type 
is FP32 or FP16) * -# @ref CLGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8) * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8) - * -# @ref CLArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout) + * -# @ref CLElementwiseOperationKernel for addition (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout) * -# @ref CLCol2ImKernel (if NCHW data layout) */ class CLGEMMConvolutionLayer : public IFunction @@ -185,14 +185,14 @@ private: int gemm_3d_depth = 1, bool skip_im2col = false); private: - CLMemoryGroup _memory_group; - CLConvolutionLayerReshapeWeights _reshape_weights; - CLIm2ColKernel _im2col_kernel; - CLGEMM _mm_gemm; - CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - CLCol2ImKernel _col2im_kernel; - CLActivationLayer _activationlayer_function; - CLArithmeticAdditionKernel _add_bias_kernel; + CLMemoryGroup _memory_group; + CLConvolutionLayerReshapeWeights _reshape_weights; + CLIm2ColKernel _im2col_kernel; + CLGEMM _mm_gemm; + CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp; + CLCol2ImKernel _col2im_kernel; + CLActivationLayer _activationlayer_function; + CLSaturatedArithmeticOperationKernel _add_bias_kernel; const ICLTensor *_original_weights; diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 72e41a7aca..87fb1190b7 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -27,14 +27,13 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" -#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h" #include "arm_compute/core/CL/kernels/CLCopyKernel.h" +#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" 
#include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" @@ -141,76 +140,76 @@ public: void run() override; private: - CLMemoryGroup _memory_group; - CLFullyConnectedLayer _fully_connected_input_gate; - CLGEMM _gemm_input_gate; - CLTransposeKernel _transpose_input_gate; - CLArithmeticAdditionKernel _accum_input_gate1; - CLArithmeticAddition _accum_input_gate2; - CLArithmeticSubtractionKernel _subtract_input_gate; - CLPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; - CLActivationLayerKernel _activation_input_gate; - CLFullyConnectedLayer _fully_connected_forget_gate; - CLGEMM _gemm_forget_gate; - CLTransposeKernel _transpose_forget_gate; - CLArithmeticAdditionKernel _accum_forget_gate1; - CLArithmeticAddition _accum_forget_gate2; - CLPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; - CLActivationLayerKernel _activation_forget_gate; - CLFullyConnectedLayer _fully_connected_cell_state; - CLGEMM _gemm_cell_state1; - CLGEMM _gemm_cell_state2; - CLTransposeKernel _transpose_cell_state; - CLArithmeticAdditionKernel _accum_cell_state1; - CLArithmeticAdditionKernel _accum_cell_state2; - CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; - CLActivationLayerKernel _activation_cell_state; - CLActivationLayerKernel _cell_clip; - CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; - CLFullyConnectedLayer _fully_connected_output; - CLGEMM _gemm_output; - CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; - CLTransposeKernel _transpose_output; - CLArithmeticAdditionKernel _accum_output1; - CLArithmeticAddition _accum_output2; - 
CLActivationLayerKernel _activation_output; - CLActivationLayerKernel _activation_output_state; - CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; - CLFullyConnectedLayer _fully_connected_output_state; - CLGEMM _gemm_output_state; - CLArithmeticAdditionKernel _accum_output_state; - CLActivationLayerKernel _projection_clip; - CLCopyKernel _copy_cell_state; - CLCopyKernel _copy_output; - CLWidthConcatenateLayer _concat_scratch_buffer; - CLTensor _input_gate_out1; - CLTensor _input_gate_out2; - CLTensor _input_gate_out3; - CLTensor _input_gate_out4; - CLTensor _input_gate_out5; - CLTensor _forget_gate_out1; - CLTensor _forget_gate_out2; - CLTensor _forget_gate_out3; - CLTensor _forget_gate_out4; - CLTensor _forget_gate_out5; - CLTensor _cell_state_out1; - CLTensor _cell_state_out2; - CLTensor _cell_state_out3; - CLTensor _cell_state_out4; - CLTensor _cell_state_out5; - CLTensor _output1; - CLTensor _output2; - CLTensor _output3; - CLTensor _output4; - CLTensor _output5; - CLTensor _cell_state_activation; - CLTensor _output_state1; - CLTensor _ones; - bool _run_peephole_opt; - bool _run_cifg_opt; - bool _perform_cell_clipping; - bool _has_projection_weights; - bool _perform_projection_clipping; + CLMemoryGroup _memory_group; + CLFullyConnectedLayer _fully_connected_input_gate; + CLGEMM _gemm_input_gate; + CLTransposeKernel _transpose_input_gate; + CLSaturatedArithmeticOperationKernel _accum_input_gate1; + CLArithmeticAddition _accum_input_gate2; + CLSaturatedArithmeticOperationKernel _subtract_input_gate; + CLPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; + CLActivationLayerKernel _activation_input_gate; + CLFullyConnectedLayer _fully_connected_forget_gate; + CLGEMM _gemm_forget_gate; + CLTransposeKernel _transpose_forget_gate; + CLSaturatedArithmeticOperationKernel _accum_forget_gate1; + CLArithmeticAddition _accum_forget_gate2; + CLPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; + CLActivationLayerKernel _activation_forget_gate; + 
CLFullyConnectedLayer _fully_connected_cell_state; + CLGEMM _gemm_cell_state1; + CLGEMM _gemm_cell_state2; + CLTransposeKernel _transpose_cell_state; + CLSaturatedArithmeticOperationKernel _accum_cell_state1; + CLSaturatedArithmeticOperationKernel _accum_cell_state2; + CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; + CLActivationLayerKernel _activation_cell_state; + CLActivationLayerKernel _cell_clip; + CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; + CLFullyConnectedLayer _fully_connected_output; + CLGEMM _gemm_output; + CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; + CLTransposeKernel _transpose_output; + CLSaturatedArithmeticOperationKernel _accum_output1; + CLArithmeticAddition _accum_output2; + CLActivationLayerKernel _activation_output; + CLActivationLayerKernel _activation_output_state; + CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state2; + CLFullyConnectedLayer _fully_connected_output_state; + CLGEMM _gemm_output_state; + CLSaturatedArithmeticOperationKernel _accum_output_state; + CLActivationLayerKernel _projection_clip; + CLCopyKernel _copy_cell_state; + CLCopyKernel _copy_output; + CLWidthConcatenateLayer _concat_scratch_buffer; + CLTensor _input_gate_out1; + CLTensor _input_gate_out2; + CLTensor _input_gate_out3; + CLTensor _input_gate_out4; + CLTensor _input_gate_out5; + CLTensor _forget_gate_out1; + CLTensor _forget_gate_out2; + CLTensor _forget_gate_out3; + CLTensor _forget_gate_out4; + CLTensor _forget_gate_out5; + CLTensor _cell_state_out1; + CLTensor _cell_state_out2; + CLTensor _cell_state_out3; + CLTensor _cell_state_out4; + CLTensor _cell_state_out5; + CLTensor _output1; + CLTensor _output2; + CLTensor _output3; + CLTensor _output4; + CLTensor _output5; + CLTensor _cell_state_activation; + CLTensor _output_state1; + CLTensor _ones; + bool _run_peephole_opt; + bool _run_cifg_opt; + bool _perform_cell_clipping; + bool _has_projection_weights; + bool _perform_projection_clipping; }; } #endif /* 
__ARM_COMPUTE_CLLSTMLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h index 585a013e31..ae86e931df 100644 --- a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h +++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,8 +26,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLPyramid.h" -#include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h" #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h" #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h" #include "arm_compute/runtime/IFunction.h" diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h index 6905b03652..622b049f11 100644 --- a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h +++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h @@ -26,8 +26,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLPyramid.h" -#include "arm_compute/runtime/CL/functions/CLArithmeticAddition.h" #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLScale.h" #include "arm_compute/runtime/IFunction.h" diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h index ab7407dbfc..fc86992bdf 100644 --- a/arm_compute/runtime/CL/functions/CLRNNLayer.h +++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h @@ -25,8 +25,8 @@ #define __ARM_COMPUTE_CLRNN_LAYER_H__ #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" -#include 
"arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h" #include "arm_compute/core/CL/kernels/CLCopyKernel.h" +#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLGEMM.h" @@ -72,16 +72,16 @@ public: void prepare() override; private: - CLMemoryGroup _memory_group; - CLGEMM _gemm_state_f; - CLArithmeticAdditionKernel _add_kernel; - CLActivationLayerKernel _activation_kernel; - CLFullyConnectedLayer _fully_connected_kernel; - CLCopyKernel _copy_kernel; - CLTensor _fully_connected_out; - CLTensor _gemm_output; - CLTensor _add_output; - bool _is_prepared; + CLMemoryGroup _memory_group; + CLGEMM _gemm_state_f; + CLSaturatedArithmeticOperationKernel _add_kernel; + CLActivationLayerKernel _activation_kernel; + CLFullyConnectedLayer _fully_connected_kernel; + CLCopyKernel _copy_kernel; + CLTensor _fully_connected_out; + CLTensor _gemm_output; + CLTensor _add_output; + bool _is_prepared; }; } #endif /* __ARM_COMPUTE_CLRNN_LAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLReduceMean.h b/arm_compute/runtime/CL/functions/CLReduceMean.h index 5a919e5dcd..ba10134a00 100644 --- a/arm_compute/runtime/CL/functions/CLReduceMean.h +++ b/arm_compute/runtime/CL/functions/CLReduceMean.h @@ -25,7 +25,7 @@ #define __ARM_COMPUTE_CL_REDUCE_MEAN_H__ #include "arm_compute/runtime/CL/ICLSimpleFunction.h" -#include "arm_compute/runtime/CL/functions/CLArithmeticDivision.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" #include "arm_compute/runtime/CL/functions/CLReductionOperation.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" #include "arm_compute/runtime/IMemoryManager.h" -- cgit v1.2.1