From 1167487ea8e54a76d0a3625e0aa84e2ad9ffd317 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Wed, 7 Feb 2018 15:38:12 +0000
Subject: COMPMID-897 Merge batch normalization with bounded relu

Change-Id: I9a607fe620f795cdea1a99fdd3f5f8c2fc76f980
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119234
Tested-by: Jenkins
Reviewed-by: Gian Marco Iodice
Reviewed-by: Georgios Pinitas
---
 .../CL/kernels/CLBatchNormalizationLayerKernel.h   |  45 ++++----
 .../kernels/GCBatchNormalizationLayerKernel.h      |  22 ++--
 arm_compute/core/Types.h                           |  14 ++-
 arm_compute/graph/nodes/BatchNormalizationLayer.h  |  28 ++---
 .../CL/functions/CLBatchNormalizationLayer.h       |  45 ++++----
 .../functions/GCBatchNormalizationLayer.h          |  22 ++--
 .../NEON/functions/NEBatchNormalizationLayer.h     |  49 ++++----
 examples/graph_inception_v3.cpp                    | 123 +++++++--------------
 examples/graph_mobilenet.cpp                       |  10 +-
 src/core/CL/cl_kernels/batchnormalization_layer.cl |  21 +++-
 .../CL/kernels/CLBatchNormalizationLayerKernel.cpp |  49 +++++---
 .../cs_shaders/batchnormalization_layer.cs         |  22 +++-
 .../kernels/GCBatchNormalizationLayerKernel.cpp    |  20 +++-
 src/graph/nodes/BatchNormalizationLayer.cpp        |   3 +-
 src/graph/operations/CLSimpleOperations.cpp        |  22 ++--
 src/graph/operations/NESimpleOperations.cpp        |  20 ++--
 .../CL/functions/CLBatchNormalizationLayer.cpp     |  11 +-
 .../functions/GCBatchNormalizationLayer.cpp        |   7 +-
 .../NEON/functions/NEBatchNormalizationLayer.cpp   |  27 ++++-
 tests/benchmark/CL/BatchNormalizationLayer.cpp     |  26 ++++-
 .../GLES_COMPUTE/BatchNormalizationLayer.cpp       |  28 ++++-
 tests/benchmark/NEON/BatchNormalizationLayer.cpp   |  27 ++++-
 .../fixtures/BatchNormalizationLayerFixture.h      |   4 +-
 .../MobileNetBatchNormalizationLayerDataset.h      |  68 ++++++++++++
 tests/validation/CL/BatchNormalizationLayer.cpp    |  49 ++++++--
 .../GLES_COMPUTE/BatchNormalizationLayer.cpp       |  14 ++-
 tests/validation/NEON/BatchNormalizationLayer.cpp  |  45 ++++++--
 .../fixtures/BatchNormalizationLayerFixture.h      |  20 ++--
 .../reference/BatchNormalizationLayer.cpp          |  24 ++--
 .../validation/reference/BatchNormalizationLayer.h |   8 +-
 30 files changed, 571 insertions(+), 302 deletions(-)
 mode change 100755 => 100644 tests/benchmark/GLES_COMPUTE/BatchNormalizationLayer.cpp
 create mode 100644 tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h

diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index 8643d83bcc..fee5dd3bae 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,35 +52,38 @@ public:
      *
      * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
      *
-     * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
-     *                         3 lower dimensions represent a single input with dimensions [width, height, FM].
-     *                         The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
-     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
-     * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM].
Data types supported: Same as @p input - * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon, + ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. 
Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon); + float epsilon, ActivationLayerInfo act_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h index 2bbd6a83fe..15d7f79afb 100644 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,16 +50,18 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. 
Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. */ - void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overridden: void run(const Window &window) override; diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 5a08ac9153..3affe7e8ec 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -713,6 +713,7 @@ public: LINEAR /**< Linear ( \f$ f(x)= ax + b \f$ ) */ }; + ActivationLayerInfo() = default; /** Default Constructor * * @param[in] f The activation function to use. @@ -721,7 +722,7 @@ public: * @param[in] b (Optional) The beta parameter used by some activation functions (@ref ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref ActivationFunction::TANH). */ ActivationLayerInfo(ActivationFunction f, float a = 0.0f, float b = 0.0f) - : _act(f), _a(a), _b(b) + : _act(f), _a(a), _b(b), _enabled(true) { } ActivationFunction activation() const @@ -736,11 +737,16 @@ public: { return _b; } + bool enabled() const + { + return _enabled; + } private: - ActivationFunction _act; - float _a; - float _b; + ActivationFunction _act = { ActivationLayerInfo::ActivationFunction::LOGISTIC }; + float _a = {}; + float _b = {}; + bool _enabled = { false }; }; /** Normalization Layer Information class */ diff --git a/arm_compute/graph/nodes/BatchNormalizationLayer.h b/arm_compute/graph/nodes/BatchNormalizationLayer.h index df7b1d19a9..266c3905d8 100644 --- a/arm_compute/graph/nodes/BatchNormalizationLayer.h +++ b/arm_compute/graph/nodes/BatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,15 +40,16 @@ class BatchNormalizationLayer final : public INode public: /** Default constructor * - * @param[in] mean Mean values tensor - * @param[in] var Var values tensor - * @param[in] gamma Gamma values tensor - * @param[in] beta Beta values tensor - * @param[in] epsilon Epsilon value + * @param[in] mean Mean values tensor + * @param[in] var Var values tensor + * @param[in] gamma Gamma values tensor + * @param[in] beta Beta values tensor + * @param[in] epsilon Epsilon value + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
      */
     template <typename AccessorType>
-    BatchNormalizationLayer(AccessorType &&mean, AccessorType &&var, AccessorType &&gamma, AccessorType &&beta, float epsilon)
-        : _mean(std::move(mean)), _var(std::move(var)), _gamma(std::move(gamma)), _beta(std::move(beta)), _epsilon(epsilon)
+    BatchNormalizationLayer(AccessorType &&mean, AccessorType &&var, AccessorType &&gamma, AccessorType &&beta, float epsilon, ActivationLayerInfo act_info = ActivationLayerInfo())
+        : _mean(std::move(mean)), _var(std::move(var)), _gamma(std::move(gamma)), _beta(std::move(beta)), _epsilon(epsilon), _act_info(act_info)
     {
     }
 
@@ -56,11 +57,12 @@ public:
     std::unique_ptr<arm_compute::IFunction> instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output) override;
 
 private:
-    Tensor _mean;
-    Tensor _var;
-    Tensor _gamma;
-    Tensor _beta;
-    float  _epsilon;
+    Tensor              _mean;
+    Tensor              _var;
+    Tensor              _gamma;
+    Tensor              _beta;
+    float               _epsilon;
+    ActivationLayerInfo _act_info;
 };
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 127de10555..3d5145a697 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,35 +48,38 @@ public:
      *
      * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
      *
-     * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
-     *                         3 lower dimensions represent a single input with dimensions [width, height, FM].
-     *                         The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
-     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
-     * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      epsilon Small value to avoid division with zero.
+     * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+     *                          3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                          The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]     output   Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in]      mean     Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      var      Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      beta     Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      gamma    Gamma values tensor. 1 dimension with size equal to the feature maps [FM].
Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon, + ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayer * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
* * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon); + float epsilon, ActivationLayerInfo act_info); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h index 9d81b9a7f7..01e53d26f5 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,16 +46,18 @@ public: GCBatchNormalizationLayer(); /** Set the input and output tensors. * - * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
*/ - void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 1933468afc..5c8200beda 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" namespace arm_compute { @@ -47,41 +48,47 @@ public: * * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
*/ - void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); + void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, + ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayer * - * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] epsilon Small value to avoid division with zero. + * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. + * 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
* * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon); + float epsilon, ActivationLayerInfo act_info); // Inherited methods overridden: void run() override; private: NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ + // COMPMID-906 Use fused activation in NEON Batch normalization + NEActivationLayer _act_func; + bool _act_info_enabled; }; } #endif /* __ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H__ */ diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index 6e31f5e669..88a0325b63 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -101,8 +101,7 @@ public: "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"), @@ -113,8 +112,7 @@ public: "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"), @@ -125,8 +123,7 @@ public: "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) @@ -139,8 +136,7 @@ public: "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer(3U, 3U, 192U, get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"), @@ -151,8 +147,7 @@ public: "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) @@ -231,8 +226,7 @@ private: get_weights_accessor(data_path, total_path + 
"Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b; i_b << ConvolutionLayer( @@ -245,8 +239,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 5U, 5U, std::get<1>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"), @@ -257,8 +250,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_c; i_c << ConvolutionLayer( @@ -271,8 +263,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<1>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), @@ -283,8 +274,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<2>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"), @@ -295,8 +285,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_d; i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) @@ -310,8 +299,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, 
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -332,8 +320,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b; i_b << ConvolutionLayer( @@ -346,8 +333,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<1>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"), @@ -358,8 +344,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<2>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"), @@ -370,8 +355,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_c; i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) @@ -399,8 +383,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b; i_b << ConvolutionLayer( @@ -413,8 +396,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 7U, 1U, std::get<1>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), @@ -425,8 +407,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 
1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 1U, 7U, std::get<2>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), @@ -437,8 +418,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_c; i_c << ConvolutionLayer( @@ -451,8 +431,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 1U, 7U, std::get<1>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"), @@ -463,8 +442,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 7U, 1U, std::get<2>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"), @@ -475,8 +453,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 1U, 7U, std::get<3>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"), @@ -487,8 +464,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 7U, 1U, std::get<4>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"), @@ -499,8 +475,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, 
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_d; i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) @@ -514,8 +489,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -536,8 +510,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<1>(a_filters), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"), @@ -548,8 +521,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b; i_b << ConvolutionLayer( @@ -562,8 +534,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 7U, 1U, std::get<1>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"), @@ -574,8 +545,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 1U, 7U, std::get<2>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"), @@ -586,8 +556,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<3>(b_filters), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"), @@ -598,8 +567,7 @@ private: 
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_c; i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) @@ -635,8 +603,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b1; i_b1 << ConvolutionLayer( @@ -649,8 +616,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b2; i_b2 << ConvolutionLayer( @@ -663,8 +629,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_b; i_b << ConvolutionLayer( @@ -677,8 +642,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_b1), std::move(i_b2)); SubGraph i_c1; @@ -692,8 +656,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_c2; i_c2 << ConvolutionLayer( @@ -706,8 +669,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); SubGraph i_c; i_c << ConvolutionLayer( @@ -720,8 +682,7 @@ private: get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << ConvolutionLayer( 3U, 3U, std::get<1>(c_filters), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"), @@ -732,8 +693,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) << BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_c1), std::move(i_c2)); SubGraph i_d; @@ -748,8 +708,7 @@ private: get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 6d3a88e540..d3d4774eaa 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -122,9 +122,7 @@ public: get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) << get_dwsc_node(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)) << get_dwsc_node(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) << get_dwsc_node(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)) @@ -174,8 +172,7 @@ private: get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), - 0.001f) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) << ConvolutionLayer( 1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), @@ -186,8 +183,7 @@ private: get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), - 0.001f) - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); + 0.001f, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)); return BranchLayer(std::move(sg)); } diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl index fbffefb3c0..5ddeb1a6a1 100644 --- a/src/core/CL/cl_kernels/batchnormalization_layer.cl +++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl @@ -23,6 +23,8 @@ */ #include "helpers.h" +#if defined(VEC_SIZE) && defined(DATA_TYPE) + #if defined(FIXED_POINT_POSITION) #include "fixed_point.h" @@ -42,6 +44,16 @@ #endif /* FIXED_POINT_POSITION */ +#if defined(LU_BRELU) +#define ACTIVATION_FUNC(x) CLAMP(x, (DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL) +#elif defined(BRELU) +#define ACTIVATION_FUNC(x) CLAMP(x, (DATA_TYPE)0, (DATA_TYPE)A_VAL) +#elif defined(RELU) +#define ACTIVATION_FUNC(x) max(x, (DATA_TYPE)0) +#else /* FUSED_ACT */ +#define ACTIVATION_FUNC(x) (x) +#endif /* FUSED_ACT */ + /** Apply batch normalization. * * @param[in] input_ptr Pointer to the first source tensor. Supported data types: QS8/QS16/F16/F32 @@ -126,6 +138,13 @@ __kernel void batchnormalization_layer(TENSOR3D_DECLARATION(input), gamma_vec = *((__global DATA_TYPE *)(gamma.ptr + current_slice * gamma.stride_x)); beta_vec = *((__global DATA_TYPE *)(beta.ptr + current_slice * beta.stride_x)); + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + res = ADD_OP(MUL_OP(gamma_vec, x_bar), beta_vec); + + res = ACTIVATION_FUNC(res); + VSTORE(VEC_SIZE) - (ADD_OP(MUL_OP(gamma_vec, x_bar), beta_vec), 0, (__global DATA_TYPE *)out.ptr); + (res, 0, (__global DATA_TYPE *)out.ptr); } + +#endif /* defined(VEC_SIZE) && defined(DATA_TYPE) */ \ No newline at end of file diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 663b044b5d..95487a23db 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
                           const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma,
-                          float epsilon)
+                          float epsilon, ActivationLayerInfo act_info)
 {
     ARM_COMPUTE_UNUSED(epsilon);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
@@ -50,6 +50,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var, beta, gamma);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, mean, var, beta, gamma);
     ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != mean->dimension(0));
+    if(act_info.enabled())
+    {
+        ActivationLayerInfo::ActivationFunction act = act_info.activation();
+        ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32 && input->data_type() != DataType::F16);
+        ARM_COMPUTE_RETURN_ERROR_ON(act != ActivationLayerInfo::ActivationFunction::RELU && act != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
+                                    && act != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
+        ARM_COMPUTE_RETURN_ERROR_ON(act_info.b() > act_info.a());
+    }
 
     if(output != nullptr && output->total_size() != 0)
     {
@@ -98,7 +106,7 @@ CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
 }
 
 void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma,
-                                                float epsilon)
+                                                float epsilon, ActivationLayerInfo act_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, var, beta, gamma);
 
@@ -118,22 +126,22 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
     }
 
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr,
-                                                  mean->info(), var->info(), beta->info(), gamma->info(), epsilon));
+                                                  mean->info(), var->info(), beta->info(), gamma->info(), epsilon, act_info));
 
     const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
 
     // Set build options
-    std::set<std::string> build_opts;
-    build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-    build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-    build_opts.emplace(output == nullptr ? "-DIN_PLACE" : "");
-    if(is_data_type_fixed_point(input->info()->data_type()))
-    {
-        build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
-    }
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+    build_opts.add_option_if(act_info.enabled(), "-D" + string_from_activation_func(act_info.activation()));
+    build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
+    build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
+    build_opts.add_option_if(output == nullptr, "-DIN_PLACE");
+    build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()), "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
 
     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts));
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts.options()));
 
     // Set kernel static arguments
     unsigned int include_output = (output != nullptr) ? 1 : 0;
@@ -144,14 +152,23 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out
     auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info());
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     ICLKernel::configure(win_config.second);
+
+    _config_id = "batch_normalization_layer_";
+    _config_id += string_from_data_type(input->info()->data_type());
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(input->info()->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(input->info()->dimension(1));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(input->info()->dimension(2));
 }
 
 Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
                                                  const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma,
-                                                 float epsilon)
+                                                 float epsilon, ActivationLayerInfo act_info)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info));
     ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first);
 
     return Status{};
@@ -182,7 +199,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
         {
             add_3D_tensor_argument(idx, _output, slice);
         }
-        enqueue(queue, *this, slice);
+        enqueue(queue, *this, slice, _lws_hint);
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
index 53fb51557c..7629b255b7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
* * SPDX-License-Identifier: MIT * @@ -36,6 +36,16 @@ precision mediump float; #define INVSQRT_OP(a) inversesqrt((a)) #define SQCVT_SAT(a) (a) +#if defined(LU_BRELU) +#define ACTIVATION_FUNC(x) min(max(x, float(B_VAL)), float(A_VAL)) +#elif defined(BRELU) +#define ACTIVATION_FUNC(x) min(max(x, float(0)), float(A_VAL)) +#elif defined(RELU) +#define ACTIVATION_FUNC(x) max(x, float(0)) +#else /* defined(FUSED_ACT) */ +#define ACTIVATION_FUNC(x) (x) +#endif /* defined(FUSED_ACT) */ + /** Apply batch normalization. * * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32" @@ -102,7 +112,7 @@ void main(void) gamma_param = LOAD(gamma_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(gamma_iter, current_slice * beta_attrs.stride_x)); beta_param = LOAD(beta_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(beta_iter, current_slice * beta_attrs.stride_x)); - STORE_CURRENT_ITEM(dst_ptr, dst_iter, ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)); + STORE_CURRENT_ITEM(dst_ptr, dst_iter, ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param))); } #elif defined(DATA_TYPE_FP16) @@ -148,7 +158,7 @@ void main(void) gamma_param = unpacked_s[3].x; beta_param = unpacked_s[4].x; - result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param); + result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)); STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result); } @@ -163,7 +173,7 @@ void main(void) gamma_param = unpacked_s[3].y; beta_param = unpacked_s[4].y; - result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param); + result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)); STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result); } @@ -178,7 +188,7 @@ void main(void) gamma_param = unpacked_s[3].z; beta_param = unpacked_s[4].z; - result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param); + result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)); STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result); } @@ -193,7 +203,7 @@ void main(void) gamma_param = unpacked_s[3].w; beta_param = unpacked_s[4].w; - result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param); + result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)); STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result); } diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp index dee2a5579b..a41b62fbab 100644 --- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
@@ -42,7 +42,7 @@ GCBatchNormalizationLayerKernel::GCBatchNormalizationLayerKernel()
 }
 
 void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma,
-                                                float epsilon)
+                                                float epsilon, ActivationLayerInfo act_info)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
@@ -54,7 +54,14 @@ void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTenso
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, mean, var, beta, gamma);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma);
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0));
+    if(act_info.enabled())
+    {
+        ARM_COMPUTE_ERROR_ON(input->info()->data_type() != DataType::F32 && input->info()->data_type() != DataType::F16);
+        ARM_COMPUTE_ERROR_ON(act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU
+                             && act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
+                             && act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
+        ARM_COMPUTE_ERROR_ON(act_info.b() > act_info.a());
+    }
 
     _input  = input;
     _output = output;
@@ -79,6 +86,13 @@ void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTenso
     build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)));
     build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)));
 
+    if(act_info.enabled())
+    {
+        build_opts.emplace("#define " + string_from_activation_func(act_info.activation()));
+        build_opts.emplace("#define A_VAL " + float_to_string_with_full_precision(act_info.a()));
+        build_opts.emplace("#define B_VAL " + float_to_string_with_full_precision(act_info.b()));
+    }
+
     // Create kernel
     _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts));
 
diff --git a/src/graph/nodes/BatchNormalizationLayer.cpp b/src/graph/nodes/BatchNormalizationLayer.cpp
index 7851aa5b9e..24287ac61a 100644
--- a/src/graph/nodes/BatchNormalizationLayer.cpp
+++ b/src/graph/nodes/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -77,6 +77,7 @@ std::unique_ptr<arm_compute::IFunction> BatchNormalizationLayer::instantiate_nod
     node_ctx.add_input(_gamma.tensor());
     node_ctx.add_output(out);
     node_ctx.add_parameter<float>("epsilon", _epsilon);
+    node_ctx.add_parameter<ActivationLayerInfo>("act_info", _act_info);
 
     // Configure operation
     auto func = OperationRegistry::get().find_operation(OperationType::BatchNormalizationLayer, _target_hint)->configure(node_ctx);
diff --git a/src/graph/operations/CLSimpleOperations.cpp b/src/graph/operations/CLSimpleOperations.cpp
index 61315e73b2..94e3fe15f7 100644
--- a/src/graph/operations/CLSimpleOperations.cpp
+++ b/src/graph/operations/CLSimpleOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -79,17 +79,18 @@ REGISTER_SIMPLE_OPERATION(CLBatchNormalizationLayerOperation, OPENCL, OperationT
     ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0)) == nullptr);
 
     // Extract IO and info
-    auto      *in      = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(0));
-    auto      *mean    = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(1));
-    auto      *var     = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(2));
-    auto      *beta    = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(3));
-    auto      *gamma   = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(4));
-    auto      *out     = dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0));
-    const auto epsilon = ctx.parameter<float>("epsilon");
+    auto      *in       = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(0));
+    auto      *mean     = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(1));
+    auto      *var      = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(2));
+    auto      *beta     = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(3));
+    auto      *gamma    = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(4));
+    auto      *out      = dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0));
+    const auto epsilon  = ctx.parameter<float>("epsilon");
+    const auto act_info = ctx.parameter<ActivationLayerInfo>("act_info");
 
     // Create and configure function
     auto batch_norm = arm_compute::support::cpp14::make_unique<arm_compute::CLBatchNormalizationLayer>();
-    batch_norm->configure(in, out, mean, var, beta, gamma, epsilon);
+    batch_norm->configure(in, out, mean, var, beta, gamma, epsilon, act_info);
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLBatchNormalizationLayer"
@@ -101,6 +102,9 @@ REGISTER_SIMPLE_OPERATION(CLBatchNormalizationLayerOperation, OPENCL, OperationT
                                << " Beta shape: " << beta->info()->tensor_shape()
                                << " Gamma shape: " << gamma->info()->tensor_shape()
                                << " Epsilon: " << epsilon
+                               << " Activation function: " << act_info.activation()
+                               << " a: " << act_info.a()
+                               << " b: " << act_info.b()
                                << std::endl);
 
     return std::move(batch_norm);
diff --git a/src/graph/operations/NESimpleOperations.cpp b/src/graph/operations/NESimpleOperations.cpp
index 5a00e230ea..265bed6b7a 100644
--- a/src/graph/operations/NESimpleOperations.cpp
+++ b/src/graph/operations/NESimpleOperations.cpp
@@ -79,17 +79,18 @@ REGISTER_SIMPLE_OPERATION(NEBatchNormalizationLayerOperation, NEON, OperationTyp
     ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ITensor *>(ctx.output(0)) == nullptr);
 
     // Extract IO and info
-    auto      *in      = dynamic_cast<arm_compute::ITensor *>(ctx.input(0));
-    auto      *mean    = dynamic_cast<arm_compute::ITensor *>(ctx.input(1));
-    auto      *var     = dynamic_cast<arm_compute::ITensor *>(ctx.input(2));
-    auto      *beta    = dynamic_cast<arm_compute::ITensor *>(ctx.input(3));
-    auto      *gamma   = dynamic_cast<arm_compute::ITensor *>(ctx.input(4));
-    auto      *out     = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
-    const auto epsilon = ctx.parameter<float>("epsilon");
+    auto      *in       = dynamic_cast<arm_compute::ITensor *>(ctx.input(0));
+    auto      *mean     = dynamic_cast<arm_compute::ITensor *>(ctx.input(1));
+    auto      *var      = dynamic_cast<arm_compute::ITensor *>(ctx.input(2));
+    auto      *beta     = dynamic_cast<arm_compute::ITensor *>(ctx.input(3));
+    auto      *gamma    = dynamic_cast<arm_compute::ITensor *>(ctx.input(4));
+    auto      *out      = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
+    const auto epsilon  = ctx.parameter<float>("epsilon");
+    const auto act_info = ctx.parameter<ActivationLayerInfo>("act_info");
 
     // Create and configure function
     auto batch_norm = arm_compute::support::cpp14::make_unique<arm_compute::NEBatchNormalizationLayer>();
-    batch_norm->configure(in, out, mean, var, beta, gamma, epsilon);
+    batch_norm->configure(in, out, mean, var, beta, gamma, epsilon, act_info);
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEBatchNormalizationLayer"
@@ -101,6 +102,9 @@ REGISTER_SIMPLE_OPERATION(NEBatchNormalizationLayerOperation, NEON, OperationTyp
                                << " Beta shape: " << beta->info()->tensor_shape()
                                << " Gamma shape: " << gamma->info()->tensor_shape()
                                << " Epsilon: " << epsilon
+                               << " Activation function: " << act_info.activation()
+                               << " a: " << act_info.a()
+                               << " b: " << act_info.b()
                                << std::endl);
 
     return std::move(batch_norm);
diff --git a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
index 58215c3c3e..f87ea6ea06 100644
---
a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,17 +37,18 @@ CLBatchNormalizationLayer::CLBatchNormalizationLayer() { } -void CLBatchNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon) +void CLBatchNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon, + ActivationLayerInfo act_info) { - _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon); + _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info); } Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon) + float epsilon, ActivationLayerInfo act_info) { - return CLBatchNormalizationLayerKernel::validate(input, output, mean, var, beta, gamma, epsilon); + return CLBatchNormalizationLayerKernel::validate(input, output, mean, var, beta, gamma, epsilon, act_info); } void CLBatchNormalizationLayer::run() diff --git a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp index 99bdf43c05..cc5e8f49f2 100755 --- a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,9 +37,10 @@ GCBatchNormalizationLayer::GCBatchNormalizationLayer() { } -void GCBatchNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon) +void GCBatchNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon, + ActivationLayerInfo act_info) { - _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon); + _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon, act_info); } void GCBatchNormalizationLayer::run() diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp index f6be00169d..b3753e842d 100644 --- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
@@ -33,23 +33,40 @@ using namespace arm_compute;
 
 NEBatchNormalizationLayer::NEBatchNormalizationLayer()
-    : _norm_kernel()
+    : _norm_kernel(), _act_func(), _act_info_enabled(false)
 {
 }
 
-void NEBatchNormalizationLayer::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon)
+void NEBatchNormalizationLayer::configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon,
+                                          ActivationLayerInfo act_info)
 {
+    _act_info_enabled = act_info.enabled();
+
+    // Configure kernel
     _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon);
+    if(_act_info_enabled)
+    {
+        _act_func.configure(output, nullptr, act_info);
+    }
 }
 
 Status NEBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta, const ITensorInfo *gamma,
-                                           float epsilon)
+                                           float epsilon, ActivationLayerInfo act_info)
 {
-    return NEBatchNormalizationLayerKernel::validate(input, output, mean, var, beta, gamma, epsilon);
+    ARM_COMPUTE_RETURN_ON_ERROR(NEBatchNormalizationLayerKernel::validate(input, output, mean, var, beta, gamma, epsilon));
+    if(act_info.enabled())
+    {
+        ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
+    }
+
+    return Status{};
 }
 
 void NEBatchNormalizationLayer::run()
 {
     NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
+    if(_act_info_enabled)
+    {
+        _act_func.run();
+    }
 }
diff --git a/tests/benchmark/CL/BatchNormalizationLayer.cpp b/tests/benchmark/CL/BatchNormalizationLayer.cpp
index 0fc8727241..a61e7cc745 100644
--- a/tests/benchmark/CL/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/CL/BatchNormalizationLayer.cpp
@@ -29,6 +29,7 @@
 #include "tests/CL/CLAccessor.h"
 #include "tests/benchmark/fixtures/BatchNormalizationLayerFixture.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4BatchNormalizationLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h"
 #include "tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -47,24 +48,41 @@ using CLBatchNormalizationLayerFixture = BatchNormalizationLayerFixture<CLTensor, CLBatchNormalizationLayer, CLAccessor>
-    void setup(TensorShape tensor_shape, TensorShape param_shape, float epsilon, DataType data_type, int batches)
+    void setup(TensorShape tensor_shape, TensorShape param_shape, float epsilon, ActivationLayerInfo act_info, DataType data_type, int batches)
     {
         // Set batched in source and destination shapes
         const unsigned int fixed_point_position = 4;
@@ -55,7 +55,7 @@ public:
         gamma = create_tensor<TensorType>(param_shape, data_type, 1, fixed_point_position);
 
         // Create and configure function
-        batch_norm_layer.configure(&src, &dst, &mean, &variance, &beta, &gamma, epsilon);
+        batch_norm_layer.configure(&src, &dst, &mean, &variance, &beta, &gamma, epsilon, act_info);
 
         // Allocate tensors
         src.allocator()->allocate();
diff --git a/tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h
new file mode 100644
index 0000000000..d09ff02ef9
--- /dev/null
+++ b/tests/datasets/system_tests/mobilenet/MobileNetBatchNormalizationLayerDataset.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_MOBILENET_BATCHNORMALIZATION_LAYER_DATASET +#define ARM_COMPUTE_TEST_MOBILENET_BATCHNORMALIZATION_LAYER_DATASET + +#include "tests/datasets/BatchNormalizationLayerDataset.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class MobileNetBatchNormalizationLayerDataset final : public BatchNormalizationLayerDataset +{ +public: + MobileNetBatchNormalizationLayerDataset() + { + // conv1_bn, dwc0_bn + add_config(TensorShape(112U, 112U, 32U), TensorShape(32U), 0.001f); + // pwc0_bn + add_config(TensorShape(112U, 112U, 64U), TensorShape(64U), 0.001f); + // dwc1_bn + add_config(TensorShape(56U, 56U, 64U), TensorShape(64U), 0.001f); + // dwc2_bn, pwc1_bn, pwc2_bn + add_config(TensorShape(56U, 56U, 128U), TensorShape(128U), 0.001f); + // dwc3_bn + add_config(TensorShape(28U, 28U, 128U), TensorShape(128U), 0.001f); + // dwc4_bn, pwc3_bn, pwc4_bn + add_config(TensorShape(28U, 28U, 256U), TensorShape(256U), 0.001f); + // dwc5_bn + add_config(TensorShape(14U, 14U, 256U), TensorShape(256U), 0.001f); + // dwc6_bn, dwc7_bn, dwc8_bn, dwc9_bn, dwc10_bn, pwc5_bn, pwc6_bn, pwc7_bn, pwc8_bn, pwc9_bn, pwc10_bn + add_config(TensorShape(14U, 14U, 512U), TensorShape(512U), 0.001f); + // dwc11_bn + add_config(TensorShape(7U, 7U, 512U), TensorShape(512U), 0.001f); + // dwc12_bn, pwc11_bn, pwc12_bn + add_config(TensorShape(7U, 7U, 1024U), TensorShape(1024U), 0.001f); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_MOBILENET_BATCHNORMALIZATION_LAYER_DATASET */ diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp index 30dd70a66a..ef535153f2 100644 --- a/tests/validation/CL/BatchNormalizationLayer.cpp +++ b/tests/validation/CL/BatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
@@ -47,6 +47,12 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f); /**< Tolerance value
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 constexpr AbsoluteTolerance<float> tolerance_qs8(3.0f);   /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
 constexpr AbsoluteTolerance<float> tolerance_qs16(6.0f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */
+const auto act_infos = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+});
 } // namespace
 
 TEST_SUITE(CL)
@@ -80,13 +86,16 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Ran
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
     framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Invalid mean/var/beta/gamma shape
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Fused activation with fixed point not supported
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Unsupported fused activation
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Fused activation's a < b
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                           }),
@@ -96,6 +105,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                             TensorInfo(),
                                           })),
@@ -106,16 +118,31 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                             TensorInfo(TensorShape(5U), 1, DataType::F32),
                                             TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                             TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                            TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                            TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                          })),
+    framework::dataset::make("ActivationLayerInfo",{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 6.f),
+                                            ActivationLayerInfo(),
+                                            ActivationLayerInfo(),
+                                          })),
-    framework::dataset::make("Expected", { true, false, false, false, false, false, true, true})),
-    input_info, output_info, mvbg_info, expected)
+    framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, false, true, true})),
+    input_info, output_info, mvbg_info, act_info, expected)
 {
     const auto &mean_info  = mvbg_info;
     const auto &var_info   = mvbg_info;
     const auto &beta_info  = mvbg_info;
     const auto &gamma_info = mvbg_info;
-    bool has_error = bool(CLBatchNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false), &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f));
+    bool has_error = bool(CLBatchNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false), &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f, act_info));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -123,7 +150,8 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                    act_infos),
                                                                                                                     framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
@@ -132,7 +160,8 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
                                                                                                                    framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
@@ -146,7 +175,8 @@
 template <typename T>
 using CLBatchNormalizationLayerFixedPointFixture = BatchNormalizationLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLBatchNormalizationLayer, T>;
 
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                               framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                                                                framework::dataset::make("DataType", DataType::QS8)),
                                                                                                                                framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -156,7 +186,8 @@ FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int8_t
-FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                                framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                                                                 framework::dataset::make("DataType", DataType::QS16)),
                                                                                                                                 framework::dataset::make("FractionalBits", 1, 14)))
 {
diff --git a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
index a82149bdcc..d817fc0e67 100644
--- a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -45,6 +45,12 @@ namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_f(0.00001f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);   /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+const auto act_infos = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+});
 } // namespace
 
 TEST_SUITE(GC)
@@ -78,7 +84,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Ran
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   act_infos),
                                                                                                                    framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
@@ -87,7 +94,8 @@ FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                    act_infos),
                                                                                                                     framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index dfa32bbb07..3501c359db 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -49,6 +49,12 @@
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<float> tolerance_qs8(3.0f);   /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
 constexpr AbsoluteTolerance<float> tolerance_qs16(6.0f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */
+const auto act_infos = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+});
 } // namespace
 
 TEST_SUITE(NEON)
@@ -82,13 +88,15 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Ran
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
     framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Invalid mean/var/beta/gamma shape
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Fused activation with fixed point not supported
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Fused activation's a < b
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                           }),
@@ -98,6 +106,8 @@
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
                                             TensorInfo(),
                                           })),
@@ -108,10 +118,23 @@
                                             TensorInfo(TensorShape(5U), 1, DataType::F32),
                                             TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                             TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                            TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                             TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
                                           })),
+    framework::dataset::make("ActivationLayerInfo",{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f, 2.f),
+                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 6.f),
+                                            ActivationLayerInfo(),
+                                            ActivationLayerInfo(),
+                                          })),
-    framework::dataset::make("Expected", { true, false, false, false, false, false, true, true})),
-    input_info, output_info, mvbg_info, expected)
+    framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, true, true})),
+    input_info, output_info, mvbg_info, act_info, expected)
 {
     const auto &mean_info = mvbg_info;
     const auto &var_info  = mvbg_info;
@@ -120,14 +143,15 @@
     bool has_error = bool(NEBatchNormalizationLayer::validate(
                               &input_info.clone()->set_is_resizable(false), output_info.total_size() ? &output_info.clone()->set_is_resizable(false) : nullptr,
                               &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false),
-                              &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f));
+                              &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f, act_info));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
 
 TEST_SUITE(Float)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                    act_infos),
                                                                                                                     framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
@@ -137,7 +161,8 @@
 TEST_SUITE_END()
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(Float16)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   act_infos),
                                                                                                                    framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
@@ -151,7 +176,8 @@
 template <typename T>
 using NEBatchNormalizationLayerFixedPointFixture = BatchNormalizationLayerValidationFixedPointFixture<Tensor, Accessor, NEBatchNormalizationLayer, T>;
 
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                               framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                                                                framework::dataset::make("DataType", DataType::QS8)),
                                                                                                                                framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -161,7 +187,8 @@ FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int8_t
-FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                                framework::dataset::make("ActivationInfo", ActivationLayerInfo())),
                                                                                                                                 framework::dataset::make("DataType", DataType::QS16)),
                                                                                                                                 framework::dataset::make("FractionalBits", 1, 14)))
 {
diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
index 298c9ca411..e02c619249 100644
--- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h
+++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -45,12 +45,12 @@ class BatchNormalizationLayerValidationFixedPointFixture : public framework::Fix
 {
 public:
     template <typename T>
-    void setup(TensorShape shape0, TensorShape shape1, float epsilon, DataType dt, int fractional_bits)
+    void setup(TensorShape shape0, TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fractional_bits)
     {
         _fractional_bits = fractional_bits;
         _data_type       = dt;
-        _target          = compute_target(shape0, shape1, epsilon, dt, fractional_bits);
-        _reference       = compute_reference(shape0, shape1, epsilon, dt, fractional_bits);
+        _target          = compute_target(shape0, shape1, epsilon, act_info, dt, fractional_bits);
+        _reference       = compute_reference(shape0, shape1, epsilon, act_info, dt, fractional_bits);
     }
 
 protected:
@@ -85,7 +85,7 @@
         }
     }
 
-    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, float epsilon, DataType dt, int fixed_point_position)
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fixed_point_position)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape0, dt, 1, fixed_point_position);
@@ -97,7 +97,7 @@
 
         // Create and configure function
         FunctionType norm;
-        norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon);
+        norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon, act_info);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -130,7 +130,7 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, float epsilon, DataType dt, int fixed_point_position)
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, int fixed_point_position)
     {
         // Create reference
         SimpleTensor<T> ref_src{ shape0, dt, 1, fixed_point_position };
@@ -142,7 +142,7 @@
         // Fill reference
         fill(ref_src, ref_mean, ref_var, ref_beta, ref_gamma);
 
-        return reference::batch_normalization_layer(ref_src, ref_mean, ref_var, ref_beta, ref_gamma, epsilon, fixed_point_position);
+        return reference::batch_normalization_layer(ref_src, ref_mean, ref_var, ref_beta, ref_gamma, epsilon, act_info, fixed_point_position);
     }
 
     TensorType _target{};
@@ -156,9 +156,9 @@ class BatchNormalizationLayerValidationFixture : public BatchNormalizationLayerV
 {
 public:
     template <typename T>
-    void setup(TensorShape shape0, TensorShape shape1, float epsilon, DataType dt)
+    void setup(TensorShape shape0, TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt)
    {
-        BatchNormalizationLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, epsilon, dt, 0);
+        BatchNormalizationLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, epsilon, act_info, dt, 0);
    }
 };
 } // namespace validation
diff --git a/tests/validation/reference/BatchNormalizationLayer.cpp b/tests/validation/reference/BatchNormalizationLayer.cpp
index e4446d1694..a9d9f0320d 100644
--- a/tests/validation/reference/BatchNormalizationLayer.cpp
+++ b/tests/validation/reference/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -23,6 +23,8 @@
 */
 #include "BatchNormalizationLayer.h"
 
+#include "ActivationLayer.h"
+
 #include "tests/validation/FixedPoint.h"
 #include "tests/validation/Helpers.h"
 
@@ -37,8 +39,9 @@ namespace reference
 // Batch Normalization Layer for fixed point type
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type *>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position)
+                                          ActivationLayerInfo act_info, int fixed_point_position)
 {
+    ARM_COMPUTE_UNUSED(act_info);
     SimpleTensor<T> result(src.shape(), src.data_type());
 
     const auto cols = static_cast<int>(src.shape()[0]);
@@ -79,7 +82,7 @@ SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const Simp
 // Batch Normalization Layer for floating point type
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type *>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position)
+                                          ActivationLayerInfo act_info, int fixed_point_position)
 {
     ARM_COMPUTE_UNUSED(fixed_point_position);
 
@@ -103,21 +106,28 @@ SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const Simp
                     const float numerator   = src[pos] - mean[i];
                     const float x_bar       = numerator / denominator;
                     result[pos]             = beta[i] + x_bar * gamma[i];
                 }
             }
         }
     }
+
+    if(act_info.enabled())
+    {
+        result = activation_layer(result, act_info);
+    }
+
     return result;
 }
 
 template SimpleTensor<float> batch_normalization_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &mean, const SimpleTensor<float> &var, const SimpleTensor<float> &beta,
-                                                       const SimpleTensor<float> &gamma, float epsilon, int fixed_point_position);
+                                                       const SimpleTensor<float> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 template SimpleTensor<int8_t> batch_normalization_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &mean, const SimpleTensor<int8_t> &var, const SimpleTensor<int8_t> &beta,
-                                                        const SimpleTensor<int8_t> &gamma, float epsilon, int fixed_point_position);
+                                                        const SimpleTensor<int8_t> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 template SimpleTensor<int16_t> batch_normalization_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &mean, const SimpleTensor<int16_t> &var, const SimpleTensor<int16_t> &beta,
-                                                         const SimpleTensor<int16_t> &gamma, float epsilon, int fixed_point_position);
+                                                         const SimpleTensor<int16_t> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 template SimpleTensor<half> batch_normalization_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &mean, const SimpleTensor<half> &var, const SimpleTensor<half> &beta,
-                                                      const SimpleTensor<half> &gamma, float epsilon, int fixed_point_position);
+                                                      const SimpleTensor<half> &gamma, float epsilon, ActivationLayerInfo act_info, int fixed_point_position);
 } // namespace reference
 } // namespace validation
diff --git a/tests/validation/reference/BatchNormalizationLayer.h b/tests/validation/reference/BatchNormalizationLayer.h
index 1a554adf7e..329909dab4 100644
--- a/tests/validation/reference/BatchNormalizationLayer.h
+++ b/tests/validation/reference/BatchNormalizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -37,11 +37,13 @@ namespace reference
 {
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          int fixed_point_position);
+                                          ActivationLayerInfo act_info,
+                                          int fixed_point_position);
 
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
 SimpleTensor<T> batch_normalization_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &var, const SimpleTensor<T> &beta, const SimpleTensor<T> &gamma, float epsilon,
-                                          ActivationLayerInfo act_info,
-                                          int fixed_point_position);
+                                          ActivationLayerInfo act_info,
+                                          int fixed_point_position);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1
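
For reference, below is a minimal sketch of how the fused-activation overload added by this patch might be exercised from user code. The tensor shapes, epsilon value, and activation bound are illustrative assumptions chosen for the example, not values taken from the patch:

```cpp
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"

using namespace arm_compute;

int main()
{
    // Initialise the OpenCL context and queue used by the CL runtime
    CLScheduler::get().default_init();

    // Illustrative shapes: one 16x16 feature map with 32 channels (FM = 32);
    // mean/var/beta/gamma are 1D tensors with one value per feature map.
    const TensorShape data_shape(16U, 16U, 32U);
    const TensorShape param_shape(32U);

    CLTensor src, dst, mean, var, beta, gamma;
    src.allocator()->init(TensorInfo(data_shape, 1, DataType::F32));
    dst.allocator()->init(TensorInfo(data_shape, 1, DataType::F32));
    for(CLTensor *t : { &mean, &var, &beta, &gamma })
    {
        t->allocator()->init(TensorInfo(param_shape, 1, DataType::F32));
    }

    // The new trailing parameter fuses min(max(x, 0), 6) into the batch
    // normalization kernel instead of running a separate activation pass.
    CLBatchNormalizationLayer bn;
    bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 0.001f,
                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f));

    for(CLTensor *t : { &src, &dst, &mean, &var, &beta, &gamma })
    {
        t->allocator()->allocate();
    }

    bn.run();                  // Enqueue the fused kernel
    CLScheduler::get().sync(); // Wait for the OpenCL queue to drain
    return 0;
}
```

The same signature applies on NEON and GLES, but note that, as the patch shows, NEBatchNormalizationLayer currently realises the fusion by chaining an NEActivationLayer after the kernel rather than folding the activation into the kernel itself as the CL and GLES backends do.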