diff options
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 85d8ab7cb4..02bf35a860 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -26,12 +26,15 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "support/ToolchainSupport.h" + using namespace arm_compute; CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() @@ -42,7 +45,7 @@ CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); // Output tensor auto initialization if not yet initialized @@ -54,10 +57,6 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma); ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0)); - // Set build options - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - _input = input; _output = output; _mean = mean; @@ -66,17 +65,25 @@ void CLBatchNormalizationLayerKernel::configure(const ICLTensor *input, ICLTenso _gamma = gamma; _epsilon = epsilon; + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + + // Set build options + std::set<std::string> build_opts; + build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + if(is_data_type_fixed_point(input->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + } + // Create kernel - std::string kernel_name = "batchnormalization_layer"; - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts)); // Set kernel static arguments unsigned int idx = 2 * num_arguments_per_3D_tensor() + 4 * num_arguments_per_1D_tensor(); // Skip the input and output parameters _kernel.setArg<cl_float>(idx++, _epsilon); // Configure kernel window - const unsigned int num_elems_processed_per_iteration = 4; - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); |