diff options
-rw-r--r-- | src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs | 158 | ||||
-rw-r--r-- | src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp | 22 | ||||
-rw-r--r-- | src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp | 2 | ||||
-rw-r--r-- | tests/benchmark/CL/SoftmaxLayer.cpp | 60 | ||||
-rw-r--r-- | tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp | 60 | ||||
-rw-r--r-- | tests/benchmark/fixtures/ConvolutionLayerFixture.h | 2 | ||||
-rw-r--r-- | tests/benchmark/fixtures/SoftmaxLayerFixture.h | 98 | ||||
-rw-r--r-- | tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp | 2 |
8 files changed, 320 insertions, 84 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs index 1a2c3f7b20..c9fabc5fcd 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs @@ -45,7 +45,7 @@ const vec4 vec4_min = vec4(float_min); /** Identifies the maximum value across the 1st dimension. * * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32" - * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed. + * @note In case the input is not multiple of 8 NON_MULTIPLE_OF_8 must be passed. * * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32 * @param[in] src_attrs The attributes of the source tensor @@ -74,21 +74,24 @@ void main(void) vec4 max_val = vec4_min; // Calculate max of row - uint width2 = width >> 2; - for(int i = 0; i < int(width2); i++) + uint width3 = width >> 3; + for(int i = 0; i < int(width3); i++) { - vec4 data = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0)); - max_val = MAX_OP(data, max_val); + vec4 data[2]; + data[0] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0)); + data[1] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, (i << 3) + 4, 0)); + max_val = MAX_OP(data[0], max_val); + max_val = MAX_OP(data[1], max_val); } -#ifdef NON_MULTIPLE_OF_4 - // Handle non multiple of 4 - for(int i = int(width2 << 2); i < int(width); i++) +#ifdef NON_MULTIPLE_OF_8 + // Handle non multiple of 8 + for(int i = int(width3 << 3); i < int(width); i++) { float data = LOAD(src_ptr, IMAGE_OFFSET(src_iter, i, 0)); max_val.x = MAX_OP(data, max_val.x); } -#endif /* NON_MULTIPLE_OF_4 */ +#endif /* NON_MULTIPLE_OF_8 */ // Perform max reduction max_val.xy = MAX_OP(max_val.xy, max_val.zw); @@ -111,25 +114,29 @@ void main(void) vec4 max_val = vec4_min; // Calculate max of row - uint width2 = width >> 2; - for(int i = 0; i < int(width2); i++) + uint width3 = 
width >> 3; + for(int i = 0; i < int(width3); i++) { - vec4 data = VLOAD2_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0)); - max_val = MAX_OP(data, max_val); + vec4 data[2]; + data = VLOAD4_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0)); + max_val = MAX_OP(data[0], max_val); + max_val = MAX_OP(data[1], max_val); } -#ifdef NON_MULTIPLE_OF_4 - // Handle non multiple of 4 - for(int i = int(width2 << 2); i < int(width); i = i + 2) +#ifdef NON_MULTIPLE_OF_8 + // Handle non multiple of 8 + uint width1 = width >> 1 << 1; + for(int i = int(width3 << 3); i < int(width1); i = i + 2) { - vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0)); + vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0)); + max_val.xy = MAX_OP(data, max_val.xy); + } + if(width != width1) + { + vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, width1, 0)); max_val.x = MAX_OP(data.x, max_val.x); - if((i + 1) < int(width)) - { - max_val.x = MAX_OP(data.y, max_val.x); - } } -#endif /* NON_MULTIPLE_OF_4 */ +#endif /* NON_MULTIPLE_OF_8 */ // Perform max reduction max_val.xy = MAX_OP(max_val.xy, max_val.zw); @@ -146,7 +153,7 @@ void main(void) * then gets the exponent of each element as sums all elements across each row. * * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32" - * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed. + * @note In case the input is not multiple of 8 NON_MULTIPLE_OF_8 must be passed. * * @param[in] src_ptr Pointer to the source tensor slice. 
Supported data types: F16/F32 * @param[in] src_attrs The attributes of the source tensor @@ -187,19 +194,25 @@ void main(void) vec4 sum1D = vec4(0); // Shift values, exp and sum - uint width2 = width >> 2; - for(int i = 0; i < int(width2); i++) + uint width3 = width >> 3; + for(int i = 0; i < int(width3); i++) { - vec4 data = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0)); - data = SUB_OP(data, max_val); - data = EXP_OP(data); - VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, i << 2, 0), data); - sum1D = ADD_OP(sum1D, data); + vec4 data[2]; + data[0] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0)); + data[1] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, (i << 3) + 4, 0)); + data[0] = SUB_OP(data[0], max_val); + data[1] = SUB_OP(data[1], max_val); + data[0] = EXP_OP(data[0]); + data[1] = EXP_OP(data[1]); + VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, i << 3, 0), data[0]); + VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, (i << 3) + 4, 0), data[1]); + sum1D = ADD_OP(sum1D, data[0]); + sum1D = ADD_OP(sum1D, data[1]); } -#ifdef NON_MULTIPLE_OF_4 - // Handle non multiple of 4 - for(int i = int(width2 << 2); i < int(width); i++) +#ifdef NON_MULTIPLE_OF_8 + // Handle non multiple of 8 + for(int i = int(width3 << 3); i < int(width); i++) { float data = LOAD(src_ptr, IMAGE_OFFSET(src_iter, i, 0)); data = SUB_OP(data, max_val.x); @@ -207,7 +220,7 @@ void main(void) STORE(dst_ptr, IMAGE_OFFSET(dst_iter, i, 0), data); sum1D.x = ADD_OP(sum1D.x, data); } -#endif /* NON_MULTIPLE_OF_4 */ +#endif /* NON_MULTIPLE_OF_8 */ // Perform min/max reduction sum1D.xy = ADD_OP(sum1D.xy, sum1D.zw); @@ -238,44 +251,40 @@ void main(void) vec4 sum1D = vec4(0.f); // Shift values, exp and sum - uint width2 = width >> 2; - for(int i = 0; i < int(width2); i++) + uint width3 = width >> 3; + for(int i = 0; i < int(width3); i++) { - vec4 data = VLOAD2_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0)); - data = SUB_OP(data, max_val); - data = EXP_OP(data); - VSTORE2_PACK4_HALF(dst_ptr, 
IMAGE_OFFSET(dst_iter, i << 2, 0), data); - sum1D = ADD_OP(sum1D, data); + vec4 data[2]; + data = VLOAD4_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0)); + data[0] = SUB_OP(data[0], max_val); + data[1] = SUB_OP(data[1], max_val); + data[0] = EXP_OP(data[0]); + data[1] = EXP_OP(data[1]); + VSTORE4_PACK8_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i << 3, 0), data); + sum1D = ADD_OP(sum1D, data[0]); + sum1D = ADD_OP(sum1D, data[1]); } -#ifdef NON_MULTIPLE_OF_4 - // Handle non multiple of 4 - for(int i = int(width2 << 2); i < int(width); i = i + 2) +#ifdef NON_MULTIPLE_OF_8 + // Handle non multiple of 8 + uint width1 = width >> 1 << 1; + for(int i = int(width3 << 3); i < int(width1); i = i + 2) { - float data; - vec2 datamiddle = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0)); - data = SUB_OP(datamiddle.x, max_val.x); - data = EXP_OP(data); - vec2 datares; - if((i + 1) < int(width)) - { - float data2; - data2 = SUB_OP(datamiddle.y, max_val.x); - data2 = EXP_OP(data2); - datares = vec2(data, data2); - data = ADD_OP(data2, data); - } - else - { - datares = vec2(data, 0.f); - } - - STORE_PACK2_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i, 0), datares); - + vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0)); + data = SUB_OP(data, max_val.xy); + data = EXP_OP(data); + STORE_PACK2_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i, 0), data); + sum1D.xy = ADD_OP(sum1D.xy, data); + } + if(width != width1) + { + float data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, width1, 0)).x; + data = SUB_OP(data, max_val.x); + data = EXP_OP(data); + STORE_PACK2_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, width1, 0), vec2(data, 0.0)); sum1D.x = ADD_OP(sum1D.x, data); } -#endif /* NON_MULTIPLE_OF_4 */ - +#endif /* NON_MULTIPLE_OF_8 */ // Perform min/max reduction sum1D.xy = ADD_OP(sum1D.xy, sum1D.zw); sum1D.x = ADD_OP(sum1D.x, sum1D.y); @@ -317,8 +326,12 @@ void main(void) // Load max value of 1D logits vector (row) vec4 sum_val = vec4(LOAD(sum_ptr, IMAGE_OFFSET(sum_iter, 0, 
gl_GlobalInvocationID.y))); - vec4 data = VLOAD4_CURRENT_ITEM(vec4, src_ptr, src_iter); - VSTORE4_CURRENT_ITEM(dst_ptr, dst_iter, DIV_OP(data, sum_val)); + + vec4 data[2]; + data[0] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, 0, 0)); + data[1] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, 4, 0)); + VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, 0, 0), DIV_OP(data[0], sum_val)); + VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, 4, 0), DIV_OP(data[1], sum_val)); } #elif defined(DATA_TYPE_FP16) TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, readonly); @@ -332,8 +345,13 @@ void main(void) // Load max value of 1D logits vector (row) vec4 sum_val = vec4(LOAD_UNPACK2_HALF(sum_ptr, IMAGE_OFFSET(sum_iter, 0, gl_GlobalInvocationID.y)).x); - vec4 data = VLOAD2_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter); - VSTORE2_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, DIV_OP(data, sum_val)); + + vec4 data[2]; + data = VLOAD4_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, 0)); + vec4 ret[2]; + ret[0] = DIV_OP(data[0], sum_val); + ret[1] = DIV_OP(data[1], sum_val); + VSTORE4_PACK8_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, 0, 0), ret); } #else // DATA_TYPE_FP32 #error Data type not supported diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp index 29a1385f87..040a66358f 100644 --- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp @@ -66,10 +66,10 @@ void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output) build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); build_opts.insert("#define SOFTMAX_LAYER_MAX"); - // Tell the kernel that the width is not a multiple of 4 - if((input->info()->dimension(0) % 4) != 0) + // Tell the kernel that the width is not a multiple of 8 + if((input->info()->dimension(0) % 8) != 0) { - build_opts.insert("#define NON_MULTIPLE_OF_4"); + build_opts.insert("#define 
NON_MULTIPLE_OF_8"); } // Create kernel @@ -80,8 +80,8 @@ void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output) _kernel.set_argument(idx++, input->info()->dimension(0)); // Configure kernel window - // The kernel loops over all elements in steps of 4 - const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4); + // The kernel loops over all elements in steps of 8 + const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8); unsigned int num_elems_written_per_iteration = 1; if(input->info()->data_type() == DataType::F16) { @@ -131,10 +131,10 @@ void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTen build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM"); - // Tell the kernel that the width is not a multiple of 4 - if((input->info()->dimension(0) % 4) != 0) + // Tell the kernel that the width is not a multiple of 8 + if((input->info()->dimension(0) % 8) != 0) { - build_opts.insert("#define NON_MULTIPLE_OF_4"); + build_opts.insert("#define NON_MULTIPLE_OF_8"); } // Create kernel @@ -145,8 +145,8 @@ void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTen _kernel.set_argument(idx++, input->info()->dimension(0)); // Configure window - // The kernel loops over all elements in steps of 4 - const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4); + // The kernel loops over all elements in steps of 8 + const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8); unsigned int num_elems_written_per_iteration = 1; if(input->info()->data_type() == DataType::F16) { @@ -227,7 +227,7 @@ void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *su _kernel = 
static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts)); // Configure window - constexpr unsigned int num_elems_processed_per_iteration = 4; + constexpr unsigned int num_elems_processed_per_iteration = 8; unsigned int num_elems_written_per_iteration = 1; if(input->info()->data_type() == DataType::F16) { diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp index d7d47d2802..1db927c8ff 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -61,6 +61,8 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output) void GCSoftmaxLayer::run() { GCScheduler::get().enqueue(_max_kernel, false); + GCScheduler::get().sync(); GCScheduler::get().enqueue(_shift_exp_sum_kernel, false); + GCScheduler::get().sync(); GCScheduler::get().enqueue(_norm_kernel); } diff --git a/tests/benchmark/CL/SoftmaxLayer.cpp b/tests/benchmark/CL/SoftmaxLayer.cpp new file mode 100644 index 0000000000..6f0918fd95 --- /dev/null +++ b/tests/benchmark/CL/SoftmaxLayer.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/benchmark/fixtures/SoftmaxLayerFixture.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); +} // namespace + +using CLSoftmaxLayerFixture = SoftmaxLayerFixture<CLTensor, CLSoftmaxLayer, CLAccessor>; + +TEST_SUITE(CL) + +REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(datasets::SoftmaxLayerSmallShapes(), data_types)); + +TEST_SUITE(NIGHTLY) + +REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(datasets::SoftmaxLayerLargeShapes(), data_types)); + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp new file mode 100644 index 0000000000..66123aa57f --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp @@ -0,0 +1,60 @@ +/* 
+ * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/SoftmaxLayerFixture.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); +} // namespace + +using GCSoftmaxLayerFixture = SoftmaxLayerFixture<GCTensor, GCSoftmaxLayer, GCAccessor>; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, GCSoftmaxLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(datasets::SoftmaxLayerSmallShapes(), data_types)); + +TEST_SUITE(NIGHTLY) + +REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(datasets::SoftmaxLayerLargeShapes(), data_types)); + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h index 09e6cbfaf8..b526cc3870 100644 --- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h @@ -81,8 +81,6 @@ public: #ifdef ARM_COMPUTE_GC if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value) { - GCScheduler::get().sync(); - force_sync_tensor(src); force_sync_tensor(dst); } #endif /* ARM_COMPUTE_GC */ diff --git a/tests/benchmark/fixtures/SoftmaxLayerFixture.h b/tests/benchmark/fixtures/SoftmaxLayerFixture.h new file mode 100644 index 
0000000000..6e0472ce3a --- /dev/null +++ b/tests/benchmark/fixtures/SoftmaxLayerFixture.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_SOFTMAXLAYERFIXTURE +#define ARM_COMPUTE_TEST_SOFTMAXLAYERFIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" + +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "tests/GLES_COMPUTE/Helper.h" + +using namespace arm_compute::test::gles_compute; +#endif /* ARM_COMPUTE_GC */ + +namespace arm_compute +{ +namespace test +{ +/** Fixture that can be used for NEON, CL and OpenGL ES */ +template <typename TensorType, typename Function, typename Accessor> +class SoftmaxLayerFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(TensorShape shape, DataType data_type) + { + // Use a fixed point position of 3 for fixed point data types, 0 otherwise + const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; + + // Create tensors + src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position); + dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + smx_layer.configure(&src, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + } + + void run() + { + smx_layer.run(); +#ifdef ARM_COMPUTE_GC + if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value) + { + force_sync_tensor(dst); + } +#endif /* ARM_COMPUTE_GC */ + } + + void teardown() + { + src.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType dst{}; + Function smx_layer{}; +}; +} // namespace test +} // namespace arm_compute 
+#endif /* ARM_COMPUTE_TEST_SOFTMAXLAYERFIXTURE */ diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp index 888f87e9ef..a2114a9c37 100644 --- a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp +++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp @@ -79,7 +79,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 4).required_padding(); + const PaddingSize padding = PaddingCalculator(shape.x(), 8).required_padding(); validate(src.info()->padding(), padding); validate(dst.info()->padding(), padding); } |