-rw-r--r--  src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs      | 158
-rw-r--r--  src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp |  22
-rw-r--r--  src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp  |   2
-rw-r--r--  tests/benchmark/CL/SoftmaxLayer.cpp                    |  60
-rw-r--r--  tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp          |  60
-rw-r--r--  tests/benchmark/fixtures/ConvolutionLayerFixture.h     |   2
-rw-r--r--  tests/benchmark/fixtures/SoftmaxLayerFixture.h         |  98
-rw-r--r--  tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp         |   2
8 files changed, 320 insertions(+), 84 deletions(-)
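The patch widens the GLES_COMPUTE softmax kernels from 4 to 8 elements per loop iteration, adds tail handling for widths that are not a multiple of 8, syncs the scheduler between the three dependent kernels, and introduces benchmark cases plus a shared fixture for CL and GLES_COMPUTE. A minimal C++ sketch of the 8-wide main loop with a scalar tail, mirroring the pattern the FP32 shader follows (illustration only, not the shader source):

```cpp
#include <algorithm>
#include <cstddef>
#include <limits>

// Process 8 elements per iteration, then handle the remainder one by one,
// as the NON_MULTIPLE_OF_8 path in the patched shader does.
float row_max(const float *row, std::size_t width)
{
    float             max_val = std::numeric_limits<float>::lowest();
    const std::size_t width8  = width >> 3; // number of complete 8-element blocks
    for(std::size_t i = 0; i < width8; ++i)
    {
        for(std::size_t j = 0; j < 8; ++j)
        {
            max_val = std::max(max_val, row[(i << 3) + j]);
        }
    }
    // Scalar tail for widths that are not a multiple of 8
    for(std::size_t i = width8 << 3; i < width; ++i)
    {
        max_val = std::max(max_val, row[i]);
    }
    return max_val;
}
```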
diff --git a/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs
index 1a2c3f7b20..c9fabc5fcd 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs
@@ -45,7 +45,7 @@ const vec4 vec4_min = vec4(float_min);
/** Identifies the maximum value across the 1st dimension.
*
* @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
- * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed.
+ * @note In case the input is not a multiple of 8, NON_MULTIPLE_OF_8 must be passed.
*
* @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32
* @param[in] src_attrs The attributes of the source tensor
@@ -74,21 +74,24 @@ void main(void)
vec4 max_val = vec4_min;
// Calculate max of row
- uint width2 = width >> 2;
- for(int i = 0; i < int(width2); i++)
+ uint width3 = width >> 3;
+ for(int i = 0; i < int(width3); i++)
{
- vec4 data = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0));
- max_val = MAX_OP(data, max_val);
+ vec4 data[2];
+ data[0] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0));
+ data[1] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, (i << 3) + 4, 0));
+ max_val = MAX_OP(data[0], max_val);
+ max_val = MAX_OP(data[1], max_val);
}
-#ifdef NON_MULTIPLE_OF_4
- // Handle non multiple of 4
- for(int i = int(width2 << 2); i < int(width); i++)
+#ifdef NON_MULTIPLE_OF_8
+ // Handle non multiple of 8
+ for(int i = int(width3 << 3); i < int(width); i++)
{
float data = LOAD(src_ptr, IMAGE_OFFSET(src_iter, i, 0));
max_val.x = MAX_OP(data, max_val.x);
}
-#endif /* NON_MULTIPLE_OF_4 */
+#endif /* NON_MULTIPLE_OF_8 */
// Perform max reduction
max_val.xy = MAX_OP(max_val.xy, max_val.zw);
@@ -111,25 +114,29 @@ void main(void)
vec4 max_val = vec4_min;
// Calculate max of row
- uint width2 = width >> 2;
- for(int i = 0; i < int(width2); i++)
+ uint width3 = width >> 3;
+ for(int i = 0; i < int(width3); i++)
{
- vec4 data = VLOAD2_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0));
- max_val = MAX_OP(data, max_val);
+ vec4 data[2];
+ data = VLOAD4_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0));
+ max_val = MAX_OP(data[0], max_val);
+ max_val = MAX_OP(data[1], max_val);
}
-#ifdef NON_MULTIPLE_OF_4
- // Handle non multiple of 4
- for(int i = int(width2 << 2); i < int(width); i = i + 2)
+#ifdef NON_MULTIPLE_OF_8
+ // Handle non multiple of 8
+ uint width1 = width >> 1 << 1;
+ for(int i = int(width3 << 3); i < int(width1); i = i + 2)
{
- vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0));
+ vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0));
+ max_val.xy = MAX_OP(data, max_val.xy);
+ }
+ if(width != width1)
+ {
+ vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, width1, 0));
max_val.x = MAX_OP(data.x, max_val.x);
- if((i + 1) < int(width))
- {
- max_val.x = MAX_OP(data.y, max_val.x);
- }
}
-#endif /* NON_MULTIPLE_OF_4 */
+#endif /* NON_MULTIPLE_OF_8 */
// Perform max reduction
max_val.xy = MAX_OP(max_val.xy, max_val.zw);
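For FP16, values are packed two per 32-bit word, so the tail first consumes complete pairs up to the largest even index (width1) and, when the width is odd, loads one last pair and uses only its .x lane. A small C++ sketch of that tail logic, assuming a plain float array stands in for the unpacked half values (row, width8 and max_val are hypothetical stand-ins for the shader's state):

```cpp
#include <algorithm>
#include <cstddef>

// Tail handling for the FP16 path: consume pairs up to the largest even
// index, then (for odd widths) take only the first lane of the final pair.
float row_max_fp16_tail(const float *row, std::size_t width, std::size_t width8, float max_val)
{
    const std::size_t width1 = (width >> 1) << 1; // width rounded down to an even value
    for(std::size_t i = width8 << 3; i < width1; i += 2)
    {
        max_val = std::max(max_val, std::max(row[i], row[i + 1]));
    }
    if(width != width1)
    {
        max_val = std::max(max_val, row[width1]); // only the .x lane is valid
    }
    return max_val;
}
```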
@@ -146,7 +153,7 @@ void main(void)
* then exponentiates each element and sums all elements across each row.
*
* @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
- * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed.
+ * @note In case the input is not a multiple of 8, NON_MULTIPLE_OF_8 must be passed.
*
* @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16/F32
* @param[in] src_attrs The attributes of the source tensor
@@ -187,19 +194,25 @@ void main(void)
vec4 sum1D = vec4(0);
// Shift values, exp and sum
- uint width2 = width >> 2;
- for(int i = 0; i < int(width2); i++)
+ uint width3 = width >> 3;
+ for(int i = 0; i < int(width3); i++)
{
- vec4 data = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0));
- data = SUB_OP(data, max_val);
- data = EXP_OP(data);
- VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, i << 2, 0), data);
- sum1D = ADD_OP(sum1D, data);
+ vec4 data[2];
+ data[0] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0));
+ data[1] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, (i << 3) + 4, 0));
+ data[0] = SUB_OP(data[0], max_val);
+ data[1] = SUB_OP(data[1], max_val);
+ data[0] = EXP_OP(data[0]);
+ data[1] = EXP_OP(data[1]);
+ VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, i << 3, 0), data[0]);
+ VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, (i << 3) + 4, 0), data[1]);
+ sum1D = ADD_OP(sum1D, data[0]);
+ sum1D = ADD_OP(sum1D, data[1]);
}
-#ifdef NON_MULTIPLE_OF_4
- // Handle non multiple of 4
- for(int i = int(width2 << 2); i < int(width); i++)
+#ifdef NON_MULTIPLE_OF_8
+ // Handle non multiple of 8
+ for(int i = int(width3 << 3); i < int(width); i++)
{
float data = LOAD(src_ptr, IMAGE_OFFSET(src_iter, i, 0));
data = SUB_OP(data, max_val.x);
@@ -207,7 +220,7 @@ void main(void)
STORE(dst_ptr, IMAGE_OFFSET(dst_iter, i, 0), data);
sum1D.x = ADD_OP(sum1D.x, data);
}
-#endif /* NON_MULTIPLE_OF_4 */
+#endif /* NON_MULTIPLE_OF_8 */
// Perform sum reduction
sum1D.xy = ADD_OP(sum1D.xy, sum1D.zw);
@@ -238,44 +251,40 @@ void main(void)
vec4 sum1D = vec4(0.f);
// Shift values, exp and sum
- uint width2 = width >> 2;
- for(int i = 0; i < int(width2); i++)
+ uint width3 = width >> 3;
+ for(int i = 0; i < int(width3); i++)
{
- vec4 data = VLOAD2_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 2, 0));
- data = SUB_OP(data, max_val);
- data = EXP_OP(data);
- VSTORE2_PACK4_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i << 2, 0), data);
- sum1D = ADD_OP(sum1D, data);
+ vec4 data[2];
+ data = VLOAD4_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, i << 3, 0));
+ data[0] = SUB_OP(data[0], max_val);
+ data[1] = SUB_OP(data[1], max_val);
+ data[0] = EXP_OP(data[0]);
+ data[1] = EXP_OP(data[1]);
+ VSTORE4_PACK8_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i << 3, 0), data);
+ sum1D = ADD_OP(sum1D, data[0]);
+ sum1D = ADD_OP(sum1D, data[1]);
}
-#ifdef NON_MULTIPLE_OF_4
- // Handle non multiple of 4
- for(int i = int(width2 << 2); i < int(width); i = i + 2)
+#ifdef NON_MULTIPLE_OF_8
+ // Handle non multiple of 8
+ uint width1 = width >> 1 << 1;
+ for(int i = int(width3 << 3); i < int(width1); i = i + 2)
{
- float data;
- vec2 datamiddle = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0));
- data = SUB_OP(datamiddle.x, max_val.x);
- data = EXP_OP(data);
- vec2 datares;
- if((i + 1) < int(width))
- {
- float data2;
- data2 = SUB_OP(datamiddle.y, max_val.x);
- data2 = EXP_OP(data2);
- datares = vec2(data, data2);
- data = ADD_OP(data2, data);
- }
- else
- {
- datares = vec2(data, 0.f);
- }
-
- STORE_PACK2_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i, 0), datares);
-
+ vec2 data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, i, 0));
+ data = SUB_OP(data, max_val.xy);
+ data = EXP_OP(data);
+ STORE_PACK2_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, i, 0), data);
+ sum1D.xy = ADD_OP(sum1D.xy, data);
+ }
+ if(width != width1)
+ {
+ float data = LOAD_UNPACK2_HALF(src_ptr, IMAGE_OFFSET(src_iter, width1, 0)).x;
+ data = SUB_OP(data, max_val.x);
+ data = EXP_OP(data);
+ STORE_PACK2_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, width1, 0), vec2(data, 0.0));
sum1D.x = ADD_OP(sum1D.x, data);
}
-#endif /* NON_MULTIPLE_OF_4 */
-
+#endif /* NON_MULTIPLE_OF_8 */
// Perform sum reduction
sum1D.xy = ADD_OP(sum1D.xy, sum1D.zw);
sum1D.x = ADD_OP(sum1D.x, sum1D.y);
@@ -317,8 +326,12 @@ void main(void)
// Load max value of 1D logits vector (row)
vec4 sum_val = vec4(LOAD(sum_ptr, IMAGE_OFFSET(sum_iter, 0, gl_GlobalInvocationID.y)));
- vec4 data = VLOAD4_CURRENT_ITEM(vec4, src_ptr, src_iter);
- VSTORE4_CURRENT_ITEM(dst_ptr, dst_iter, DIV_OP(data, sum_val));
+
+ vec4 data[2];
+ data[0] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, 0, 0));
+ data[1] = VLOAD4(vec4, src_ptr, IMAGE_OFFSET(src_iter, 4, 0));
+ VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, 0, 0), DIV_OP(data[0], sum_val));
+ VSTORE4(dst_ptr, IMAGE_OFFSET(dst_iter, 4, 0), DIV_OP(data[1], sum_val));
}
#elif defined(DATA_TYPE_FP16)
TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, readonly);
@@ -332,8 +345,13 @@ void main(void)
// Load max value of 1D logits vector (row)
vec4 sum_val = vec4(LOAD_UNPACK2_HALF(sum_ptr, IMAGE_OFFSET(sum_iter, 0, gl_GlobalInvocationID.y)).x);
- vec4 data = VLOAD2_UNPACK4_CURRENT_ITEM_HALF(src_ptr, src_iter);
- VSTORE2_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, DIV_OP(data, sum_val));
+
+ vec4 data[2];
+ data = VLOAD4_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, 0, 0));
+ vec4 ret[2];
+ ret[0] = DIV_OP(data[0], sum_val);
+ ret[1] = DIV_OP(data[1], sum_val);
+ VSTORE4_PACK8_HALF(dst_ptr, IMAGE_OFFSET(dst_iter, 0, 0), ret);
}
#else // DATA_TYPE_FP32
#error Data type not supported
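Taken together, the three kernels implement the standard numerically stable softmax: subtract the row maximum, exponentiate, accumulate the row sum, then divide. A compact scalar C++ reference of the same computation, for comparison only:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Reference softmax over one row: shift by the max, exponentiate, normalize.
std::vector<float> softmax_row(const std::vector<float> &row)
{
    const float        max_val = *std::max_element(row.begin(), row.end());
    std::vector<float> out(row.size());
    float              sum = 0.f;
    for(std::size_t i = 0; i < row.size(); ++i)
    {
        out[i] = std::exp(row[i] - max_val); // shift for numerical stability
        sum += out[i];
    }
    for(float &v : out)
    {
        v /= sum;
    }
    return out;
}
```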
diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
index 29a1385f87..040a66358f 100644
--- a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp
@@ -66,10 +66,10 @@ void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output)
build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
build_opts.insert("#define SOFTMAX_LAYER_MAX");
- // Tell the kernel that the width is not a multiple of 4
- if((input->info()->dimension(0) % 4) != 0)
+ // Tell the kernel that the width is not a multiple of 8
+ if((input->info()->dimension(0) % 8) != 0)
{
- build_opts.insert("#define NON_MULTIPLE_OF_4");
+ build_opts.insert("#define NON_MULTIPLE_OF_8");
}
// Create kernel
@@ -80,8 +80,8 @@ void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output)
_kernel.set_argument(idx++, input->info()->dimension(0));
// Configure kernel window
- // The kernel loops over all elements in steps of 4
- const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4);
+ // The kernel loops over all elements in steps of 8
+ const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8);
unsigned int num_elems_written_per_iteration = 1;
if(input->info()->data_type() == DataType::F16)
{
@@ -131,10 +131,10 @@ void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTen
build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1));
build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM");
- // Tell the kernel that the width is not a multiple of 4
- if((input->info()->dimension(0) % 4) != 0)
+ // Tell the kernel that the width is not a multiple of 8
+ if((input->info()->dimension(0) % 8) != 0)
{
- build_opts.insert("#define NON_MULTIPLE_OF_4");
+ build_opts.insert("#define NON_MULTIPLE_OF_8");
}
// Create kernel
@@ -145,8 +145,8 @@ void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTen
_kernel.set_argument(idx++, input->info()->dimension(0));
// Configure window
- // The kernel loops over all elements in steps of 4
- const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4);
+ // The kernel loops over all elements in steps of 8
+ const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 8);
unsigned int num_elems_written_per_iteration = 1;
if(input->info()->data_type() == DataType::F16)
{
@@ -227,7 +227,7 @@ void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *su
_kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts));
// Configure window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
+ constexpr unsigned int num_elems_processed_per_iteration = 8;
unsigned int num_elems_written_per_iteration = 1;
if(input->info()->data_type() == DataType::F16)
{
diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
index d7d47d2802..1db927c8ff 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
@@ -61,6 +61,8 @@ void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output)
void GCSoftmaxLayer::run()
{
GCScheduler::get().enqueue(_max_kernel, false);
+ GCScheduler::get().sync();
GCScheduler::get().enqueue(_shift_exp_sum_kernel, false);
+ GCScheduler::get().sync();
GCScheduler::get().enqueue(_norm_kernel);
}
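The added GCScheduler::get().sync() calls make each kernel's output visible before the next dependent dispatch reads it (the max feeds shift/exp/sum, which feeds norm). A minimal usage sketch of the function after this change, reusing only the types and signatures visible elsewhere in this patch:

```cpp
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"

// Hypothetical caller: configure once, then run. The syncs added above order
// the three dispatches: max -> sync -> shift/exp/sum -> sync -> norm.
void softmax_example(arm_compute::GCTensor &src, arm_compute::GCTensor &dst)
{
    arm_compute::GCSoftmaxLayer softmax;
    softmax.configure(&src, &dst); // signature shown in the hunk header above
    softmax.run();
}
```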
diff --git a/tests/benchmark/CL/SoftmaxLayer.cpp b/tests/benchmark/CL/SoftmaxLayer.cpp
new file mode 100644
index 0000000000..6f0918fd95
--- /dev/null
+++ b/tests/benchmark/CL/SoftmaxLayer.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/SoftmaxLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+} // namespace
+
+using CLSoftmaxLayerFixture = SoftmaxLayerFixture<CLTensor, CLSoftmaxLayer, CLAccessor>;
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, CLSoftmaxLayerFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(datasets::SoftmaxLayerSmallShapes(), data_types));
+
+TEST_SUITE(NIGHTLY)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(datasets::SoftmaxLayerLargeShapes(), data_types));
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace test
+} // namespace arm_compute
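The fixture is backend-agnostic, so other backends can instantiate it in the same way. A hypothetical NEON instantiation, assuming the usual Compute Library layout (NESoftmaxLayer, Tensor and Accessor are not part of this patch):

```cpp
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "tests/NEON/Accessor.h"
#include "tests/benchmark/fixtures/SoftmaxLayerFixture.h"

namespace arm_compute
{
namespace test
{
// Same fixture, NEON backend: plain Tensor, NESoftmaxLayer and the NEON Accessor.
using NESoftmaxLayerFixture = SoftmaxLayerFixture<Tensor, NESoftmaxLayer, Accessor>;
} // namespace test
} // namespace arm_compute
```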
diff --git a/tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp
new file mode 100644
index 0000000000..66123aa57f
--- /dev/null
+++ b/tests/benchmark/GLES_COMPUTE/SoftmaxLayer.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/benchmark/fixtures/SoftmaxLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+} // namespace
+
+using GCSoftmaxLayerFixture = SoftmaxLayerFixture<GCTensor, GCSoftmaxLayer, GCAccessor>;
+
+TEST_SUITE(GC)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, GCSoftmaxLayerFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(datasets::SoftmaxLayerSmallShapes(), data_types));
+
+TEST_SUITE(NIGHTLY)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SoftmaxLayer, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(datasets::SoftmaxLayerLargeShapes(), data_types));
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h
index 09e6cbfaf8..b526cc3870 100644
--- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h
+++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h
@@ -81,8 +81,6 @@ public:
#ifdef ARM_COMPUTE_GC
if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
{
- GCScheduler::get().sync();
- force_sync_tensor(src);
force_sync_tensor(dst);
}
#endif /* ARM_COMPUTE_GC */
diff --git a/tests/benchmark/fixtures/SoftmaxLayerFixture.h b/tests/benchmark/fixtures/SoftmaxLayerFixture.h
new file mode 100644
index 0000000000..6e0472ce3a
--- /dev/null
+++ b/tests/benchmark/fixtures/SoftmaxLayerFixture.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SOFTMAXLAYERFIXTURE
+#define ARM_COMPUTE_TEST_SOFTMAXLAYERFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+
+#ifdef ARM_COMPUTE_GC
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "tests/GLES_COMPUTE/Helper.h"
+
+using namespace arm_compute::test::gles_compute;
+#endif /* ARM_COMPUTE_GC */
+
+namespace arm_compute
+{
+namespace test
+{
+/** Fixture that can be used for NEON, CL and OpenGL ES */
+template <typename TensorType, typename Function, typename Accessor>
+class SoftmaxLayerFixture : public framework::Fixture
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, DataType data_type)
+ {
+ // Set batched in source and destination shapes
+ const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+
+ // Create tensors
+ src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+ dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Create and configure function
+ smx_layer.configure(&src, &dst);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ // Fill tensors
+ library->fill_tensor_uniform(Accessor(src), 0);
+ }
+
+ void run()
+ {
+ smx_layer.run();
+#ifdef ARM_COMPUTE_GC
+ if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
+ {
+ force_sync_tensor(dst);
+ }
+#endif /* ARM_COMPUTE_GC */
+ }
+
+ void teardown()
+ {
+ src.allocator()->free();
+ dst.allocator()->free();
+ }
+
+private:
+ TensorType src{};
+ TensorType dst{};
+ Function smx_layer{};
+};
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SOFTMAXLAYERFIXTURE */
diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp
index 888f87e9ef..a2114a9c37 100644
--- a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp
@@ -79,7 +79,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datase
validate(dst.info()->valid_region(), valid_region);
// Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 4).required_padding();
+ const PaddingSize padding = PaddingCalculator(shape.x(), 8).required_padding();
validate(src.info()->padding(), padding);
validate(dst.info()->padding(), padding);
}
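The expected padding now rounds the row width up to the next multiple of 8, matching the 8 elements processed per iteration. A short sketch of that arithmetic (the same idea as ceil_to_multiple; not the PaddingCalculator implementation):

```cpp
#include <cstdio>

// Padding needed so the row width becomes the next multiple of the processing step.
unsigned int required_padding(unsigned int width, unsigned int step = 8)
{
    const unsigned int rounded = ((width + step - 1) / step) * step; // ceil_to_multiple(width, step)
    return rounded - width;
}

int main()
{
    std::printf("width 13 -> padding %u\n", required_padding(13)); // prints 3
    return 0;
}
```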