Diffstat (limited to 'tests/validation/fixtures/GEMMFixture.h')
-rw-r--r--   tests/validation/fixtures/GEMMFixture.h   923
1 file changed, 675 insertions(+), 248 deletions(-)
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index efe7567075..94bedc83e1 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
-#define ARM_COMPUTE_TEST_GEMM_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorShape.h"
@@ -34,6 +34,7 @@
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/GEMM.h"
#include <random>
@@ -44,16 +45,15 @@ namespace test
{
namespace validation
{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
-class GEMMValidationFixture : public framework::Fixture
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMGenericValidationFixture : public framework::Fixture
{
public:
- template <typename...>
- void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false)
{
ARM_COMPUTE_UNUSED(pretranspose);
- _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
- _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
+ _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate);
+ _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate);
}
protected:
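
The new `accumulate` flag (defaulted to false so existing callers keep their behaviour) makes the fixture pre-fill the destination and fold it into the result, i.e. dst = alpha * A * B + beta * C + dst. A minimal reference-side sketch of that semantic, assuming only SimpleTensor's element access and the reference::gemm helper already included above:

template <typename T>
SimpleTensor<T> gemm_with_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b,
                                     const SimpleTensor<T> &c, float alpha, float beta,
                                     SimpleTensor<T> dst) // dst arrives pre-filled (seed 6)
{
    // alpha * A * B + beta * C, as computed by the existing reference
    const SimpleTensor<T> product = reference::gemm<T>(a, b, c, alpha, beta);
    // Fold the previous dst contents on top (what the accumulate path checks)
    for(int i = 0; i < product.num_elements(); ++i)
    {
        dst[i] += product[i];
    }
    return dst;
}
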
@@ -63,9 +63,14 @@ protected:
switch(tensor.data_type())
{
case DataType::F16:
+ {
+ arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(lo), float(hi) };
+ library->fill(tensor, distribution, i);
+ break;
+ }
case DataType::F32:
{
- std::uniform_real_distribution<> distribution(lo, hi);
+ std::uniform_real_distribution<float> distribution(lo, hi);
library->fill(tensor, distribution, i);
break;
}
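
The dedicated F16 branch exists because std::uniform_real_distribution is only specified for float, double and long double; instantiating it with half is undefined behaviour. The later fixtures in this patch fold the same decision into a type alias, sketched here:

// Choose a distribution that is valid for the element type T: the 16-bit-aware
// helper for half, the standard one otherwise (same pattern as the fixtures below).
using DistributionType = typename std::conditional<std::is_same<T, half>::value,
                                                   arm_compute::utils::uniform_real_distribution_16bit<T>,
                                                   std::uniform_real_distribution<T>>::type;
DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
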
@@ -75,7 +80,7 @@ protected:
}
TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
- DataType data_type)
+ DataType data_type, bool accumulate=false)
{
// Create tensors
TensorType a = create_tensor<TensorType>(shape_a, data_type, 1);
@@ -93,12 +98,14 @@ protected:
(disable_c) ? nullptr : &c,
&dst,
alpha, beta,
- GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, (reinterpret_input_as_3d
- || reinterpret_output_as_3d)));
- ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
+ || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate));
+ ARM_COMPUTE_ASSERT(a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(c.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+
+ add_padding_x({ &a, &b, &c, &dst });
// Allocate tensors
a.allocator()->allocate();
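
For readability, the growing positional GEMMInfo call above can be spelled with named locals; the parameter names below are assumed from GEMMInfo's constructor, and only `accumulate` is new in this patch. The add_padding_x call that follows it extends the tensors with x-padding so the kernels are also exercised on padded inputs.

// Hedged sketch: the same GEMMInfo with its positional arguments named.
const int  depth_output_gemm3d = reinterpret_output_as_3d ? output_shape[2] : 0;
const bool broadcast_bias      = reinterpret_input_as_3d || reinterpret_output_as_3d;
const GEMMInfo gemm_info(false /* is_a_reshaped */, false /* is_b_reshaped */,
                         false /* reshape_b_only_on_first_run */, depth_output_gemm3d,
                         reinterpret_input_as_3d, false /* retain_internal_weights */,
                         GEMMLowpOutputStageInfo(), false /* fp_mixed_precision */,
                         false /* fast_math */, broadcast_bias, ActivationLayerInfo(),
                         false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED,
                         false /* pretranspose_B */, accumulate);
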
@@ -106,18 +113,33 @@ protected:
c.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(a), 0);
fill(AccessorType(b), 1);
+ // Seed the accumulator when testing the accumulate path
+ if (accumulate)
+ {
+ fill(AccessorType(dst), 6);
+ }
if(!disable_c)
{
fill(AccessorType(c), 2);
}
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ gemm.run();
+ fill(AccessorType(a), 3); // Fill tensors with new seed after run
+ fill(AccessorType(b), 4);
+ if(!disable_c)
+ {
+ fill(AccessorType(c), 5);
+ }
+ }
// Compute GEMM function
gemm.run();
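
The run_twice path re-runs the configured function on freshly seeded inputs, so any state the first run leaves behind shows up as a mismatch against the reference (which compute_reference below refills with the same seeds). Hypothetical names for the seed convention used by the fills above:

// Illustration only; the fixture passes these values directly.
enum FillSeed : int
{
    SEED_A_FIRST  = 0, SEED_B_FIRST  = 1, SEED_C_FIRST  = 2, // inputs, first run
    SEED_A_SECOND = 3, SEED_B_SECOND = 4, SEED_C_SECOND = 5, // refilled before the final run
    SEED_DST_ACCUMULATOR = 6                                 // initial dst when accumulate=true
};
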
@@ -126,10 +148,9 @@ protected:
}
SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
- DataType data_type)
+ DataType data_type, bool accumulate=false)
{
TensorShape shape_a_to_use = shape_a;
-
if(reinterpret_input_as_3d)
{
// Collapse the second and third dimension if the input is 3D
@@ -140,6 +161,7 @@ protected:
SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
SimpleTensor<T> b{ shape_b, data_type, 1 };
SimpleTensor<T> c{ output_shape, data_type, 1 };
+ SimpleTensor<T> dst{ output_shape, data_type, 1 };
// Fill reference
fill(a, 0);
@@ -152,27 +174,96 @@ protected:
const int m = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(c.data() + i * n, c.data(), n * sizeof(T));
}
}
+ /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
+ therefore, A must be pre-transposed before passing it to the fixture. The fixture then transposes A again to restore (B x M x K),
+ so that it can call the reference implementation, which expects (B x M x K) input.
+ Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before passing it to the fixture. */
+
+ // Define transposed shapes
+ TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
+ TensorShape b_transposed_shape(b.shape().y(), b.shape().x());
+
+ // Define transposed tensors
+ SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
+ SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };
+
+ // pretranspose a if necessary
+ if(pretranspose_a)
+ {
+ transpose_matrix<T>(a, a_transposed);
+ }
+
+ // pretranspose b if necessary
+ if(pretranspose_b)
+ {
+ transpose_matrix<T>(b, b_transposed);
+ }
+
+ // Run with variable inputs.
+ if(run_twice)
+ {
+ reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+ fill((pretranspose_a) ? a_transposed : a, 3);
+ fill((pretranspose_b) ? b_transposed : b, 4);
+ fill(c, 5);
+ }
+
+ // Seed the accumulator for the in-place summation
+ if (accumulate)
+ {
+ fill(dst, 6);
+ }
+
// Setting beta to 0 will effectively disable C for the
// computation of the reference: alpha * A * B + 0 * C
- return reference::gemm<T>(a, b, c, alpha, disable_c ? 0.f : beta);
+ // Use the transposed tensors when the corresponding pretranspose flag is set, else the original tensors
+ if (accumulate)
+ {
+ reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst);
+ return dst;
+ }
+ else
+ {
+ return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
+ }
}
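
In other words, a pretransposed operand is handed in with its last two dimensions swapped, and the fixture swaps them back before invoking the reference. A minimal sketch, assuming a 2D slice and the transpose_matrix helper from tests/validation/Helpers.h:

template <typename T>
SimpleTensor<T> undo_pretranspose(const SimpleTensor<T> &pretransposed)
{
    // Swap x and y, exactly like a_transposed_shape / b_transposed_shape above
    TensorShape restored_shape(pretransposed.shape().y(), pretransposed.shape().x());
    SimpleTensor<T> restored{ restored_shape, pretransposed.data_type() };
    transpose_matrix<T>(pretransposed, restored);
    return restored; // back to (M x K) for A, (K x N) for B
}
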
TensorType _target{};
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ {
+ GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false>
+class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>
+{
+public:
+ void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
+ {
+ bool accumulate = true;
+ GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate);
+ }
+};
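
A hypothetical instantiation (type names for illustration only) shows how thin these wrappers are: they pin the `accumulate` flag and forward everything else to the generic fixture:

// Plain GEMM vs. accumulating GEMM over the same backend types.
using GEMMFixtureF32    = GEMMValidationFixture<Tensor, Accessor, NEGEMM, float>;
using GEMMAccFixtureF32 = GEMMAccumulateValidationFixture<Tensor, Accessor, NEGEMM, float>;
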
+
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
DataType data_type, GPUTarget gpu_arch)
{
@@ -191,11 +282,14 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
// Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
- std::uniform_real_distribution<> distribution_inf(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity());
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
library->fill_borders_with_garbage(tensor, distribution_inf, i);
}
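
Seeding the borders with infinity is a cheap NaN trap: a kernel that mistakenly reads padding and multiplies it by a zero weight produces inf * 0 = NaN, which the later validation flags. The arithmetic behind the trick, as a self-contained illustration:

#include <cassert>
#include <cmath>
#include <limits>

int main()
{
    const float padding = std::numeric_limits<float>::infinity();
    const float leaked  = padding * 0.0f; // IEEE 754: inf * 0 is NaN
    assert(std::isnan(leaked));           // any leak poisons dst and fails validation
}
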
@@ -216,12 +310,14 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- GEMMFunctionType gemm;
- gemm.configure(gpu_arch, &lhs, &rhs, &bias, &dst, alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+ GEMMOperatorType gemm;
+ gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ add_padding_x({ &lhs, &rhs, &bias, &dst });
// Allocate tensors
lhs.allocator()->allocate();
@@ -229,10 +325,10 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -240,7 +336,11 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
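
This is the function-to-operator migration applied throughout the patch: operators are configured on ITensorInfo only, and the actual tensors are bound at run time through an ITensorPack keyed by ACL_SRC_n / ACL_DST. A condensed sketch of the pattern, using the names from the fixture above:

GEMMOperatorType gemm;
// Configuration sees only metadata...
gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(),
               alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
// ...and memory is bound late, so one configured operator can serve many tensor sets.
ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                        { ACL_SRC_1, &rhs },
                        { ACL_SRC_2, &bias },
                        { ACL_DST, &dst } });
gemm.run(gemm_pack);
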
@@ -268,7 +368,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -282,11 +382,10 @@ protected:
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -308,7 +407,10 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
}
@@ -329,12 +431,14 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- GEMMFunctionType gemm;
- gemm.configure(gpu_arch, &lhs, &rhs, &bias, &dst, alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+ GEMMOperatorType gemm;
+ gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ add_padding_x({ &lhs, &rhs, &bias, &dst });
// Allocate tensors
lhs.allocator()->allocate();
@@ -342,10 +446,10 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -353,7 +457,11 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -381,7 +489,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -394,11 +502,10 @@ protected:
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -431,11 +538,14 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
// Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
- std::uniform_real_distribution<> distribution_inf(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity());
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
library->fill_borders_with_garbage(tensor, distribution_inf, i);
}
@@ -458,16 +568,22 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- ReshapeLHSFunctionType reshape_lhs;
- ReshapeRHSFunctionType reshape_rhs;
- GEMMFunctionType gemm;
- reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
- reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
- gemm.configure(gpu_arch, &lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
-
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ReshapeLHSOperatorType reshape_lhs;
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+ reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+ gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
+ if(!rhs_info.export_to_cl_image)
+ {
+ add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+ }
// Allocate tensors
lhs.allocator()->allocate();
@@ -477,12 +593,12 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -490,9 +606,15 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- reshape_lhs.run();
- reshape_rhs.run();
- gemm.run();
+ ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+ reshape_lhs.run(reshape_lhs_pack);
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -520,7 +642,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -534,11 +656,10 @@ protected:
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
{
@@ -574,7 +695,10 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
}
@@ -597,16 +721,22 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- ReshapeLHSFunctionType reshape_lhs;
- ReshapeRHSFunctionType reshape_rhs;
- GEMMFunctionType gemm;
- reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
- reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
- gemm.configure(gpu_arch, &lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
-
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ReshapeLHSOperatorType reshape_lhs;
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+ reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+ gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
+ if(!rhs_info.export_to_cl_image)
+ {
+ add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+ }
// Allocate tensors
lhs.allocator()->allocate();
@@ -616,12 +746,12 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -629,9 +759,15 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- reshape_lhs.run();
- reshape_rhs.run();
- gemm.run();
+ ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+ reshape_lhs.run(reshape_lhs_pack);
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -659,7 +795,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -672,13 +808,12 @@ protected:
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType, bool fp_mixed_precision = false>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
- bool interleave_rhs, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
+ bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
{
GEMMLHSMatrixInfo lhs_info;
lhs_info.m0 = m0;
@@ -688,11 +823,12 @@ public:
lhs_info.transpose = lhs_transpose;
GEMMRHSMatrixInfo rhs_info;
- rhs_info.n0 = n0;
- rhs_info.k0 = k0;
- rhs_info.h0 = h0;
- rhs_info.interleave = interleave_rhs;
- rhs_info.transpose = !lhs_transpose;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.h0 = h0;
+ rhs_info.interleave = interleave_rhs;
+ rhs_info.transpose = !lhs_transpose;
+ rhs_info.export_to_cl_image = export_to_cl_image;
// Set the tensor shapes for LHS and RHS matrices
const TensorShape lhs_shape(k, m, batch_size);
@@ -701,19 +837,25 @@ public:
broadcast_bias ? 1 : m,
broadcast_bias ? 1 : batch_size);
- _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
- _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+ if(validate_result)
+ {
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ }
}
protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
// Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
- std::uniform_real_distribution<> distribution_inf(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity());
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
library->fill_borders_with_garbage(tensor, distribution_inf, i);
}
@@ -744,16 +886,30 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- ReshapeLHSFunctionType reshape_lhs;
- ReshapeRHSFunctionType reshape_rhs;
- GEMMFunctionType gemm;
- reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
- reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
- gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+ ReshapeLHSOperatorType reshape_lhs;
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ validate_result = validate_result || !rhs_info.export_to_cl_image;
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+ gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
+ if(!rhs_info.export_to_cl_image)
+ {
+ add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+ }
// Allocate tensors
lhs.allocator()->allocate();
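
The new validate_result gate reads: proceed unless cl_image export was requested and the RHS reshape rejected it (e.g. the device lacks cl_image support or the shape breaks its pitch-alignment rules); in that case compute_target bails out and setup() skips the reference. The same logic with an intermediate name, as a sketch:

const bool reshape_ok = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
// Only a requested-but-unsupported cl_image export aborts the test case.
validate_result = reshape_ok || !rhs_info.export_to_cl_image;
if(!validate_result)
{
    return nullptr; // mirrors the early-out above; setup() then skips compute_reference()
}
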
@@ -763,12 +919,12 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -776,9 +932,15 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- reshape_lhs.run();
- reshape_rhs.run();
- gemm.run();
+ ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+ reshape_lhs.run(reshape_lhs_pack);
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -806,7 +968,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -823,18 +985,17 @@ protected:
}
}
+ bool validate_result = true;
TensorType _target{};
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType, bool fp_mixed_precision = false>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
- bool interleave_lhs,
- bool interleave_rhs, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
+ bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
{
GEMMLHSMatrixInfo lhs_info;
lhs_info.m0 = m0;
@@ -844,11 +1005,12 @@ public:
lhs_info.transpose = lhs_transpose;
GEMMRHSMatrixInfo rhs_info;
- rhs_info.n0 = n0;
- rhs_info.k0 = k0;
- rhs_info.h0 = h0;
- rhs_info.interleave = interleave_rhs;
- rhs_info.transpose = !lhs_transpose;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.h0 = h0;
+ rhs_info.interleave = interleave_rhs;
+ rhs_info.transpose = !lhs_transpose;
+ rhs_info.export_to_cl_image = export_to_cl_image;
// In case of GEMM3D, m is the product between m_w and m_h
const unsigned int m = m_w * m_h;
@@ -858,15 +1020,21 @@ public:
const TensorShape rhs_shape(n, k, batch_size);
const TensorShape bias_shape(n, 1, 1);
- _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
- _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
+ if(validate_result)
+ {
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
+ }
}
protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
}
@@ -897,16 +1065,30 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- ReshapeLHSFunctionType reshape_lhs;
- ReshapeRHSFunctionType reshape_rhs;
- GEMMFunctionType gemm;
- reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info);
- reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
- gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+ ReshapeLHSOperatorType reshape_lhs;
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ validate_result = validate_result || !rhs_info.export_to_cl_image;
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+ gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
+ if(!rhs_info.export_to_cl_image)
+ {
+ add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
+ }
// Allocate tensors
lhs.allocator()->allocate();
@@ -916,12 +1098,12 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -929,9 +1111,15 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- reshape_lhs.run();
- reshape_rhs.run();
- gemm.run();
+ ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
+ reshape_lhs.run(reshape_lhs_pack);
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -959,7 +1147,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -975,28 +1163,29 @@ protected:
}
}
+ bool validate_result = true;
TensorType _target{};
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
- bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+ bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
{
GEMMLHSMatrixInfo lhs_info;
lhs_info.m0 = m0;
lhs_info.k0 = k0;
GEMMRHSMatrixInfo rhs_info;
- rhs_info.n0 = n0;
- rhs_info.k0 = k0;
- rhs_info.h0 = h0;
- rhs_info.interleave = interleave_rhs;
- rhs_info.transpose = transpose_rhs;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.h0 = h0;
+ rhs_info.interleave = interleave_rhs;
+ rhs_info.transpose = transpose_rhs;
+ rhs_info.export_to_cl_image = export_to_cl_image;
// Set the tensor shapes for LHS and RHS matrices
const TensorShape lhs_shape(k, m, batch_size);
@@ -1005,19 +1194,25 @@ public:
broadcast_bias ? 1 : m,
broadcast_bias ? 1 : batch_size);
- _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
- _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+ if(validate_result)
+ {
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ }
}
protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
// Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
- std::uniform_real_distribution<> distribution_inf(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity());
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
library->fill_borders_with_garbage(tensor, distribution_inf, i);
}
@@ -1046,14 +1241,28 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- ReshapeRHSFunctionType reshape_rhs;
- GEMMFunctionType gemm;
- reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
- gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ validate_result = validate_result || !rhs_info.export_to_cl_image;
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+ gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
+ if(!rhs_info.export_to_cl_image)
+ {
+ add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
+ }
// Allocate tensors
lhs.allocator()->allocate();
@@ -1062,11 +1271,11 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -1074,8 +1283,13 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- reshape_rhs.run();
- gemm.run();
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1103,7 +1317,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1113,28 +1327,29 @@ protected:
return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
}
+ bool validate_result = true;
TensorType _target{};
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
- bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
+ bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
{
GEMMLHSMatrixInfo lhs_info;
lhs_info.m0 = m0;
lhs_info.k0 = k0;
GEMMRHSMatrixInfo rhs_info;
- rhs_info.n0 = n0;
- rhs_info.k0 = k0;
- rhs_info.h0 = h0;
- rhs_info.interleave = interleave_rhs;
- rhs_info.transpose = transpose_rhs;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.h0 = h0;
+ rhs_info.interleave = interleave_rhs;
+ rhs_info.transpose = transpose_rhs;
+ rhs_info.export_to_cl_image = export_to_cl_image;
// In case of GEMM3D, m is the product between m_w and m_h
const unsigned int m = m_w * m_h;
@@ -1144,21 +1359,27 @@ public:
const TensorShape rhs_shape(n, k, batch_size);
const TensorShape bias_shape(n, 1, 1);
- _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
- _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info, has_pad_y);
+ if(validate_result)
+ {
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
+ }
}
protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
}
TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
DataType data_type, float alpha, float beta,
- unsigned int m_h, const ActivationLayerInfo &act_info)
+ unsigned int m_h, const ActivationLayerInfo &act_info, bool has_pad_y)
{
// Create tensors
TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
@@ -1178,18 +1399,39 @@ protected:
kernel_info.reinterpret_input_as_3d = false;
kernel_info.broadcast_bias = true;
kernel_info.activation_info = act_info;
+ kernel_info.has_pad_y = has_pad_y;
// The output tensor will be auto-initialized within the function
-
// Create and configure function
- ReshapeRHSFunctionType reshape_rhs;
- GEMMFunctionType gemm;
- reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
- gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ validate_result = validate_result || !rhs_info.export_to_cl_image;
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+ gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ if(has_pad_y)
+ {
+ // Add dummy padding into lhs to validate has_pad_y path
+ lhs.info()->extend_padding(PaddingSize(2, 0, 2, 0));
+ dst.info()->extend_padding(PaddingSize(2, 0, 1, 0));
+ }
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // We do not pad when using image as it needs to comply with strict pitch alignment restrictions
+ if(!rhs_info.export_to_cl_image)
+ {
+ add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
+ }
// Allocate tensors
lhs.allocator()->allocate();
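
The has_pad_y path deliberately injects y-padding into lhs and dst before allocation so the kernel's pad-y handling is exercised. Assuming PaddingSize follows BorderSize's (top, right, bottom, left) constructor order, the two calls above mean:

if(has_pad_y)
{
    // 2 padding rows above and below the lhs plane
    lhs.info()->extend_padding(PaddingSize(2 /* top */, 0, 2 /* bottom */, 0));
    // 2 padding rows above and 1 below the dst plane
    dst.info()->extend_padding(PaddingSize(2 /* top */, 0, 1 /* bottom */, 0));
}
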
@@ -1198,11 +1440,11 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -1210,8 +1452,13 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- reshape_rhs.run();
- gemm.run();
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1239,7 +1486,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+ // In case of broadcast, we simply need to copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1248,15 +1495,15 @@ protected:
return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
}
+ bool validate_result = true;
TensorType _target{};
SimpleTensor<T> _reference{};
};
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
const ActivationLayerInfo &act_info)
{
@@ -1283,11 +1530,14 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
// Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
- std::uniform_real_distribution<> distribution_inf(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity());
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
library->fill_borders_with_garbage(tensor, distribution_inf, i);
}
@@ -1313,12 +1563,14 @@ protected:
kernel_info.activation_info = act_info;
// Create and configure function
- GEMMFunctionType gemm;
- gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+ GEMMOperatorType gemm;
+ gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ add_padding_x({ &lhs, &rhs, &bias, &dst });
// Allocate tensors
lhs.allocator()->allocate();
@@ -1326,10 +1578,10 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -1337,7 +1589,11 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
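The run step above hands tensors to the operator through an ITensorPack keyed by the ACL_SRC_0/ACL_SRC_1/ACL_SRC_2/ACL_DST slots. The same pack can also be built incrementally with ITensorPack::add_tensor; a sketch assuming the lhs, rhs, bias and dst tensors from compute_target above:

    // Equivalent to the brace-initialized pack above; add_tensor() inserts
    // one (slot id, tensor pointer) pair at a time.
    ITensorPack pack;
    pack.add_tensor(ACL_SRC_0, &lhs);  // matrix A
    pack.add_tensor(ACL_SRC_1, &rhs);  // matrix B
    pack.add_tensor(ACL_SRC_2, &bias); // bias
    pack.add_tensor(ACL_DST, &dst);    // output
    gemm.run(pack);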
@@ -1365,7 +1621,7 @@ protected:
if(broadcast_bias)
{
- // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we simply copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1379,11 +1635,10 @@ protected:
SimpleTensor<T> _reference{};
};
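The broadcast branch in compute_reference replicates a 1 x N bias row across all M rows of every batch before calling reference::gemm. A self-contained sketch of that replication, with a plain std::vector standing in for SimpleTensor (sizes are illustrative only):

    #include <cstring>
    #include <vector>

    int main()
    {
        const int n = 4, m = 3, batch_size = 2;
        std::vector<float> bias(n * m * batch_size);

        // Only the first row carries real data...
        for(int j = 0; j < n; ++j)
        {
            bias[j] = float(j);
        }

        // ...so copy it into the remaining m * batch_size - 1 rows, exactly
        // as the fixture's broadcast loop does.
        for(int i = 1; i < m * batch_size; ++i)
        {
            std::memcpy(bias.data() + i * n, bias.data(), n * sizeof(float));
        }
        return 0;
    }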
-template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType>
+template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
- template <typename...>
void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
const ActivationLayerInfo &act_info)
{
@@ -1411,7 +1666,10 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types are supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
library->fill(tensor, distribution, i);
}
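The compile-time dispatch in fill() exists because std::uniform_real_distribution is only specified for float, double and long double, so half values must come from the library's 16-bit helper instead. A standalone sketch of the same std::conditional pattern, with a hypothetical half16 type and 16-bit distribution standing in for arm_compute's:

    #include <random>
    #include <type_traits>

    // Hypothetical stand-ins for arm_compute's half type and its
    // uniform_real_distribution_16bit helper; illustration only.
    struct half16
    {
        float v;
    };

    struct uniform_real_distribution_16bit
    {
        std::uniform_real_distribution<float> d;
        uniform_real_distribution_16bit(float lo, float hi) : d(lo, hi) {}
        template <typename G>
        half16 operator()(G &g)
        {
            return half16{ d(g) };
        }
    };

    // Pick the distribution type at compile time, as the fixtures do.
    template <typename T>
    using DistributionType = typename std::conditional<std::is_same<T, half16>::value,
                                                       uniform_real_distribution_16bit,
                                                       std::uniform_real_distribution<T>>::type;

    int main()
    {
        std::mt19937 gen(42);
        DistributionType<float>  d_f32(-1.0f, 1.0f);
        DistributionType<half16> d_f16(-1.0f, 1.0f);
        (void)d_f32(gen);
        (void)d_f16(gen);
        return 0;
    }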
@@ -1439,12 +1697,14 @@ protected:
// The output tensor will be auto-initialized within the function
// Create and configure function
- GEMMFunctionType gemm;
- gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info);
+ GEMMOperatorType gemm;
+ gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
- ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ add_padding_x({ &lhs, &rhs, &bias, &dst });
// Allocate tensors
lhs.allocator()->allocate();
@@ -1452,10 +1712,10 @@ protected:
bias.allocator()->allocate();
dst.allocator()->allocate();
- ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
fill(AccessorType(lhs), 0);
@@ -1463,7 +1723,11 @@ protected:
fill(AccessorType(bias), 2);
// Compute GEMM
- gemm.run();
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
return dst;
}
@@ -1491,7 +1755,7 @@ protected:
fill(rhs, 1);
fill(bias, 2);
- // In case of broadcast, we need simply copy the first into the following "M" ones
+            // In case of broadcast, we simply copy the first row into the following "M" ones
for(int i = 1; i < m * batch_size; i++)
{
memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
@@ -1504,7 +1768,170 @@ protected:
SimpleTensor<T> _reference{};
};
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
+class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
+{
+public:
+    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
+               float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+ {
+ GEMMLHSMatrixInfo lhs_info;
+ lhs_info.m0 = m0;
+ lhs_info.k0 = k0;
+
+ GEMMRHSMatrixInfo rhs_info;
+ rhs_info.n0 = n0;
+ rhs_info.k0 = k0;
+ rhs_info.interleave = true;
+ rhs_info.transpose = false;
+ rhs_info.h0 = 4;
+ rhs_info.export_to_cl_image = export_to_cl_image;
+
+ // Set the tensor shapes for LHS and RHS matrices
+ const TensorShape lhs_shape(k, m, batch_size);
+ const TensorShape rhs_shape(n, k, batch_size);
+ const TensorShape bias_shape(n,
+ broadcast_bias ? 1 : m,
+ broadcast_bias ? 1 : batch_size);
+
+ _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
+ _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types are supported.");
+ using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
+
+ DistributionType distribution{ T(-1.0f), T(1.0f) };
+ library->fill(tensor, distribution, i);
+
+        // Fill the border with infinity in order to check for the presence of NaN values (i.e. inf * 0)
+ DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
+ library->fill_borders_with_garbage(tensor, distribution_inf, i);
+ }
+
+ TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
+ {
+ // Create tensors
+ TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
+ TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
+ TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
+ TensorType rhs_reshaped;
+ TensorType dst;
+
+ const unsigned int M = lhs_shape[1];
+ const unsigned int N = rhs_shape[0];
+ const unsigned int K = lhs_shape[0];
+ GEMMKernelInfo kernel_info;
+ kernel_info.m = M;
+ kernel_info.n = N;
+ kernel_info.k = K;
+ kernel_info.depth_output_gemm3d = 0;
+ kernel_info.reinterpret_input_as_3d = false;
+ kernel_info.broadcast_bias = broadcast_bias;
+ kernel_info.activation_info = act_info;
+
+ // Create and configure function
+ ReshapeRHSOperatorType reshape_rhs;
+ GEMMOperatorType gemm;
+
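+        // Query support first: if either validate() call rejects this configuration,
+        // skip the run; compute_reference checks validate_result and matches with an empty tensor.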
+ validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
+
+ validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
+ if(!validate_result)
+ {
+ return nullptr;
+ }
+
+ gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
+
+ ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
+
+ // Allocate tensors
+ lhs.allocator()->allocate();
+ rhs.allocator()->allocate();
+ rhs_reshaped.allocator()->allocate();
+ bias.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
+ ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+
+ // Fill tensors
+ fill(AccessorType(lhs), 0);
+ fill(AccessorType(rhs), 1);
+ fill(AccessorType(bias), 2);
+
+ // Compute GEMM
+ ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
+ reshape_rhs.run(reshape_rhs_pack);
+ ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
+ { ACL_SRC_1, &rhs_reshaped },
+ { ACL_SRC_2, &bias },
+ { ACL_DST, &dst } });
+ gemm.run(gemm_pack);
+
+ return dst;
+ }
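compute_target follows ACL's validate-then-configure discipline: a static validate() is consulted before any expensive configuration, and unsupported cases (for example, targets without the MMUL extension) are skipped rather than allowed to fail inside configure(). A self-contained sketch of the pattern with stub types (not the real arm_compute classes):

    #include <iostream>

    // Stub stand-ins for this sketch only.
    struct Status
    {
        bool ok;
        explicit operator bool() const
        {
            return ok;
        }
    };

    struct Op
    {
        static Status validate(int m, int n)
        {
            return Status{ m > 0 && n > 0 };
        }
        void configure(int, int)
        {
        }
    };

    int main()
    {
        Op op;
        // Ask whether the configuration is supported before configuring.
        if(bool(Op::validate(16, 0)))
        {
            op.configure(16, 0);
        }
        else
        {
            std::cout << "configuration not supported, skipping\n";
        }
        return 0;
    }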
+
+ SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
+ const ActivationLayerInfo &act_info)
+ {
+        if(!validate_result)
+        {
+            return SimpleTensor<T>();
+        }
+
+ TensorShape dst_shape = lhs_shape;
+ dst_shape[0] = rhs_shape[0];
+ dst_shape[1] = lhs_shape[1];
+
+ // Create reference
+ SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+ SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+ SimpleTensor<T> bias{ dst_shape, data_type, 1 };
+
+ const int n = rhs_shape[0];
+ const int m = lhs_shape[1];
+ const int batch_size = lhs_shape[2];
+
+ // Fill reference
+ fill(lhs, 0);
+ fill(rhs, 1);
+ fill(bias, 2);
+
+ if(broadcast_bias)
+ {
+            // In case of broadcast, we simply copy the first row into the following "M" ones
+ for(int i = 1; i < m * batch_size; i++)
+ {
+ memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
+ }
+ }
+
+ return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
+ }
+
+ bool validate_result = true;
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+};
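For completeness, the host-side reference these fixtures compare against boils down to a triple loop per batch. A naive, self-contained sketch (row-major, single batch, no activation; the real reference::gemm also handles batching and multiple data types):

    #include <vector>

    // dst = alpha * (lhs x rhs) + beta * bias
    // lhs: M x K, rhs: K x N, bias/dst: M x N, all row-major.
    std::vector<float> gemm_ref(const std::vector<float> &lhs,
                                const std::vector<float> &rhs,
                                const std::vector<float> &bias,
                                int m, int n, int k, float alpha, float beta)
    {
        std::vector<float> dst(m * n);
        for(int i = 0; i < m; ++i)
        {
            for(int j = 0; j < n; ++j)
            {
                float acc = 0.0f;
                for(int p = 0; p < k; ++p)
                {
                    acc += lhs[i * k + p] * rhs[p * n + j];
                }
                dst[i * n + j] = alpha * acc + beta * bias[i * n + j];
            }
        }
        return dst;
    }

    int main()
    {
        // 1x1 sanity check: 1.0 * (2 * 3) + 0.5 * 4 = 8.
        std::vector<float> lhs{ 2.0f }, rhs{ 3.0f }, bias{ 4.0f };
        return gemm_ref(lhs, rhs, bias, 1, 1, 1, 1.0f, 0.5f)[0] == 8.0f ? 0 : 1;
    }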
+
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H