From 0d27b2ee8d811d66693555ac1e7be44d93e662e2 Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Thu, 24 Aug 2023 14:01:20 +0100 Subject: Remove legacy PostOps code PostOps was the experimental interface for Dynamic Fusion. It is now replaced by the new Dynamic Fusion interface with code generation using the Compute Kernel Writer. Resolves: COMPMID-6190 Change-Id: I813b48facef2fd6f3aee332588886b4f9b3d33d8 Signed-off-by: Jakub Sujak Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10219 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins --- tests/validation/fixtures/GEMMFixture.h | 848 +++----------------------------- 1 file changed, 68 insertions(+), 780 deletions(-) (limited to 'tests/validation/fixtures/GEMMFixture.h') diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index f1e0ee9150..afde3d8067 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -21,14 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE -#define ARM_COMPUTE_TEST_GEMM_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/experimental/IPostOp.h" -#include "src/core/experimental/PostOpUtils.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -38,7 +36,6 @@ #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/GEMM.h" -#include "tests/validation/reference/PostOps.h" #include @@ -304,8 +301,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -426,8 +422,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -580,8 +575,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -734,8 +728,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -908,8 +901,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -960,471 +952,40 @@ protected: SimpleTensor _reference{}; }; -/** (EXPERIMENTAL_POST_OPS)*/ -template -class GEMMMatrixMultiplyReshapedWithPostOpsValidationFixture : public framework::Fixture -{ -public: - using PostOpArgBroadcast = std::tuple; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument -public: - void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, - bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info, - const experimental::PostOpList &post_ops) - { - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = m0; - lhs_info.k0 = k0; - lhs_info.v0 = v0; - lhs_info.interleave = interleave_lhs; - lhs_info.transpose = lhs_transpose; - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = n0; - rhs_info.k0 = k0; - rhs_info.h0 = h0; - rhs_info.interleave = interleave_rhs; - rhs_info.transpose = !lhs_transpose; - rhs_info.export_to_cl_image = export_to_cl_image; - - // Set the tensor shapes for LHS and RHS matrices - const TensorShape lhs_shape(k, m, batch_size); - const TensorShape rhs_shape(n, k, batch_size); - const TensorShape bias_shape(n, - broadcast_bias ? 1 : m, - broadcast_bias ? 1 : batch_size); - auto post_ops_with_shapes = experimental::transform_post_op_list_arguments(post_ops, - [ = ](auto broadcast) - { - return TensorShape - { - std::get<0>(broadcast) ? 1 : n, - std::get<1>(broadcast) ? 1 : m, - std::get<2>(broadcast) ? 1 : batch_size, - }; - }); - - _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes); - if(validate_result) - { - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes); - } - } - -protected: - template - void fill(U &&tensor, int i) - { - static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); - using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; - - DistributionType distribution{ T(-1.0f), T(1.0f) }; - library->fill(tensor, distribution, i); - - // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) - DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; - library->fill_borders_with_garbage(tensor, distribution_inf, i); - } - - TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) - { - // Create tensors - TensorType lhs = create_tensor(lhs_shape, data_type, 1); - TensorType rhs = create_tensor(rhs_shape, data_type, 1); - TensorType bias = create_tensor(bias_shape, data_type, 1); - - // Create post op tensors and populate post op with them - std::vector post_op_tensors_holder{}; - auto populated_post_ops = experimental::transform_post_op_list_arguments(post_ops, - [&post_op_tensors_holder, &data_type](auto shape) - { - auto t = create_tensor(shape, data_type, 1); - post_op_tensors_holder.push_back(std::move(t)); - return post_op_tensors_holder.back().info(); - }); - TensorType lhs_reshaped; - TensorType rhs_reshaped; - TensorType dst; - - const unsigned int M = lhs_shape[1]; - const unsigned int N = rhs_shape[0]; - const unsigned int K = lhs_shape[0]; - GEMMKernelInfo kernel_info; - kernel_info.m = M; - kernel_info.n = N; - kernel_info.k = K; - kernel_info.depth_output_gemm3d = 0; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = act_info; - kernel_info.fp_mixed_precision = fp_mixed_precision; - kernel_info.post_ops = populated_post_ops; - - // The output tensor will be auto-initialized within the function - - // Create and configure function - ReshapeLHSOperatorType reshape_lhs; - ReshapeRHSOperatorType reshape_rhs; - GEMMOperatorType gemm; - - validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); - validate_result = validate_result || !rhs_info.export_to_cl_image; - if(!validate_result) - { - return nullptr; - } - - reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); - reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); - gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - - ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - for(const auto &tensor : post_op_tensors_holder) - { - ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); - } - - // We do not pad when using image as it needs to comply to strict pitch alignment restrictions - if(!rhs_info.export_to_cl_image) - { - add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); - for(auto &tensor : post_op_tensors_holder) - { - add_padding_x({ &tensor }); - } - } - - // Allocate tensors - lhs.allocator()->allocate(); - rhs.allocator()->allocate(); - lhs_reshaped.allocator()->allocate(); - rhs_reshaped.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - for(auto &tensor : post_op_tensors_holder) - { - tensor.allocator()->allocate(); - } - - ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); - for(const auto &tensor : post_op_tensors_holder) - { - ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); - } - - // Fill tensors - fill(AccessorType(lhs), 0); - fill(AccessorType(rhs), 1); - fill(AccessorType(bias), 2); - for(size_t i = 0; i < post_op_tensors_holder.size(); ++i) - { - fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i); - } - - // Compute GEMM - ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; - reshape_lhs.run(reshape_lhs_pack); - ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; - reshape_rhs.run(reshape_rhs_pack); - ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, - { ACL_SRC_1, &rhs_reshaped }, - { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); - for(size_t i = 0; i < post_op_tensors_holder.size(); ++i) - { - gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i)); - } - gemm.run(gemm_pack); - - return dst; - } - - SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, - const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) - { - TensorShape dst_shape = lhs_shape; - dst_shape[0] = rhs_shape[0]; - dst_shape[1] = lhs_shape[1]; - - // Create reference - SimpleTensor lhs{ lhs_shape, data_type, 1 }; - SimpleTensor rhs{ rhs_shape, data_type, 1 }; - SimpleTensor bias{ dst_shape, data_type, 1 }; - // Create post op tensors and populate post op with them - auto populated_post_ops = experimental::transform_post_op_list_arguments>(post_ops, [&data_type](auto shape) - { - return SimpleTensor { shape, data_type, 1 }; - }); - - const int n = rhs_shape[0]; - const int m = lhs_shape[1]; - const int batch_size = lhs_shape[2]; - - // Fill reference - int tensor_idx = 0; - fill(lhs, tensor_idx++); - fill(rhs, tensor_idx++); - fill(bias, tensor_idx++); - for(auto &op : populated_post_ops.get_list()) - { - for(auto tensor : op->arguments()) - { - fill(*tensor, tensor_idx++); - } - } - - if(broadcast_bias) - { - // In case of broadcast, we need to simply copy the first into the following "M" ones - for(int i = 1; i < m * batch_size; i++) - { - memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); - } - } - - SimpleTensor out; - if(fp_mixed_precision) - { - out = reference::gemm_mixed_precision(lhs, rhs, bias, alpha, beta); - } - else - { - out = reference::gemm(lhs, rhs, bias, alpha, beta); - } - // Ignore activation info if post ops are used instead - if(populated_post_ops.size() > 0) - { - out = reference::post_ops(out, populated_post_ops); - } - else - { - out = reference::activation_layer(out, act_info); - } - return out; - } - - bool validate_result = true; - TensorType _target{}; - SimpleTensor _reference{}; -}; - template class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture { public: - void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, - bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info) - { - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = m0; - lhs_info.k0 = k0; - lhs_info.v0 = v0; - lhs_info.interleave = interleave_lhs; - lhs_info.transpose = lhs_transpose; - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = n0; - rhs_info.k0 = k0; - rhs_info.h0 = h0; - rhs_info.interleave = interleave_rhs; - rhs_info.transpose = !lhs_transpose; - rhs_info.export_to_cl_image = export_to_cl_image; - - // In case of GEMM3D, m is the product between m_w and m_h - const unsigned int m = m_w * m_h; - - // Set the tensor shapes for LHS and RHS matrices - const TensorShape lhs_shape(k, m, batch_size); - const TensorShape rhs_shape(n, k, batch_size); - const TensorShape bias_shape(n, 1, 1); - - _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info); - if(validate_result) - { - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info); - } - } - -protected: - template - void fill(U &&tensor, int i) - { - static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); - using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; - - DistributionType distribution{ T(-1.0f), T(1.0f) }; - library->fill(tensor, distribution, i); - } - - TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info) - { - // Create tensors - TensorType lhs = create_tensor(lhs_shape, data_type, 1); - TensorType rhs = create_tensor(rhs_shape, data_type, 1); - TensorType bias = create_tensor(bias_shape, data_type, 1); - TensorType lhs_reshaped; - TensorType rhs_reshaped; - TensorType dst; - - const unsigned int M = lhs_shape[1]; - const unsigned int N = rhs_shape[0]; - const unsigned int K = lhs_shape[0]; - GEMMKernelInfo kernel_info; - kernel_info.m = M; - kernel_info.n = N; - kernel_info.k = K; - kernel_info.depth_output_gemm3d = m_h; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = true; - kernel_info.activation_info = act_info; - kernel_info.fp_mixed_precision = fp_mixed_precision; - - // The output tensor will be auto-initialized within the function - - // Create and configure function - ReshapeLHSOperatorType reshape_lhs; - ReshapeRHSOperatorType reshape_rhs; - GEMMOperatorType gemm; - - validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); - validate_result = validate_result || !rhs_info.export_to_cl_image; - if(!validate_result) - { - return nullptr; - } - - reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); - reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); - gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - - ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - - // We do not pad when using image as it needs to comply to strict pitch alignment restrictions - if(!rhs_info.export_to_cl_image) - { - add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); - } - - // Allocate tensors - lhs.allocator()->allocate(); - rhs.allocator()->allocate(); - lhs_reshaped.allocator()->allocate(); - rhs_reshaped.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - - ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); - - // Fill tensors - fill(AccessorType(lhs), 0); - fill(AccessorType(rhs), 1); - fill(AccessorType(bias), 2); - - // Compute GEMM - ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; - reshape_lhs.run(reshape_lhs_pack); - ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; - reshape_rhs.run(reshape_rhs_pack); - ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, - { ACL_SRC_1, &rhs_reshaped }, - { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); - gemm.run(gemm_pack); - - return dst; - } - - SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, - const ActivationLayerInfo &act_info) - { - TensorShape dst_shape = lhs_shape; - dst_shape.set(0, rhs_shape[0]); - dst_shape.set(1, lhs_shape[1] / m_h); - dst_shape.set(2, m_h); - dst_shape.set(3, lhs_shape[2]); - - // Create reference - SimpleTensor lhs{ lhs_shape, data_type, 1 }; - SimpleTensor rhs{ rhs_shape, data_type, 1 }; - SimpleTensor bias{ dst_shape, data_type, 1 }; - - const int n = rhs_shape[0]; - const int m = lhs_shape[1]; - const int batch_size = lhs_shape[2]; - - // Fill reference - fill(lhs, 0); - fill(rhs, 1); - fill(bias, 2); - - // In case of broadcast, we need to simply copy the first into the following "M" ones - for(int i = 1; i < m * batch_size; i++) - { - memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); - } - - if(fp_mixed_precision) - { - return reference::activation_layer(reference::gemm_mixed_precision(lhs, rhs, bias, alpha, beta), act_info); - } - else - { - return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); - } - } - - bool validate_result = true; - TensorType _target{}; - SimpleTensor _reference{}; -}; - -template -class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture -{ -public: - void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, - bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) + void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, + bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info) { GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = m0; - lhs_info.k0 = k0; + lhs_info.m0 = m0; + lhs_info.k0 = k0; + lhs_info.v0 = v0; + lhs_info.interleave = interleave_lhs; + lhs_info.transpose = lhs_transpose; GEMMRHSMatrixInfo rhs_info; rhs_info.n0 = n0; rhs_info.k0 = k0; rhs_info.h0 = h0; rhs_info.interleave = interleave_rhs; - rhs_info.transpose = transpose_rhs; + rhs_info.transpose = !lhs_transpose; rhs_info.export_to_cl_image = export_to_cl_image; + // In case of GEMM3D, m is the product between m_w and m_h + const unsigned int m = m_w * m_h; + // Set the tensor shapes for LHS and RHS matrices const TensorShape lhs_shape(k, m, batch_size); const TensorShape rhs_shape(n, k, batch_size); - const TensorShape bias_shape(n, - broadcast_bias ? 1 : m, - broadcast_bias ? 1 : batch_size); + const TensorShape bias_shape(n, 1, 1); - _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info); + _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info); if(validate_result) { - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); + _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info); } } @@ -1437,19 +998,16 @@ protected: DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); - - // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) - DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; - library->fill_borders_with_garbage(tensor, distribution_inf, i); } TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) + DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); TensorType rhs = create_tensor(rhs_shape, data_type, 1); TensorType bias = create_tensor(bias_shape, data_type, 1); + TensorType lhs_reshaped; TensorType rhs_reshaped; TensorType dst; @@ -1460,14 +1018,16 @@ protected: kernel_info.m = M; kernel_info.n = N; kernel_info.k = K; - kernel_info.depth_output_gemm3d = 0; + kernel_info.depth_output_gemm3d = m_h; kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; + kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + kernel_info.fp_mixed_precision = fp_mixed_precision; // The output tensor will be auto-initialized within the function // Create and configure function + ReshapeLHSOperatorType reshape_lhs; ReshapeRHSOperatorType reshape_rhs; GEMMOperatorType gemm; @@ -1478,8 +1038,9 @@ protected: return nullptr; } + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); - gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); @@ -1488,18 +1049,20 @@ protected: // We do not pad when using image as it needs to comply to strict pitch alignment restrictions if(!rhs_info.export_to_cl_image) { - add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst }); + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); } // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); + lhs_reshaped.allocator()->allocate(); rhs_reshaped.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); @@ -1510,24 +1073,27 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; reshape_rhs.run(reshape_rhs_pack); - ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } - SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, + SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; - dst_shape[0] = rhs_shape[0]; - dst_shape[1] = lhs_shape[1]; + dst_shape.set(0, rhs_shape[0]); + dst_shape.set(1, lhs_shape[1] / m_h); + dst_shape.set(2, m_h); + dst_shape.set(3, lhs_shape[2]); // Create reference SimpleTensor lhs{ lhs_shape, data_type, 1 }; @@ -1543,16 +1109,20 @@ protected: fill(rhs, 1); fill(bias, 2); - if(broadcast_bias) + // In case of broadcast, we need to simply copy the first into the following "M" ones + for(int i = 1; i < m * batch_size; i++) { - // In case of broadcast, we need to simply copy the first into the following "M" ones - for(int i = 1; i < m * batch_size; i++) - { - memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); - } + memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); } - return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); + if(fp_mixed_precision) + { + return reference::activation_layer(reference::gemm_mixed_precision(lhs, rhs, bias, alpha, beta), act_info); + } + else + { + return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); + } } bool validate_result = true; @@ -1560,15 +1130,12 @@ protected: SimpleTensor _reference{}; }; -/** (EXPERIMENTAL_POST_OPS)*/ template -class GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture : public framework::Fixture +class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture { public: - using PostOpArgBroadcast = std::tuple; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, - bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, - const experimental::PostOpList &post_ops) + bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = m0; @@ -1588,21 +1155,11 @@ public: const TensorShape bias_shape(n, broadcast_bias ? 1 : m, broadcast_bias ? 1 : batch_size); - auto post_ops_with_shapes = experimental::transform_post_op_list_arguments(post_ops, - [ = ](auto broadcast) - { - return TensorShape - { - std::get<0>(broadcast) ? 1 : n, - std::get<1>(broadcast) ? 1 : m, - std::get<2>(broadcast) ? 1 : batch_size, - }; - }); - _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes); + _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info); if(validate_result) { - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes); + _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); } } @@ -1622,7 +1179,7 @@ protected: } TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) + DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { // Create tensors TensorType lhs = create_tensor(lhs_shape, data_type, 1); @@ -1630,15 +1187,6 @@ protected: TensorType bias = create_tensor(bias_shape, data_type, 1); TensorType rhs_reshaped; TensorType dst; - // Create post op tensors and populate post op with them - std::vector post_op_tensors_holder{}; - auto populated_post_ops = experimental::transform_post_op_list_arguments(post_ops, - [&post_op_tensors_holder, &data_type](auto shape) - { - auto t = create_tensor(shape, data_type, 1); - post_op_tensors_holder.push_back(std::move(t)); - return post_op_tensors_holder.back().info(); - }); const unsigned int M = lhs_shape[1]; const unsigned int N = rhs_shape[0]; @@ -1651,7 +1199,6 @@ protected: kernel_info.reinterpret_input_as_3d = false; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = act_info; - kernel_info.post_ops = populated_post_ops; // The output tensor will be auto-initialized within the function @@ -1672,19 +1219,11 @@ protected: ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - for(const auto &tensor : post_op_tensors_holder) - { - ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); - } // We do not pad when using image as it needs to comply to strict pitch alignment restrictions if(!rhs_info.export_to_cl_image) { add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst }); - for(auto &tensor : post_op_tensors_holder) - { - add_padding_x({ &tensor }); - } } // Allocate tensors @@ -1693,29 +1232,17 @@ protected: rhs_reshaped.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); - for(auto &tensor : post_op_tensors_holder) - { - tensor.allocator()->allocate(); - } ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); - for(const auto &tensor : post_op_tensors_holder) - { - ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); - } // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); fill(AccessorType(bias), 2); - for(size_t i = 0; i < post_op_tensors_holder.size(); ++i) - { - fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i); - } // Compute GEMM ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; @@ -1723,19 +1250,14 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); - for(size_t i = 0; i < post_op_tensors_holder.size(); ++i) - { - gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i)); - } + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; } SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, - const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) + const ActivationLayerInfo &act_info) { TensorShape dst_shape = lhs_shape; dst_shape[0] = rhs_shape[0]; @@ -1745,28 +1267,15 @@ protected: SimpleTensor lhs{ lhs_shape, data_type, 1 }; SimpleTensor rhs{ rhs_shape, data_type, 1 }; SimpleTensor bias{ dst_shape, data_type, 1 }; - // Create post op tensors and populate post op with them - auto populated_post_ops = experimental::transform_post_op_list_arguments>(post_ops, [&data_type](auto shape) - { - return SimpleTensor { shape, data_type, 1 }; - }); const int n = rhs_shape[0]; const int m = lhs_shape[1]; const int batch_size = lhs_shape[2]; // Fill reference - int tensor_idx = 0; - fill(lhs, tensor_idx++); - fill(rhs, tensor_idx++); - fill(bias, tensor_idx++); - for(auto &op : populated_post_ops.get_list()) - { - for(auto tensor : op->arguments()) - { - fill(*tensor, tensor_idx++); - } - } + fill(lhs, 0); + fill(rhs, 1); + fill(bias, 2); if(broadcast_bias) { @@ -1777,18 +1286,7 @@ protected: } } - SimpleTensor out; - out = reference::gemm(lhs, rhs, bias, alpha, beta); - // Ignore activation info if post ops are used instead - if(populated_post_ops.size() > 0) - { - out = reference::post_ops(out, populated_post_ops); - } - else - { - out = reference::activation_layer(out, act_info); - } - return out; + return reference::activation_layer(reference::gemm(lhs, rhs, bias, alpha, beta), act_info); } bool validate_result = true; @@ -1921,8 +1419,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -2057,8 +1554,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -2101,212 +1597,6 @@ protected: SimpleTensor _reference{}; }; -template -class GEMMMatrixMultiplyNativeWithPostOpsValidationFixture : public framework::Fixture -{ -public: - using PostOpArgBroadcast = std::tuple; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument -public: - void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias, - const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) - { - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = m0; - lhs_info.k0 = k0; - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = n0; - rhs_info.k0 = k0; - - // Set the tensor shapes for LHS and RHS matrices - const TensorShape lhs_shape(k, m, batch_size); - const TensorShape rhs_shape(n, k, batch_size); - const TensorShape bias_shape(n, - broadcast_bias ? 1 : m, - broadcast_bias ? 1 : batch_size); - const auto post_ops_with_shapes = experimental::transform_post_op_list_arguments(post_ops, - [ = ](auto broadcast) - { - return TensorShape - { - std::get<0>(broadcast) ? 1 : n, - std::get<1>(broadcast) ? 1 : m, - std::get<2>(broadcast) ? 1 : batch_size, - }; - }); - - _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes); - _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes); - } - -protected: - template - void fill(U &&tensor, int i) - { - static_assert(std::is_floating_point::value || std::is_same::value, "Only floating point data types supported."); - using DistributionType = typename std::conditional::value, arm_compute::utils::uniform_real_distribution_16bit, std::uniform_real_distribution>::type; - - DistributionType distribution{ T(-1.0f), T(1.0f) }; - library->fill(tensor, distribution, i); - - // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) - DistributionType distribution_inf{ T(std::numeric_limits::infinity()), T(std::numeric_limits::infinity()) }; - library->fill_borders_with_garbage(tensor, distribution_inf, i); - } - - TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, - DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) - { - // Create tensors - TensorType lhs = create_tensor(lhs_shape, data_type, 1); - TensorType rhs = create_tensor(rhs_shape, data_type, 1); - TensorType bias = create_tensor(bias_shape, data_type, 1); - TensorType dst; - // Create post op tensors and populate post op with them - std::vector post_op_tensors_holder{}; - auto populated_post_ops = experimental::transform_post_op_list_arguments(post_ops, - [&post_op_tensors_holder, &data_type](auto shape) - { - auto t = create_tensor(shape, data_type, 1); - post_op_tensors_holder.push_back(std::move(t)); - return post_op_tensors_holder.back().info(); - }); - - const unsigned int M = lhs_shape[1]; - const unsigned int N = rhs_shape[0]; - const unsigned int K = lhs_shape[0]; - GEMMKernelInfo kernel_info; - kernel_info.m = M; - kernel_info.n = N; - kernel_info.k = K; - kernel_info.depth_output_gemm3d = 0; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = act_info; - kernel_info.post_ops = populated_post_ops; - - // Create and configure function - GEMMOperatorType gemm; - gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - - ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - for(const auto &tensor : post_op_tensors_holder) - { - ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); - } - - add_padding_x({ &lhs, &rhs, &bias, &dst }); - for(auto &tensor : post_op_tensors_holder) - { - add_padding_x({ &tensor }); - } - - // Allocate tensors - lhs.allocator()->allocate(); - rhs.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - for(auto &tensor : post_op_tensors_holder) - { - tensor.allocator()->allocate(); - } - - ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); - for(const auto &tensor : post_op_tensors_holder) - { - ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); - } - - // Fill tensors - fill(AccessorType(lhs), 0); - fill(AccessorType(rhs), 1); - fill(AccessorType(bias), 2); - for(size_t i = 0; i < post_op_tensors_holder.size(); ++i) - { - fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i); - } - - // Compute GEMM - ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, - { ACL_SRC_1, &rhs }, - { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); - for(size_t i = 0; i < post_op_tensors_holder.size(); ++i) - { - gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i)); - } - gemm.run(gemm_pack); - - return dst; - } - - SimpleTensor compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, - const ActivationLayerInfo &act_info, const experimental::PostOpList &post_ops) - { - TensorShape dst_shape = lhs_shape; - dst_shape[0] = rhs_shape[0]; - dst_shape[1] = lhs_shape[1]; - - // Create reference - SimpleTensor lhs{ lhs_shape, data_type, 1 }; - SimpleTensor rhs{ rhs_shape, data_type, 1 }; - SimpleTensor bias{ dst_shape, data_type, 1 }; - // Create post op tensors and populate post op with them - auto populated_post_ops = experimental::transform_post_op_list_arguments>(post_ops, [&data_type](auto shape) - { - return SimpleTensor { shape, data_type, 1 }; - }); - - const int n = rhs_shape[0]; - const int m = lhs_shape[1]; - const int batch_size = lhs_shape[2]; - - // Fill reference - int tensor_idx = 0; - fill(lhs, tensor_idx++); - fill(rhs, tensor_idx++); - fill(bias, tensor_idx++); - for(auto &op : populated_post_ops.get_list()) - { - for(auto tensor : op->arguments()) - { - fill(*tensor, tensor_idx++); - } - } - - if(broadcast_bias) - { - // In case of broadcast, we need to simply copy the first into the following "M" ones - for(int i = 1; i < m * batch_size; i++) - { - memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); - } - } - - SimpleTensor out; - out = reference::gemm(lhs, rhs, bias, alpha, beta); - // Ignore activation info if post ops are used instead - if(populated_post_ops.size() > 0) - { - out = reference::post_ops(out, populated_post_ops); - } - else - { - out = reference::activation_layer(out, act_info); - } - return out; - } - - TensorType _target{}; - SimpleTensor _reference{}; -}; - template class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture { @@ -2398,8 +1688,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -2557,8 +1846,7 @@ protected: ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, - { ACL_DST, &dst } - }); + { ACL_DST, &dst } }); gemm.run(gemm_pack); return dst; @@ -2608,4 +1896,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H -- cgit v1.2.1