aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve2/qsymm16.cpp')
-rw-r--r--src/cpu/kernels/activation/generic/sve2/qsymm16.cpp121
1 files changed, 66 insertions, 55 deletions
diff --git a/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp b/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp
index 5154fac8a7..f955893307 100644
--- a/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp
+++ b/src/cpu/kernels/activation/generic/sve2/qsymm16.cpp
@@ -21,24 +21,27 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
-#include <cmath>
-#include <cstddef>
-
#include "src/core/NEON/SVEMath.h"
#include "src/core/NEON/SVESymm.h"
+
#include <arm_sve.h>
+#include <cmath>
+#include <cstddef>
namespace arm_compute
{
namespace cpu
{
-void sve2_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+void sve2_qsymm16_activation(const ITensor *src,
+ ITensor *dst,
+ const ActivationLayerInfo &act_info,
+ const Window &window)
{
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
@@ -56,62 +59,70 @@ void sve2_qsymm16_activation(const ITensor *src, ITensor *dst, const ActivationL
const auto va_f32 = svdup_n_f32(act_info.a());
const auto vb_f32 = svdup_n_f32(act_info.b());
- execute_window_loop(win_collapsed, [&](const Coordinates &)
- {
- const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
- svint16_t tmp;
+ svint16_t tmp;
- int x = window_start_x;
- svbool_t pg = svwhilelt_b16(x, window_end_x);
- do
- {
- const auto vin = svld1_s16(pg, input_ptr + x);
- if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
- {
- // De-quantize
- auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
- // Perform activation
- const svfloat32x2_t tmp_dep = svcreate2_f32(svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))),
- svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1))))));
- // Re-quantize to new output space
- tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
- }
- else if(act == ActivationLayerInfo::ActivationFunction::TANH)
- {
- // De-quantize
- auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
- // Perform activation
- const svfloat32x2_t tmp_dep = svcreate2_f32(svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))),
- svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32))));
- // Re-quantize to new output space
- tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
- }
- else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
- {
- // De-quantize
- auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
- // Perform activation
- const svfloat32x2_t tmp_dep = svcreate2_f32(svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 0))),
- svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 1))));
- // Re-quantize to new output space
- tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
- }
- else
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b16(x, window_end_x);
+ do
{
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
+ const auto vin = svld1_s16(pg, input_ptr + x);
+ if (act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep = svcreate2_f32(
+ svdiv_f32_z(
+ pg, vconst_1,
+ svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))),
+ svdiv_f32_z(
+ pg, vconst_1,
+ svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1))))));
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else if (act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep = svcreate2_f32(
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32))));
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else if (act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep =
+ svcreate2_f32(svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 0))),
+ svmin_f32_z(pg, va_f32, svmax_f32_z(pg, vb_f32, svget2_f32(vin_deq, 1))));
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
- svst1_s16(pg, output_ptr + x, tmp);
+ svst1_s16(pg, output_ptr + x, tmp);
- x += svcnth();
- pg = svwhilelt_b16(x, window_end_x);
+ x += svcnth();
+ pg = svwhilelt_b16(x, window_end_x);
- }
- while(svptest_any(svptrue_b16(), pg));
- },
- input, output);
+ } while (svptest_any(svptrue_b16(), pg));
+ },
+ input, output);
}
} // namespace cpu
} // namespace arm_compute