From 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 14 Oct 2019 19:03:09 +0100 Subject: COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2141 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez --- src/core/NEON/kernels/arm_gemm/profiler.hpp | 137 ---------------------------- 1 file changed, 137 deletions(-) delete mode 100644 src/core/NEON/kernels/arm_gemm/profiler.hpp (limited to 'src/core/NEON/kernels/arm_gemm/profiler.hpp') diff --git a/src/core/NEON/kernels/arm_gemm/profiler.hpp b/src/core/NEON/kernels/arm_gemm/profiler.hpp deleted file mode 100644 index 1b944c4ccd..0000000000 --- a/src/core/NEON/kernels/arm_gemm/profiler.hpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -#ifdef CYCLE_PROFILING - -#include "../perf.h" - -#ifndef NO_MULTI_THREADING -#include -#endif - -namespace arm_gemm { - -#ifndef NO_MULTI_THREADING -extern std::mutex report_mutex; -#endif - -class profiler { -private: - static const int maxevents = 100000; - unsigned long times[maxevents] = { }; - unsigned long units[maxevents] = { }; - int events[maxevents] = { }; - int currentevent=0; - int countfd=0; - - class ScopedProfilerClass { - private: - profiler &_parent; - bool legal=false; - - public: - ScopedProfilerClass(profiler &prof, int i, unsigned long u) : _parent(prof) { - if (prof.currentevent==maxevents) - return; - - prof.events[prof.currentevent]=i; - prof.units[prof.currentevent]=u; - legal=true; - start_counter(prof.countfd); - } - - ~ScopedProfilerClass() { - if (!legal) return; - - long long cycs = stop_counter(_parent.countfd); - _parent.times[_parent.currentevent++] = cycs; - } - }; - -public: - profiler() { - countfd=open_cycle_counter(); - } - - ~profiler() { - close(countfd); - int tots[5]; - unsigned long counts[5]; - unsigned long tunits[5]; - const char * descs[] = { "Prepare A", "Prepare B", "Kernel", "Merge" }; - - for (int i=1; i<5; i++) { - tots[i] = 0; - counts[i] = 0; - tunits[i] = 0; - } - - for (int i=0; i lock(report_mutex); - printf("Profiled events (cpu %d):\n", sched_getcpu()); -#endif - - printf("%20s %9s %9s %9s %12s %9s\n", "", "Events", "Total", "Average", "Bytes/MACs", "Per cycle"); - for (int i=1; i<5; i++) { - printf("%20s: %9d %9ld %9ld %12lu %9.2f\n",descs[i-1],tots[i],counts[i],counts[i]/tots[i],tunits[i],(float)tunits[i]/counts[i]); - } - } - - template - void operator() (int i, unsigned long u, T func) { - if (currentevent==maxevents) { - func(); - } else { - events[currentevent] = i; - units[currentevent] = u; - start_counter(countfd); - func(); - long long cycs = stop_counter(countfd); - times[currentevent++] = cycs; - } - } - - ScopedProfilerClass ScopedProfiler(int i, unsigned long u) { - return ScopedProfilerClass(*this, i, u); - } -}; - -#endif // CYCLE_PROFILING - -} // namespace arm_gemm - -#define PROFILE_PREPA 1 -#define PROFILE_PREPB 2 -#define PROFILE_KERNEL 3 -#define PROFILE_MERGE 4 -- cgit v1.2.1