From a0bf9130a266332eadb69139296c96c66d401e12 Mon Sep 17 00:00:00 2001 From: Eren Kopuz Date: Wed, 24 Jun 2020 17:29:38 +0100 Subject: COMPMID-3453: Modify GemmTuner.py to output JSON - Added functionality to parse multiple OpenCL_ms measurements and sum - Results are now outputted in 4 JSON files as specified in ticket COMPMID-3453 Change-Id: I9241e9eeaee7c4979e877f87736deaee3cfd38e4 Signed-off-by: Eren Kopuz Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3492 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins --- examples/gemm_tuner/GemmTuner.py | 311 ++++++++++++++++++++++++++------------- 1 file changed, 211 insertions(+), 100 deletions(-) diff --git a/examples/gemm_tuner/GemmTuner.py b/examples/gemm_tuner/GemmTuner.py index 8bc0d3a6bf..aab2d55e89 100644 --- a/examples/gemm_tuner/GemmTuner.py +++ b/examples/gemm_tuner/GemmTuner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Arm Limited. +# Copyright (c) 2019-2020 ARM Limited. # # SPDX-License-Identifier: MIT # @@ -41,6 +41,8 @@ from typing import Deque, Dict, Generator, List, NamedTuple, Set, Tuple, Union Strategy = Enum("Strategy", ["Native", "ReshapedOnlyRHS", "Reshaped"]) # Gemm parameter + + class GEMMParam(NamedTuple): M: int # Number of lhs matrix rows N: int # Number of rhs matrix columns @@ -52,7 +54,7 @@ class GEMMParam(NamedTuple): return GEMMParam(*map(int, args)) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Native @@ -63,11 +65,11 @@ class NativeGEMMConfig(NamedTuple): @staticmethod def parse_from_strs(*args): - *mnk, = map(int, args) + (*mnk,) = map(int, args) return NativeGEMMConfig(*mnk) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Reshaped Only RHS @@ -75,9 +77,12 @@ class ReshapedOnlyRHSGEMMConfig(NamedTuple): m0: int # Number of rows processed by the matrix multiplication n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - h0: int # Number of horizontal blocks of size (k0xn0) stored on the same output row - interleave_rhs: bool # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs: bool # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + # Number of horizontal blocks of size (k0xn0) stored on the same output row + h0: int + # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) + interleave_rhs: bool + # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + transpose_rhs: bool @staticmethod def parse_from_strs(*args): @@ -87,7 +92,7 @@ class ReshapedOnlyRHSGEMMConfig(NamedTuple): return ReshapedOnlyRHSGEMMConfig(*mnkh, interleave_rhs, transpose_rhs) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Reshaped @@ -95,11 +100,16 @@ class ReshapedGEMMConfig(NamedTuple): m0: int # Number of rows processed by the matrix multiplication n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - v0: int # Number of vertical blocks of size (m0xk0) stored on the same output row - h0: int # Number of horizontal blocks of size (k0xn0) stored on the same output row - interleave_lhs: bool # Interleave lhs matrix (1) / Do not interleave lhs matrix (0) - interleave_rhs: bool # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs: bool # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + # Number of vertical blocks of size (m0xk0) stored on the same output row + v0: int + # Number of horizontal blocks of size (k0xn0) stored on the same output row + h0: int + # Interleave lhs matrix (1) / Do not interleave lhs matrix (0) + interleave_lhs: bool + # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) + interleave_rhs: bool + # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + transpose_rhs: bool @staticmethod def parse_from_strs(*args): @@ -110,40 +120,67 @@ class ReshapedGEMMConfig(NamedTuple): return ReshapedGEMMConfig(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Measurement we take from the benchmark result. class Measurement(NamedTuple): - opencl_timer_ms: float + opencl_timer_ms_reshape: float + opencl_timer_ms_kernel: float + + def get_total_ms(self): + return self.opencl_timer_ms_reshape + self.opencl_timer_ms_kernel def is_close_to(self, other, tol): - return math.fabs(self.opencl_timer_ms - other.opencl_timer_ms) < tol + return math.fabs(self.get_total_ms() - other.get_total_ms()) < tol def is_better_than(self, other, tol): - return self < other and not self.is_close_to(other) + return self.get_total_ms() < other.get_total_ms() and not self.is_close_to( + other + ) def __add__(self, other): - return Measurement(self.opencl_timer_ms + other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape + other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel + other.opencl_timer_ms_kernel, + ) def __sub__(self, other): - return Measurement(self.opencl_timer_ms - other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape - other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel - other.opencl_timer_ms_kernel, + ) def __mul__(self, other): - return Measurement(self.opencl_timer_ms * other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape * other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel * other.opencl_timer_ms_kernel, + ) def __floordiv__(self, other): - return Measurement(self.opencl_timer_ms // other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape // other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel // other.opencl_timer_ms_kernel, + ) def __truediv__(self, other): - return Measurement(self.opencl_timer_ms / other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape / other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel / other.opencl_timer_ms_kernel, + ) def __pow__(self, power): - return Measurement(self.opencl_timer_ms ** power) + return Measurement( + self.opencl_timer_ms_reshape ** power, self.opencl_timer_ms_kernel ** power + ) + + def __str__(self): + return ",".join(map(str, self)) # GEMMConfig Type -GEMMConfigT = Union[NativeGEMMConfig, ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig] +GEMMConfigT = Union[NativeGEMMConfig, + ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig] # Representation of the benchmark result from a single experiment @@ -154,24 +191,6 @@ class BenchmarkResult(NamedTuple): measurement: Measurement -# Representation of a single row of BenchmarkResult in CSV -# NOTE: In the CSV representation, we merge all fields of Gemm Config into a single field "GEMMConfig", but keep the -# fields of GEMMParam and Measurement -# The example entry including header would look like: -# M , N , K , B, Strategy , GEMMConfig , OpenCLTimer_MS -# 1225, 32, 192, 1, Reshaped , 4-4-4-3-1-1-1-0 , 0.3309 -BenchmarkResultCSVRow = namedtuple( - "BenchmarkResultCSVRow", GEMMParam._fields + ("Strategy", "GEMMConfig") + Measurement._fields -) - - -def benchmark_result_2_csv_row(result: BenchmarkResult) -> BenchmarkResultCSVRow: - """ Convert a BenchmarkResult into its CSV row form """ - return BenchmarkResultCSVRow( - *(result.gemm_param + (result.strategy.name, str(result.gemm_config)) + result.measurement) - ) - - class GEMMBenchmarkResultRecorder: """ A recorder that records and organises GEMM Benchmark results, and produces various reports on the record. """ @@ -210,7 +229,9 @@ class GEMMBenchmarkResultRecorder: best_gc_set = best_gc_sets.setdefault((gemm_param, strategy), []) best_gc_set.append((gemm_config, measurement)) # Sort the best config set (list) - best_gc_set = sorted(best_gc_set, key=lambda gc_and_m: gc_and_m[1]) + best_gc_set = sorted( + best_gc_set, key=lambda gc_and_m: gc_and_m[1].get_total_ms() + ) # Filter out configs that are beyond tolerance to the best GEMMConfig's measurement best_gc, best_m = best_gc_set[0] best_gc_set_new = [ @@ -228,9 +249,14 @@ class GEMMBenchmarkResultRecorder: """ Get the best GEMMConfig set per GEMMParam per Strategy, and flatten the result into a sequence of BenchmarkResults """ - for (gemm_param, strategy), best_gc_sets in self.get_best_gemm_configs().items(): + for ( + (gemm_param, strategy), + best_gc_sets, + ) in self.get_best_gemm_configs().items(): for best_gemm_config, best_measurement in best_gc_sets: - yield BenchmarkResult(gemm_param, strategy, best_gemm_config, best_measurement) + yield BenchmarkResult( + gemm_param, strategy, best_gemm_config, best_measurement + ) def get_config_distributions(self): """ Return GEMMConfigDistribution for each strategy @@ -244,38 +270,72 @@ class GEMMBenchmarkResultRecorder: return gemm_config_distributions - def save_to_csvs(self, out_dir, only_best_config=True): - """ Save records to an output directory of csv files. - The directory is organized such that each strategy gets its own CSV file. + def get_best_gemm_strategies(self): + """ Get the best Stratey per GEMMParam + """ + all_results: Dict[GEMMParam, List[Tuple[Strategy, Measurement]]] = defaultdict( + list + ) + + best_strategies: Dict[GEMMParam, Strategy] = {} + + for gemm_param, strategy, gemm_config, measurement in self.get_record(): + all_results[gemm_param].append((strategy, measurement)) + + for gemm_param, results_set in all_results.items(): + # Sort the best results set (list) + results_set = sorted( + results_set, key=lambda s_and_m: s_and_m[1].get_total_ms() + ) + # Select best Strategy + best_s, best_m = results_set[0] + best_strategies[gemm_param] = best_s + + return best_strategies + + def save_to_jsons(self, out_dir, only_best_config=True): + """ Save records to an output directory of JSON files. + The directory is organized such that each strategy gets its own JSON file. + The directory also includes a JSON file to define the best strategy per GEMM Param. """ if not os.path.exists(out_dir): - logging.info("Output directory {} does not exist. Creating...".format(out_dir)) + logging.info( + "Output directory {} does not exist. Creating...".format( + out_dir) + ) os.mkdir(out_dir) + + out_json_path = os.path.join(out_dir, "gemm_type_selection.json") + if check_out_path(out_json_path): + results = self.get_best_gemm_strategies() + results = {str(key): value.name for key, value in results.items()} + dump_json(out_json_path, results) + for strategy in self._strategies: - out_csv_path = os.path.join(out_dir, strategy.name) - if os.path.exists(out_csv_path): - overwrite = ( - input( - "Output CSV {} already exists. Overwrite? [Y/N]: ".format(out_csv_path) - ).lower() - == "y" - ) - if not overwrite: - logging.info("Skipping {}".format(out_csv_path)) - continue - logging.info("Saving csv file to {}".format(out_csv_path)) - record = ( - self.get_best_gemm_configs_as_sequence() if only_best_config else self.get_record() + out_json_path = os.path.join( + out_dir, ("gemm_config_" + strategy.name.lower() + ".json") ) - with open(out_csv_path, "w") as f: - csv_writer = csv.DictWriter(f, fieldnames=BenchmarkResultCSVRow._fields) - csv_writer.writeheader() - csv_writer.writerows( - benchmark_result_2_csv_row(res)._asdict() - for res in record - if res.strategy == strategy + if check_out_path(out_json_path): + record = ( + self.get_best_gemm_configs_as_sequence() + if only_best_config + else self.get_record() ) - logging.info("Saved") + results = defaultdict(list) + for res in record: + if res.strategy == strategy: + results[str(res.gemm_param)].append( + { + "GEMMConfig": str(res.gemm_config), + "OpenCL_Timer_ms_reshape": str( + res.measurement.opencl_timer_ms_reshape + ), + "OpenCL_Timer_ms_kernel": str( + res.measurement.opencl_timer_ms_kernel + ), + } + ) + dump_json(out_json_path, results) def summary(self, sum_level=SummaryLevel.Short): """ Return the summary string of the record @@ -314,9 +374,9 @@ class GEMMConfigDistribution: def __init__(self): """ Initializer """ - self._gemm_config_dist: Dict[GEMMConfig, List[Tuple[GEMMParam, Measurement]]] = defaultdict( - list - ) + self._gemm_config_dist: Dict[ + GEMMConfig, List[Tuple[GEMMParam, Measurement]] + ] = defaultdict(list) self._gemm_config_freq = Counter() def add(self, benchmark_result: BenchmarkResult): @@ -439,30 +499,45 @@ def extract_benchmark_results( # Get gemm params + gemm configs from example args benchmark_args = parse_benchmark_commandline(json_res["CommandLine"]) Gemm_Example_Args_T = GEMM_EXAMPLE_ARGS_FACTORY[strategy] - example_args = Gemm_Example_Args_T(*(benchmark_args["example_args"].split(","))) + example_args = Gemm_Example_Args_T( + *(benchmark_args["example_args"].split(","))) # Gemm_Example_Arg consists of GEMMParam first and then GEMMConfig (in that order) gemm_param_fields_len = len(GEMMParam._fields) - gemm_param = GEMMParam.parse_from_strs(*example_args[:gemm_param_fields_len]) + gemm_param = GEMMParam.parse_from_strs( + *example_args[:gemm_param_fields_len]) GEMMConfig = GEMM_CONFIG_FACTORY[strategy] - gemm_config = GEMMConfig.parse_from_strs(*example_args[gemm_param_fields_len:]) + gemm_config = GEMMConfig.parse_from_strs( + *example_args[gemm_param_fields_len:]) # Get OpenCL_Time_Ms stats measurements = list(example_test_data["measurements"].items()) - # There should only be 1 instrument per run - assert len(measurements) == 1 - measurement_instrument, data = measurements.pop() - # Get instrument name and assert that it is the one we expect - measurement_instrument_name = measurement_instrument.split("/")[0] - assert measurement_instrument_name == "OpenCLTimer" - # Take either the minimum or the average of the raw data as the measurement value - if measurement_method == "min": - measurement_val = min(data["raw"]) - elif measurement_method == "avg": - measurement_val = sum(data["raw"]) / len(data["raw"]) - else: - raise ValueError("Invalid measurement method: {}".format(measurement_method)) - - measurement = Measurement(measurement_val) + # For reshaped RHS only we have two measurements (one also for the reshape kernel) + # Hence we must parse and sum them + measurement_ms_reshape = 0 + measurement_ms_kernel = 0 + for single_measurement in measurements: + measurement_instrument, data = single_measurement + # Get instrument name and assert that it is the one we expect + measurement_instrument_name = measurement_instrument.split("/")[0] + assert measurement_instrument_name == "OpenCLTimer" + # Take either the minimum or the average of the raw data as the measurement value + if measurement_method == "min": + measurement_val = min(data["raw"]) + elif measurement_method == "avg": + measurement_val = sum(data["raw"]) / len(data["raw"]) + else: + raise ValueError( + "Invalid measurement method: {}".format(measurement_method) + ) + + measurement_type = measurement_instrument.split("/")[1] + if "reshape" in measurement_type.split("_"): + measurement_ms_reshape = measurement_val + else: + measurement_ms_kernel = measurement_val + + measurement = Measurement( + measurement_ms_reshape, measurement_ms_kernel) yield BenchmarkResult(gemm_param, strategy, gemm_config, measurement) @@ -475,15 +550,42 @@ def parse_json(dir_name): yield json.load(res_fp) +def check_out_path(out_path): + if os.path.exists(out_path): + overwrite = ( + input( + "Output JSON {} already exists. Overwrite? [Y/N]: ".format( + out_path) + ).lower() + == "y" + ) + if not overwrite: + logging.info("Skipping {}".format(out_path)) + return False + logging.info("Saving JSON file to {}".format(out_path)) + return True + + +def dump_json(out_path, dict): + with open(out_path, "w") as f: + json.dump(dict, f) + logging.info("Saved") + + ################################################################################ # Main ################################################################################ def main(args): - logging.info("Searching best gemm configurations from {}".format(args.benchmark_results_dir)) + logging.info( + "Searching best gemm configurations from {}".format( + args.benchmark_results_dir) + ) - benchmark_results = extract_benchmark_results(parse_json(args.benchmark_results_dir)) + benchmark_results = extract_benchmark_results( + parse_json(args.benchmark_results_dir) + ) # Add all benchmark results to the recorder benchmark_result_recorder = GEMMBenchmarkResultRecorder(tol=args.tolerance) @@ -496,7 +598,8 @@ def main(args): recorder_sum_level = GEMMBenchmarkResultRecorder.SummaryLevel.Short # Print overall summary of the recorded results - logging.info(benchmark_result_recorder.summary(sum_level=recorder_sum_level)) + logging.info(benchmark_result_recorder.summary( + sum_level=recorder_sum_level)) # Get GEMM configuration distributions for each strategy all_config_dists = benchmark_result_recorder.get_config_distributions() @@ -508,12 +611,16 @@ def main(args): for config, freq in config_dist.frequency(): logging.debug("{}, {}".format(config, freq)) logging.info( - "Best GEMM Config: {} with std: {}".format(config_dist.best_config(), config_dist.std()) + "Best GEMM Config: {} with std: {}".format( + config_dist.best_config(), config_dist.std() + ) ) - # Save the recorded results to csv files in output directory + # Save the recorded results to JSON files in output directory if args.output_dir is not None: - benchmark_result_recorder.save_to_csvs(args.output_dir, only_best_config=(not args.debug)) + benchmark_result_recorder.save_to_jsons( + args.output_dir, only_best_config=(not args.debug) + ) if __name__ == "__main__": @@ -538,7 +645,7 @@ if __name__ == "__main__": metavar="PATH", action="store", type=str, - help="Path to directory that holds output csv files. One per strategy", + help="Path to directory that holds output JSON files. One for strategy selection and one per strategy for GEMM config selection", ) parser.add_argument( "-t", @@ -550,7 +657,11 @@ if __name__ == "__main__": milliseconds. Recommended value: <= 0.1 ms", ) parser.add_argument( - "-D", "--debug", dest="debug", action="store_true", help="Enable script debugging output" + "-D", + "--debug", + dest="debug", + action="store_true", + help="Enable script debugging output", ) args = parser.parse_args() logging_level = logging.DEBUG if args.debug else logging.INFO -- cgit v1.2.1