diff options
-rw-r--r-- | docs/00_introduction.dox | 4 | ||||
-rw-r--r-- | examples/gemm_tuner/README.md | 30 | ||||
-rwxr-xr-x[-rw-r--r--] | examples/gemm_tuner/cl_gemm_benchmark.sh (renamed from examples/gemm_tuner/benchmark_gemm_examples.sh) | 186 |
3 files changed, 159 insertions, 61 deletions
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 3dc86fe059..564115ce62 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -1,5 +1,5 @@ /// -/// Copyright (c) 2017-2020 Arm Limited. +/// Copyright (c) 2017-2021 Arm Limited. /// /// SPDX-License-Identifier: MIT /// @@ -125,6 +125,8 @@ v21.02 Public major release - NEUpsampleLayerKernel / CLUpsampleLayerKernel - Extend OpenCL tuner with workgroup batch size support - Experimental extension for the OpenCL tuner to tune the batches of work groups distribute to compute units + - Add functionality to load the OpenCL GEMM heuristics at runtime + - The GEMM heuristic file (MLGO) can be used to update the default GEMM heuristics available for OpenCL v20.11 Public major release - Various bug fixes. diff --git a/examples/gemm_tuner/README.md b/examples/gemm_tuner/README.md index 73bddc9239..aae803eabb 100644 --- a/examples/gemm_tuner/README.md +++ b/examples/gemm_tuner/README.md @@ -9,7 +9,7 @@ The details of these strategies can be found in the documentations of the corres **CLGEMMMatrixMultiplyReshapedOnlyRHSKernel**. 
The Tuner consists of 2 scripts and 3 binaries: -* benchmark_gemm_examples.sh and GemmTuner.py under examples/gemm_tuner, and +* cl_gemm_benchmark and GemmTuner.py under examples/gemm_tuner, and * benchmark_cl_gemm_native, benchmark_cl_gemm_reshaped_rhs_only and benchmark_cl_gemm_reshaped under build/tests/gemm_tuner (you'll need to build the library first) @@ -42,7 +42,7 @@ what kernel and subsequently what configurations for that kernels are the most p ### Step2: Push relevant files to the target device All the files that need to be present on the target device are: -* benchmark script: \<ComputeLibrary\>/examples/gemm_tuner/benchmark_gemm_examples.sh +* benchmark script: \<ComputeLibrary\>/examples/gemm_tuner/cl_gemm_benchmark * shapes and configs csv files: gemm_shapes.csv, gemm_configs_native.csv, gemm_configs_reshaped_only_rhs.csv, gemm_configs_reshaped.csv * Example benchmark binaries: \<ComputeLibrary\>/build/tests/gemm_tuner/benchmark_cl_gemm* @@ -51,15 +51,25 @@ With these files on device, we can collect benchmark data using the script. Assu to a folder called *gemm_tuner*. 
While logged onto our device: ``` # Native -./benchmark_gemm_examples.sh -s native -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_native.csv -o results/native +./cl_gemm_benchmark -s native -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_native.csv -o results/native # Reshaped Only RHS -./benchmark_gemm_examples.sh -s reshaped_rhs_only -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped_only_rhs.csv -o results/reshaped_only_rhs +./cl_gemm_benchmark -s reshaped_rhs_only -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped_only_rhs.csv -o results/reshaped_only_rhs # Reshaped -./benchmark_gemm_examples.sh -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped +./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped ``` You can repeat the 3 commands above to have a bit redundancy in your benchmark data (as you can imagine, measurement is noisy), but you may need to change the output folder for each repeat +It is also possible to split the benchmark phase among different platforms using the **-i** and **-n** options to specify the starting experiment and the number of benchmarks to run. + +# Reshaped benchmark on 3 different platforms +## Platform 1 +./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 0 -n 8 +## Platform 2 +./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 8 -n 8 +## Platform 3 +./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 16 -n 8 + ### Step4: Generate the heuristics 1. After benchmarking, we pull the benchmark data, the *results* folder, from the target device to our host machine 2. 
We use the GemmTuner.py script to give us the heuristics @@ -77,7 +87,7 @@ passing a lower value to *-t \<tolerance\>* to the GemmTuner.py script. * Android or Linux OS * Bash shell * Built Compute Library with benchmark examples binaries - * benchmark_gemm_examples.sh script + * cl_gemm_benchmark script * gemm shape file A csv file containing the **GEMMParam search list**. This is the list of GEMMParams/gemm shapes that we're @@ -202,13 +212,13 @@ passing a lower value to *-t \<tolerance\>* to the GemmTuner.py script. ## Usage The usage of the 2 scripts: -1. benchmark_gemm_examples.sh +1. cl_gemm_benchmark - Run the shell script (**benchmark_gemm_examples.sh**) on your **target device**. Note that all the built benchmark + Run the shell script (**cl_gemm_benchmark**) on your **target device**. Note that all the built benchmark examples: build/tests/gemm_tuner/benchmark_cl_gemm*, have to be present on your target device prior to running. The benchmark results will be saved to json files in an output directory. ``` - Usage: benchmark_gemm_examples.sh [-h] -s \<strategy\> -e \<example_binary_dir\> -g \<gemm_shape_file\> + Usage: cl_gemm_benchmark [-h] -s \<strategy\> -e \<example_binary_dir\> -g \<gemm_shape_file\> -c \<gemm_config_file\> [-d \<data_type\>] [-o \<out_dir\>] Options: @@ -265,4 +275,4 @@ The usage of the 2 scripts: milliseconds. Recommended value: <= 0.1 ms -D, --debug Enable script debugging output - ```
\ No newline at end of file + ``` diff --git a/examples/gemm_tuner/benchmark_gemm_examples.sh b/examples/gemm_tuner/cl_gemm_benchmark.sh index 8789db91c7..a49c2e154b 100644..100755 --- a/examples/gemm_tuner/benchmark_gemm_examples.sh +++ b/examples/gemm_tuner/cl_gemm_benchmark.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020 Arm Limited. +# Copyright (c) 2019-2021 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -31,6 +31,9 @@ CMD=$( basename $0 ) # All supported strategy options ALL_STRATEGY_OPTIONS=("native" "reshaped_rhs_only" "reshaped") +# All supported data type options +ALL_DATA_TYPE_OPTIONS=("f32" "f16" "qasymm8") + # Names of example binary for each strategy EXAMPLE_BIN_NATIVE="benchmark_cl_gemm_native" EXAMPLE_BIN_RESHAPED_RHS_ONLY="benchmark_cl_gemm_reshaped_rhs_only" @@ -39,11 +42,20 @@ EXAMPLE_BIN_RESHAPED_RHS_ONLY_LOWP="benchmark_cl_gemmlowp_reshaped_rhs_only_fuse EXAMPLE_BIN_RESHAPED_LOWP="benchmark_cl_gemmlowp_reshaped" # Default data type -DEFAULT_DATA_TYPE="F32" +DEFAULT_DATA_TYPE="f32" # Default output directory DEFAULT_OUT_DIR="out" +# Default ID of the first experiment +DEFAULT_ID_EXPERIMENT_START=0 + +# Default total number of experiments +DEFAULT_NUM_EXPERIMENTS="all" + +# Default output file extension +DEFAULT_OUT_EXTENSION="gemm_benchmark" + # Number of iterations for each benchmark run NUM_ITERATION=5 # Global }}} @@ -61,7 +73,7 @@ NUM_ITERATION=5 function help_gemm_shape_file() { cat >&2 << EOF Gemm shape file: - Gemm shape file is a headerless csv file with fields separated by commas + Gemm shape file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. 
A gemm shape is a list of 4 positive integers <M, N, K, B> describing the shapes of the two matrices (LHS and RHS) with: @@ -90,7 +102,7 @@ EOF function help_gemm_config_file_native() { cat >&2 << EOF Gemm config file (Strategy native): - Gemm config file is a headerless csv file with fields separated by commas + Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. A gemm config is a list of 3 positive integers <m0, n0, k0>, with: m0 - Number of rows processed by the matrix multiplication @@ -122,9 +134,7 @@ EOF function help_gemm_config_file_reshaped_rhs_only() { cat >&2 << EOF Gemm config file (Strategy reshaped_rhs_only): - Gemm config file is a headerless csv file with fields separated by commas. - - Note also comments and extraneous empty lines are not permitted. + Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 3 boolean values: m0 - Number of rows processed by the matrix multiplication @@ -163,7 +173,7 @@ EOF function help_gemm_config_file_reshaped() { cat >&2 << EOF Gemm config file (Strategy reshaped): - Gemm config file is a headerless csv file with fields separated by commas + Gemm config file is a csv file with fields separated by commas. The header and comments are ignored by the parser. A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 4 boolean values: m0 - Number of rows processed by the matrix multiplication @@ -214,7 +224,7 @@ function usage() { Run gemm examples of a selected strategy, over provided tunable configurationsa and gemm shapes. Save the benchmark results to json files in an output directory. 
-Usage: ${CMD} [-h] -s <strategy> -e <example_binary_dir> -g <gemm_shape_file> -c <gemm_config_file> [-d <data_type>] [-o <out_dir>] +Usage: ${CMD} [-h] -s <strategy> -e <example_binary_dir> -g <gemm_shape_file> -c <gemm_config_file> [-o <out_dir>] [-d <data_type>] [-i <id_experiment_start>] [-n <num_experiments>] [-t <output_extension>] Options: -h @@ -239,14 +249,26 @@ Options: Default: ${DEFAULT_DATA_TYPE} Supported options: Strategy : Data Types - Native : F32 - Reshaped : F16, F32, QASYMM8 - Reshaped RHS Only : F16, F32, QASYMM8 + Native : f32 + Reshaped : f32, f16, qasymm8 + Reshaped RHS Only : f32, f16, qasymm8 -o <out_dir> Path to output directory that holds output json files Default: ${DEFAULT_OUT_DIR} + -i <id_experiment_start> + ID of the first experiment. + Default: ${DEFAULT_ID_EXPERIMENT_START} + + -n <num_experiments> + Total number of experiments to execute in this session. [1-all] + Default: ${DEFAULT_NUM_EXPERIMENTS} + + -t <output_extension> + Output file extension. + Default: ${DEFAULT_OUT_EXTENSION} + EOF # Print help messages about gemm shapes and various gemm configs $HELP && help_gemm_shape_file @@ -329,6 +351,12 @@ function arr_contains() { # NUM_ITERATION # GEMM_CONFIGS_FILE # GEMM_SHAPES_FILE +# STRATEGY_OPTION +# DATA_TYPE +# OUT_DIR +# ID_EXPERIMENT_START +# NUM_EXPERIMENTS + # Arguments: # example_bin Name of the example binary to run # Returns: @@ -336,47 +364,97 @@ function arr_contains() { ####################################### function run() { local example_bin=$1 - echo "Running all configs for ${example_bin}" 1>&2 + echo "Running experiments for ${example_bin}" 1>&2 local example_args - local expr_count=1 + local json_filename + local expr_count=0 + # Total number of experiments available + local num_experiments_total # Total number of experiment runs scheduled for this session - local total_num_experiment - local num_params - local num_configs + local num_experiments_session + local id_experiment_start + local id_experiment_end 
+ local array_shapes + local array_configs + local array_shapes_len + local array_configs_len + local array_shapes_idx + local array_configs_idx local match_expression_shape="^([^,]*,){3}[^,]*$" local match_expression_config="^(\s*[0-9]+\s*,)+\s*[0-9]\s*$" - # Don't count empty lines and lines starting with # (comments) - num_params=$( grep -E "$match_expression_shape" "${GEMM_SHAPES_FILE}" | wc -l | cut -d " " -f 1) - num_configs=$( grep -E "$match_expression_config" "${GEMM_CONFIGS_FILE}" | wc -l | cut -d " " -f 1) - (( total_num_experiment=${num_params} * ${num_configs} )) + local shapes_list_cmd="grep -E "$match_expression_shape" "${GEMM_SHAPES_FILE}"" + local configs_list_cmd="grep -E "$match_expression_config" "${GEMM_CONFIGS_FILE}"" + + # Create array from CSV file + array_shapes=($( $shapes_list_cmd )) + array_configs=($( $configs_list_cmd )) + + # Get array length + array_shapes_len=${#array_shapes[@]} + array_configs_len=${#array_configs[@]} + + # Get the total number of experiments available + (( num_experiments_total=${array_shapes_len} * ${array_configs_len} )) + + # Get the number of experiments to execute in this session + if [ ${NUM_EXPERIMENTS} == ${DEFAULT_NUM_EXPERIMENTS} ] + then + (( num_experiments_session=${array_shapes_len} * ${array_configs_len} )) + else + num_experiments_session=$NUM_EXPERIMENTS + fi + + # Id experiment start + id_experiment_start=${ID_EXPERIMENT_START} + + # Id experiment end + (( id_experiment_end=(${num_experiments_session} + ${id_experiment_start} - 1) )) + + # Check if the id experiment end is grater than or equal to the total number of experiments available. 
+ # If the condition is satisfied, clamp the id experiment end + if [ "$id_experiment_end" -ge "$num_experiments_total" ] + then + echo "Clamping idx experiment end" 1>&2 + (( id_experiment_end=${num_experiments_total} - 1 )) + (( num_experiments_session=${id_experiment_start} + ${id_experiment_end} + 1 )) + fi + # Time elapsed since the beginning in seconds local time_elapsed_s # Time estimated to finish in seconds local time_est_s - echo "Running a total number of ${total_num_experiment} experiments" 1>&2 + echo "Running a total number of ${num_experiments_session} experiments" 1>&2 + echo "Experiment idx start/end [${id_experiment_start}, ${id_experiment_end}]" 1>&2 - while read gemm_shape + # Run experiments + for i in $(seq $id_experiment_start $id_experiment_end); do - while read gemm_config - do - # Ignore empty lines and lines starting with # (comments) - if echo "$gemm_shape" | grep -Eq "$match_expression_shape" && echo "$gemm_config" | grep -Eq "$match_expression_config";then - echo "Running..." 1>&2 - example_args="${gemm_shape},${gemm_config},--type=${DATA_TYPE}" - # Run experiment - ${EXAMPLE_BIN_DIR}/${example_bin} --example_args=${example_args} --iterations=${NUM_ITERATION} --json-file=${OUT_DIR}/${expr_count}.${OUT_EXTENSION} --instruments=OPENCL_TIMER_MS - # Print progress - print_progress ${expr_count} ${total_num_experiment} - # Print time statistics - time_elapsed_s=$SECONDS - echo "Time elapsed since beginning: $(( $time_elapsed_s / 60 ))m $(( $time_elapsed_s % 60 ))s" 1>&2 - (( time_est_s=(${total_num_experiment} - ${expr_count}) * ${time_elapsed_s} / ${expr_count} )) - echo "Time estimated to finish: $(( $time_est_s / 60 ))m $(( $time_est_s % 60 ))s" 1>&2 - (( expr_count++ )) - echo "Done." 
1>&2 - fi - done < "${GEMM_CONFIGS_FILE}" - done < "${GEMM_SHAPES_FILE}" + (( array_shapes_idx=${i} / ${array_configs_len} )) + (( array_configs_idx=${i} % ${array_configs_len} )) + + gemm_shape=${array_shapes[$array_shapes_idx]} + gemm_config=${array_configs[$array_configs_idx]} + + echo "Running shape[$array_shapes_idx]=$gemm_shape with config[$array_configs_idx]=$gemm_config" 1>&2 + + example_args="${gemm_shape},${gemm_config},--type=${DATA_TYPE}" + json_filename="${STRATEGY_OPTION}_${gemm_shape}_${gemm_config}_${DATA_TYPE}" + # Replace "," with "_" + json_filename=${json_filename//,/_} + + # Run experiment + ${EXAMPLE_BIN_DIR}/${example_bin} --example_args=${example_args} --iterations=${NUM_ITERATION} --json-file=${OUT_DIR}/${json_filename}.${OUT_EXTENSION} --instruments=OPENCL_TIMER_MS + # Print progress + (( expr_count++ )) + print_progress ${expr_count} ${num_experiments_session} + # Print time statistics + time_elapsed_s=$SECONDS + echo "Time elapsed since beginning: $(( $time_elapsed_s / 60 ))m $(( $time_elapsed_s % 60 ))s" 1>&2 + (( time_est_s=(${num_experiments_session} - ${expr_count}) * ${time_elapsed_s} / ${expr_count} )) + echo "Time estimated to finish: $(( $time_est_s / 60 ))m $(( $time_est_s % 60 ))s" 1>&2 + echo "Done." 
1>&2 + done + echo "Finished running all configs for ${example_bin}" 1>&2 echo "All results saved to ${OUT_DIR}" 1>&2 } @@ -420,18 +498,23 @@ EXAMPLE_BIN_DIR="" GEMM_SHAPES_FILE="" # Path to gemm configs file GEMM_CONFIGS_FILE="" +# Strategy option STRATEGY_OPTION="" # Data type to use DATA_TYPE=${DEFAULT_DATA_TYPE} # Path to output directory OUT_DIR=${DEFAULT_OUT_DIR} +# ID of the first experiment +ID_EXPERIMENT_START=${DEFAULT_ID_EXPERIMENT_START} +# Total number of experiments to execute in this session +NUM_EXPERIMENTS=${DEFAULT_NUM_EXPERIMENTS} # Output benchmark result file extension -OUT_EXTENSION="gemmtuner_benchmark" +OUT_EXTENSION=${DEFAULT_OUT_EXTENSION} # Toggle help HELP=false # Obtain options -while getopts "hs:e:g:c:d:o:" opt; do +while getopts "hs:e:g:c:d:o:i:n:t:" opt; do case "$opt" in h) HELP=true ;; s) STRATEGY_OPTION=$(to_lower "${OPTARG}");; @@ -440,6 +523,9 @@ while getopts "hs:e:g:c:d:o:" opt; do c) GEMM_CONFIGS_FILE="${OPTARG}";; d) DATA_TYPE=$(to_lower "${OPTARG}");; o) OUT_DIR="${OPTARG}";; + i) ID_EXPERIMENT_START="${OPTARG}";; + n) NUM_EXPERIMENTS="${OPTARG}";; + t) OUT_EXTENSION="${OPTARG}";; esac done shift $((OPTIND - 1)) @@ -473,6 +559,10 @@ $HELP && arr_contains "${STRATEGY_OPTION}" "${ALL_STRATEGY_OPTIONS[@]}" || error_msg "Does not support strategy ${STRATEGY_OPTION}" +# Verify data type option is valid +arr_contains "${DATA_TYPE}" "${ALL_DATA_TYPE_OPTIONS[@]}" || + error_msg "Does not support data type ${DATA_TYPE}" + # Make sure existing benchmark outputs are not overwritten [ ! -d "${OUT_DIR}" ] || error_msg "Output directory ${OUT_DIR} already exists!" 
@@ -480,20 +570,16 @@ arr_contains "${STRATEGY_OPTION}" "${ALL_STRATEGY_OPTIONS[@]}" || # Make output directory echo "Making output directory ${OUT_DIR}" 1>&2 mkdir -p ${OUT_DIR} || error_msg "Failed to make output directory ${OUT_DIR}" -date +%s > ${OUT_DIR}/start_time_unix_seconds # Run selected strategy with all configurations # Restart the built-in timer +SECONDS=0 if [ "$DATA_TYPE" == "qasymm8" ]; then - SECONDS=0 [ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY_LOWP [ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED_LOWP - date +%s > ${OUT_DIR}/end_time_unix_seconds else - SECONDS=0 [ "${STRATEGY_OPTION}" == "native" ] && run $EXAMPLE_BIN_NATIVE [ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY [ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED - date +%s > ${OUT_DIR}/end_time_unix_seconds fi # Main: Main script }}} |