From a3eded0843874a78e69e4b985cb2a492bfab78f3 Mon Sep 17 00:00:00 2001 From: Eric Kunze Date: Mon, 13 Dec 2021 15:40:04 -0800 Subject: Add rudimentary spell check for specification We can't easily spell check the entire specification; the pseudocode and operator tables make it unworkable. This adds a simple Python script that tries to extract just the description of the operators for checking. It also does a check over the entire license. A custom dictionary is used to contain specification custom words. Change-Id: I74558c03af1506e2970f20b3246d920c2753ca44 Signed-off-by: Eric Kunze --- tools/dictionary.dic | 67 +++++++++++++++++++++++++++++++++++++++++++++++ tools/get_descriptions.py | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 tools/dictionary.dic create mode 100644 tools/get_descriptions.py (limited to 'tools') diff --git a/tools/dictionary.dic b/tools/dictionary.dic new file mode 100644 index 0000000..94ef782 --- /dev/null +++ b/tools/dictionary.dic @@ -0,0 +1,67 @@ +personal_ws-1.1 en 500 +activations +ARGMAX +AsciiDoc +BILINEAR +bilinearly +bitwise +BITWISE +Bool +CEIL +CLZ +concat +CONCAT +COND +conformant +const +CONST +CONV +CPUs +denormalizing +DEPTHWISE +Elementwise +foreach +Fulbourn +GPUs +Hadamard +INTDIV +licence +Licence +LICENCE +licensable +lookups +lowerroman +MATMUL +md +MERCHANTABILITY +MUL +multipler +NPUs +precisions +pseudocode +Pseudocode +PyTorch +quantization +Quantization +quantized +Quantized +README +Rescale +RESCALE +rescaled +RSQRT +sigmoid +Sigmoid +SIGMOID +SIMD +subtensor +tanh +TANH +TensorFlow +tensorinfo +TFLite +tosa +TOSA +TPUs +unary +Unary diff --git a/tools/get_descriptions.py b/tools/get_descriptions.py new file mode 100644 index 0000000..3f2ee05 --- /dev/null +++ b/tools/get_descriptions.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2022, ARM Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Script to pull the descriptions out of the specification so that
# they can be run through a spellcheck with less noise

import argparse
import os
import re

# Base name of the license chapter. It is all text, so it is passed
# through unfiltered instead of being searched for descriptions.
LICENSE_BASENAME = "tosa_license.adoc"

# A line beginning with "===" opens a description.
_DESCRIPTION_START = re.compile(r"^===")

# A line beginning with "[" or "*" closes it: that is a subsection like
# *Arguments* or pseudocode in a [source] section, where spellcheck is
# not useful.
_DESCRIPTION_END = re.compile(r"[\[\*]")


def is_license_file(name):
    """Return True if *name* refers to the license chapter.

    Only the final path component is compared, so the file is recognised
    whether it is given as "chapters/tosa_license.adoc", with a leading
    "./", or as an absolute path.
    """
    return os.path.basename(name) == LICENSE_BASENAME


def extract_descriptions(lines, always_in=False):
    """Yield the lines of *lines* that should be spellchecked.

    lines     -- iterable of text lines (for example an open text file);
                 each line is yielded unmodified, including any newline.
    always_in -- when true, every line is yielded unchanged.

    Otherwise lines are skipped until one begins with "===". That line
    and the ones after it are yielded until a line beginning with "[" or
    "*" is reached; that terminating line is dropped and skipping resumes
    until the next "===" line.
    """
    in_description = False
    for text in lines:
        if always_in:
            yield text
        elif not in_description:
            if _DESCRIPTION_START.match(text):
                in_description = True
                yield text
        elif _DESCRIPTION_END.match(text):
            in_description = False
        else:
            yield text


def main(argv=None):
    """Print the spellcheckable text of every file named on the command line.

    argv -- argument list to parse; None means use sys.argv[1:].
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filenames", nargs="+", help="filename to extract descriptions from"
    )
    args = parser.parse_args(argv)

    for name in args.filenames:
        # Decode explicitly rather than relying on the locale default.
        # NOTE(review): assumes the specification sources are UTF-8 - confirm.
        with open(name, "r", encoding="utf-8") as docfile:
            for text in extract_descriptions(docfile, is_license_file(name)):
                # Each line keeps its own newline, so print() adds a
                # blank line after it; the output format is unchanged.
                print(text)


if __name__ == "__main__":
    main()