Mike Pattrick via dev <[email protected]> writes:
> On Thu, Aug 7, 2025 at 2:07 PM Eli Oliver via dev
> <[email protected]> wrote:
>>
>> Checkpatch is used to do spell checking for C files, currently. However,
>> python code is also a routine part of the Open vSwitch suite, and it should
>> also receive spell checking where possible.
>>
>> This patch adds an initial implementation of spell checking to checkpatch
>> that will catch all of the comments starting with '#', and some of the
>> doc-string lines. Future work would implement a more robust python
>> parser that will handle doc-string entries.
>>
>> Signed-off-by: Eli Oliver <[email protected]>
>> ---
>> tests/checkpatch.at | 64 +++++++++++++++++++++++++++++++++--------
>> utilities/checkpatch.py | 31 ++++++++++++++++----
>> 2 files changed, 78 insertions(+), 17 deletions(-)
>>
>> diff --git a/tests/checkpatch.at b/tests/checkpatch.at
>> index 994876e08..17bb74c43 100755
>> --- a/tests/checkpatch.at
>> +++ b/tests/checkpatch.at
>> @@ -39,34 +39,55 @@ Subject: Patch this is.
>> fi
>> }
>>
>> -# try_checkpatch_c_file SOURCE [ERRORS] [CHECKPATCH-ARGS] [FILTER]
>> +# try_checkpatch_file FILENAME SOURCE [ERRORS] [CHECKPATCH-ARGS] [FILTER]
>> #
>> -# Runs checkpatch against test SOURCE expecting the set of specified
>> -# ERRORS (and warnings). Optionally, sets [CHECKPATCH-ARGS]. The
>> -# optional FILTER can be used to adjust the expected output before
>> +# Runs checkpatch against FILENAME with SOURCE contents, expecting the set
>> +# of specified ERRORS (and warnings). Optionally, sets [CHECKPATCH-ARGS].
>> +# The optional FILTER can be used to adjust the expected output before
>> # processing.
>> -try_checkpatch_c_file() {
>> - echo "$1" | sed 's/^ //' > test.c
>> +try_checkpatch_file() {
>> + local FILENAME="$1"
>> + echo "$2" | sed 's/^ //' > "$FILENAME"
>>
>> - # Take expected output from $2.
>> - if test -n "$2"; then
>> - echo "$2" | sed 's/^ //' > expout
>> + # Take expected output from $3.
>> + if test -n "$3"; then
>> + echo "$3" | sed 's/^ //' > expout
>> else
>> : > expout
>> fi
>>
>> if test -s expout; then
>> AT_CHECK([OVS_SRC_DIR=$top_srcdir $PYTHON3 \
>> - $top_srcdir/utilities/checkpatch.py $3 -q -f test.c],
>> + $top_srcdir/utilities/checkpatch.py $4 -q -f
>> "$FILENAME"],
>> [1], [stdout])
>> - AT_CHECK([sed "$4
>> + AT_CHECK([sed "$5
>> /^Lines checked:/d
>> /^$/d" stdout], [0], [expout])
>> else
>> AT_CHECK([OVS_SRC_DIR=$top_srcdir $PYTHON3 \
>> - $top_srcdir/utilities/checkpatch.py $3 -q -f test.c])
>> + $top_srcdir/utilities/checkpatch.py $4 -q -f
>> "$FILENAME"])
>> fi
>> }
>> +
>> +# try_checkpatch_c_file SOURCE [ERRORS] [CHECKPATCH-ARGS] [FILTER]
>> +#
>> +# Runs checkpatch against "C" SOURCE contents, expecting the set of
>> +# specified ERRORS (and warnings). Optionally, sets [CHECKPATCH-ARGS].
>> +# The optional FILTER can be used to adjust the expected output before
>> +# processing.
>> +try_checkpatch_c_file() {
>> + try_checkpatch_file "test.c" "$@"
>> +}
>> +
>> +# try_checkpatch_py_file SOURCE [ERRORS] [CHECKPATCH-ARGS] [FILTER]
>> +#
>> +# Runs checkpatch against "Python" SOURCE contents, expecting the set of
>> +# specified ERRORS (and warnings). Optionally, sets [CHECKPATCH-ARGS].
>> +# The optional FILTER can be used to adjust the expected output before
>> +# processing.
>> +try_checkpatch_py_file() {
>> + try_checkpatch_file "test.py" "$1"
>> +}
>> OVS_END_SHELL_HELPERS
>>
>> AT_SETUP([checkpatch - sign-offs])
>> @@ -763,4 +784,23 @@ try_checkpatch_c_file \
>> "" \
>> "-S"
>>
>> +dnl Second check with some different words
>> +try_checkpatch_c_file \
>> + "/* This code is for my private prooperty. */
>
> Nit: property
>
>
>> + " \
>> + "WARNING: Possible misspelled word: \"prooperty\"
>> + test.c:1:
>> + /* This code is for my private prooperty. */" \
>> + "-S" \
>> + "/^Did you mean:/d"
>> +
>> +try_checkpatch_py_file \
>> + "# This is a python file with an intentionaly misspelt word.
>
> Nit: intentionally
Just a note, I think the point is for these to be misspelled and cause
checkpatch to emit the warning. So I think they should stay as-is :)
>> + # The user wants to check if it's working." \
>> + "WARNING: Possible misspelled word: \"intentionaly\"
>> + test.py:1:
>> + # This is a python file with an intentionaly misspelt word." \
>> + "-S" \
>> + "/^Did you mean:/d"
>> +
>> AT_CLEANUP
>> diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
>> index 28d0977eb..88d74b717 100755
>> --- a/utilities/checkpatch.py
>> +++ b/utilities/checkpatch.py
>> @@ -384,15 +384,32 @@ def filter_comments(current_line, keep=False):
>> return sanitized_line
>>
>>
>> -def check_spelling(line, comment):
>> +NOT_COMMENT = 0
>> +C_COMMENT = 1
>> +PY_COMMENT = 2
>> +
>> +
>> +def check_spelling(line, comment_type):
>> if not spell_check_dict or not spellcheck:
>> return False
>>
>> is_name_tag = re.compile(r'^\s*([a-z-]+-by): (.*@.*)$', re.I | re.M |
>> re.S)
>> if line.startswith('Fixes: ') or is_name_tag.match(line):
>> return False
>> + if comment_type == NOT_COMMENT:
>> + words = line
>> + elif comment_type == C_COMMENT:
>> + words = filter_comments(line, True)
>> + elif comment_type == PY_COMMENT:
>> + words = ""
>> + matched = re.search(r'#[^!](.*)$', line)
>> + if matched:
>> + words = matched.group(0)[1:]
>
> Instead of
>
> matched = re.search(r'#[^!](.*)$', line)
> matched.group(0)[1:]
>
> It is probably simpler to just include the whole comment in the group
> and reference it by group ID. Eg:
>
> matched = re.search(r'#([^!].*)$', line)
> matched.group(1)
Agreed, it looks nicer to have matched.group(1)
>> + else:
>> + words = line.replace("'''", '').replace('"""', '').strip()
>> + else:
>> + return False
>>
>> - words = filter_comments(line, True) if comment else line
>> words = words.replace(':', ' ').split(' ')
>>
>> flagged_words = []
>> @@ -599,7 +616,11 @@ checks = [
>>
>> {'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None,
>> 'prereq': lambda x: has_comment(x),
>> - 'check': lambda x: check_spelling(x, True)},
>> + 'check': lambda x: check_spelling(x, comment_type=C_COMMENT)},
>> +
>> + {'regex': r'\.py(\.in)?$', 'match_name': None,
>> + 'prereq': lambda x: "#" in x or "'''" in x or '"""' in x,
>
> Not all multiline strings in python are docstrings. However, it may
> not make too much of a difference here.
>
>> + 'check': lambda x: check_spelling(x, PY_COMMENT)},
>>
>> {'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None,
>> 'check': lambda x: empty_return_with_brace(x),
>> @@ -783,7 +804,7 @@ def run_file_checks(text):
>> def run_subject_checks(subject, spellcheck=False):
>> warnings = False
>>
>> - if spellcheck and check_spelling(subject, False):
>> + if spellcheck and check_spelling(subject, comment_type=NOT_COMMENT):
>> warnings = True
>>
>> summary = subject[subject.rindex(': ') + 2:]
>> @@ -1005,7 +1026,7 @@ def ovs_checkpatch_parse(text, filename, author=None,
>> committer=None):
>> '--abbrev=12 COMMIT_REF\n')
>> print("%d: %s\n" % (lineno, line))
>> elif spellcheck:
>> - check_spelling(line, False)
>> + check_spelling(line, comment_type=NOT_COMMENT)
>> for typo, correct in tags_typos.items():
>> m = re.match(typo, line, re.I)
>> if m:
>> --
>> 2.49.0
>>
>>
>> _______________________________________________
>> dev mailing list
>> [email protected]
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev