Title: [265343] trunk/Tools
Revision
265343
Author
ddkil...@apple.com
Date
2020-08-06 13:06:18 -0700 (Thu, 06 Aug 2020)

Log Message

check-webkit-style: better algorithm to check for acronym capitalization in an identifier
<https://webkit.org/b/215026>

Reviewed by Darin Adler.

* Scripts/webkitpy/style/checkers/cpp.py:
(_split_identifier_into_words): Add.
- This method splits a identifier into individual words.
(_check_identifier_name_for_acronyms):
- Update to use _split_identifier_into_words(), which makes it
  possible to check for improperly capitalized acronyms in the
  middle of identifiers.
- Also add support for exceptions, which are valid words that
  include acronyms (like "Curl").
* Scripts/webkitpy/style/checkers/cpp_unittest.py:
(CppStyleTest):
- Fix a typo in a method name in another test.
(WebKitStyleTest.test_split_identifier_into_words): Add.
- Add tests for _split_identifier_into_words().
(WebKitStyleTest.test_identifier_names_with_acronyms):
- Add tests for cases that weren't possible with the previous
  algorithm.

Modified Paths

Diff

Modified: trunk/Tools/ChangeLog (265342 => 265343)


--- trunk/Tools/ChangeLog	2020-08-06 20:01:30 UTC (rev 265342)
+++ trunk/Tools/ChangeLog	2020-08-06 20:06:18 UTC (rev 265343)
@@ -1,3 +1,28 @@
+2020-08-06  David Kilzer  <ddkil...@apple.com>
+
+        check-webkit-style: better algorithm to check for acronym capitalization in an identifier
+        <https://webkit.org/b/215026>
+
+        Reviewed by Darin Adler.
+
+        * Scripts/webkitpy/style/checkers/cpp.py:
+        (_split_identifier_into_words): Add.
+        - This method splits a identifier into individual words.
+        (_check_identifier_name_for_acronyms):
+        - Update to use _split_identifier_into_words(), which makes it
+          possible to check for improperly capitalized acronyms in the
+          middle of identifiers.
+        - Also add support for exceptions, which are valid words that
+          include acronyms (like "Curl").
+        * Scripts/webkitpy/style/checkers/cpp_unittest.py:
+        (CppStyleTest):
+        - Fix a typo in a method name in another test.
+        (WebKitStyleTest.test_split_identifier_into_words): Add.
+        - Add tests for _split_identifier_into_words().
+        (WebKitStyleTest.test_identifier_names_with_acronyms):
+        - Add tests for cases that weren't possible with the previous
+          algorithm.
+
 2020-08-06  Aakash Jain  <aakash_j...@apple.com>
 
         [ews] Add method to send email notifications to patch author for build failure

Modified: trunk/Tools/Scripts/webkitpy/style/checkers/cpp.py (265342 => 265343)


--- trunk/Tools/Scripts/webkitpy/style/checkers/cpp.py	2020-08-06 20:01:30 UTC (rev 265342)
+++ trunk/Tools/Scripts/webkitpy/style/checkers/cpp.py	2020-08-06 20:06:18 UTC (rev 265343)
@@ -1796,6 +1796,101 @@
     return True
 
 
+def _split_identifier_into_words(identifier):
+    words = []
+    if not identifier:
+        return words
+
+    # Remove prefixes that aren't part of the identifier name.
+    identifier = re.sub(r'^[gms]?_', '', identifier)
+    # Remove bitfield lengths.
+    identifier = re.sub(r':[0-9]+$', '', identifier)
+
+    identifier_length = len(identifier)
+
+    match_upper_re = re.compile(r'^[A-Z]+')
+    match_upper_lower_re = re.compile(r'^[A-Z][a-z]+')
+    match_lower_re = re.compile(r'^[a-z]+')
+
+    match_lower = match_lower_re.search(identifier)
+    match_upper_lower = match_upper_lower_re.search(identifier)
+    match_upper = match_upper_re.search(identifier)
+    if match_lower:
+        word = match_lower.group(0)
+        words.append(word)
+        if len(word) == identifier_length:
+            return words
+        identifier = identifier[len(word):]
+    elif match_upper_lower:
+        word = match_upper_lower.group(0)
+        words.append(word)
+        if len(word) == identifier_length:
+            return words
+        identifier = identifier[len(word):]
+    elif match_upper:
+        word = match_upper.group(0)
+        if len(word) == identifier_length:
+            words.append(word)
+            return words
+        if identifier[len(word)].islower():
+            word = word[:-1]
+        words.append(word)
+        identifier = identifier[len(word):]
+
+    match_number_re = re.compile(r'^[0-9]+')
+    while identifier:
+        identifier_length = len(identifier)
+        if identifier.startswith('_'):
+            if len(identifier) == 1:
+                return words
+            identifier = identifier[1:]
+            continue
+        if identifier.startswith('::'):
+            words.append('::')
+            if len(identifier) == 2:
+                return words
+            identifier = identifier[2:]
+            continue
+        match_upper_lower = match_upper_lower_re.search(identifier)
+        if match_upper_lower:
+            word = match_upper_lower.group(0)
+            words.append(word)
+            if len(word) == identifier_length:
+                return words
+            identifier = identifier[len(word):]
+            continue
+        match_upper = match_upper_re.search(identifier)
+        if match_upper:
+            word = match_upper.group(0)
+            if len(word) == len(identifier):
+                words.append(word)
+                return words
+            if identifier[len(word)].islower():
+                word = word[:-1]
+            words.append(word)
+            identifier = identifier[len(word):]
+            continue
+        match_number = match_number_re.search(identifier)
+        if match_number:
+            word = match_number.group(0)
+            words.append(word)
+            if len(word) == identifier_length:
+                return words
+            identifier = identifier[len(word):]
+            continue
+        match_lower = match_lower_re.search(identifier)
+        if match_lower:
+            word = match_lower.group(0)
+            words.append(word)
+            if len(word) == identifier_length:
+                return words
+            identifier = identifier[len(word):]
+            continue
+        assert False, 'Could not match "%s"' % identifier
+
+    return words
+
+
 def _check_identifier_name_for_acronyms(identifier, line_number, is_class_or_namespace_or_struct_name, error):
     """Checks to see if the identifier name contains an acronym with improper case.
 
@@ -1803,30 +1898,58 @@
     middle or at the end of an identifier name, but "Url" is never okay.
     """
     acronyms = '|'.join(['MIME', 'URL'])
+    acronym_exceptions = '|'.join(['cfurl', 'curl', 'Curl', 'nsurl', 'urls'])
 
-    start_re = re.compile('^(%s)(|$)' % acronyms, re.IGNORECASE)
-    start_expected_re = re.compile('^(%s)([^:]|$)' % acronyms.lower())
-    # Identifiers that start with an acronym must be all lowercase, except for class/namespace/struct names.
-    if start_re.search(identifier) and not start_expected_re.search(identifier):
-        start_uppercase_re = re.compile('^(%s)' % acronyms)
-        # Ignore class/namespace/struct names that start with all-uppercase acronyms.
-        if start_uppercase_re.search(identifier) and \
-                (is_class_or_namespace_or_struct_name or identifier.find('::') != -1):
-            return True
-        error(line_number, 'readability/naming/acronym', 5,
-              'The identifier name "%s" starts with a acronym that is not all lowercase.' % identifier)
-        return False
+    identifier_words = _split_identifier_into_words(identifier)
 
-    # FIXME: Hard to check middle words without knowing that the word to the left doesn't end with an acronym.
+    is_constructor = False
+    if identifier_words.count('::') == 1:
+        names = identifier.split('::')
+        if names[0] == names[1]:
+            is_constructor = True
 
-    # Identifiers that end with an acronym must be all uppercase, except for variables like 'm_url' and 'Class::url()'.
-    end_re = re.compile('[^_:](%s)$' % acronyms, re.IGNORECASE)
-    end_expected_re = re.compile('[^_:](%s)$' % acronyms)
-    if end_re.search(identifier) and not end_expected_re.search(identifier):
-        error(line_number, 'readability/naming/acronym', 5,
-              'The identifier name "%s" ends with a acronym that is not all uppercase.' % identifier)
-        return False
+    contains_acronym_lowercase_re = re.compile('(%s)' % acronyms.lower())
+    is_acronym_any_case_re = re.compile('^(%s)$' % acronyms, re.IGNORECASE)
+    is_acronym_lowercase_re = re.compile('^(%s)$' % acronyms.lower())
+    is_acronym_uppercase_re = re.compile('^(%s)$' % acronyms.upper())
+    is_acronym_exception_any_case_re = re.compile('^(%s)$' % acronym_exceptions)
 
+    start_of_variable = True
+    for i in range(0, len(identifier_words)):
+        word = identifier_words[i]
+
+        if word == '::':
+            start_of_variable = True
+            continue
+
+        if start_of_variable:
+            start_of_variable = False
+            # Identifiers that start with an acronym must be all lowercase, except for class/namespace/struct names.
+            if is_acronym_any_case_re.search(word):
+                if is_acronym_lowercase_re.search(word):
+                    continue
+                elif is_acronym_uppercase_re.search(word) and \
+                        (is_class_or_namespace_or_struct_name or '::' in identifier_words[i:] or is_constructor):
+                    continue
+                else:
+                    error(line_number, 'readability/naming/acronym', 5,
+                          'The identifier name "%s" starts with an acronym that is not all lowercase.' % identifier)
+                    return False
+        else:
+            # Identifiers that contain or end with an acronym must be all uppercase.
+            if is_acronym_any_case_re.search(word):
+                if is_acronym_uppercase_re.search(word):
+                    continue
+                else:
+                    error(line_number, 'readability/naming/acronym', 5,
+                          'The identifier name "%s" contains an acronym that is not all uppercase.' % identifier)
+                    return False
+
+        if contains_acronym_lowercase_re.search(word) and not is_acronym_exception_any_case_re.search(word):
+            error(line_number, 'readability/naming/acronym', 3,
+                  'The identifier name "%s" _may_ contain an acronym that is not all uppercase.' % identifier)
+            continue
+
     return True
 
 
@@ -3841,6 +3964,11 @@
         identifier = matched.group('identifier')
         character_after_identifier = matched.group('character_after_identifier')
 
+        # It's possible for the regular _expression_ to match ':' in modern Objective-C for loops
+        # or NSDictionary initialization lists.
+        if identifier == ':':
+            return
+
         # If we removed a non-for-control statement, the character after
         # the identifier should be '='. With this rule, we can avoid
         # warning for cases like "if (val & INT_MAX) {".

Modified: trunk/Tools/Scripts/webkitpy/style/checkers/cpp_unittest.py (265342 => 265343)


--- trunk/Tools/Scripts/webkitpy/style/checkers/cpp_unittest.py	2020-08-06 20:01:30 UTC (rev 265342)
+++ trunk/Tools/Scripts/webkitpy/style/checkers/cpp_unittest.py	2020-08-06 20:06:18 UTC (rev 265343)
@@ -45,7 +45,7 @@
 from webkitcorepy import string_utils
 
 from webkitpy.style.checkers import cpp as cpp_style
-from webkitpy.style.checkers.cpp import CppChecker
+from webkitpy.style.checkers.cpp import CppChecker, _split_identifier_into_words
 from webkitpy.style.filter import FilterConfiguration
 
 
@@ -2043,7 +2043,7 @@
             '}\n',
             '')
         self.assert_multi_line_lint(
-            'auto Foo:bar() -> Baz\n'
+            'auto Foo::bar() -> Baz\n'
             '{\n'
             '}\n',
             '')
@@ -5949,12 +5949,55 @@
                                             '{\n'
                                             '}\n', 'test.cpp', parameter_error_rules))
 
+    def test_split_identifier_into_words(self):
+        self.assertEqual([], _split_identifier_into_words(''))
+        self.assertEqual(['a'], _split_identifier_into_words('a'))
+        self.assertEqual(['A'], _split_identifier_into_words('A'))
+        self.assertEqual(['a', 'B'], _split_identifier_into_words('aB'))
+        self.assertEqual(['Ab'], _split_identifier_into_words('Ab'))
+
+        self.assertEqual(['url'], _split_identifier_into_words('url'))
+        self.assertEqual(['Url'], _split_identifier_into_words('Url'))
+        self.assertEqual(['URL'], _split_identifier_into_words('URL'))
+        self.assertEqual(['url', 'String'], _split_identifier_into_words('urlString'))
+        self.assertEqual(['URL', 'String'], _split_identifier_into_words('URLString'))
+        self.assertEqual(['test', 'Path', 'Or', 'Url'], _split_identifier_into_words('testPathOrUrl'))
+
+        self.assertEqual(['URL', '::', 'invalidate'], _split_identifier_into_words('URL::invalidate'))
+        self.assertEqual(['URL', '::', 'invalidate'], _split_identifier_into_words('URL_::invalidate'))
+        self.assertEqual(['URL', '::', 'invalidate'], _split_identifier_into_words('URL_::_invalidate'))
+        self.assertEqual(['URL', '::', 'invalidate'], _split_identifier_into_words('URL_::invalidate'))
+        self.assertEqual(['URL', '8', '::', 'invalidate'], _split_identifier_into_words('URL8::invalidate'))
+        self.assertEqual(['URL', '8', '::', 'invalidate'], _split_identifier_into_words('URL8_::_invalidate_'))
+        self.assertEqual(['URL', '8', '::', 'invalidate'], _split_identifier_into_words('URL8_::_invalidate'))
+        self.assertEqual(['URL', '8', '::', 'invalidate'], _split_identifier_into_words('URL8_::_invalidate_'))
+        self.assertEqual(['URL', '8', '::', 'invalidate'], _split_identifier_into_words('URL8_::invalidate'))
+        self.assertEqual(['URL', '8', '::', 'invalidate'], _split_identifier_into_words('URL8_::_invalidate'))
+
+        self.assertEqual(['loadurl'], _split_identifier_into_words('loadurl'))
+        self.assertEqual(['load', 'Url'], _split_identifier_into_words('loadUrl'))
+        self.assertEqual(['load', 'URL'], _split_identifier_into_words('loadURL'))
+
+        self.assertEqual(['url'], _split_identifier_into_words('_url'))
+        self.assertEqual(['url'], _split_identifier_into_words('g_url'))
+        self.assertEqual(['url'], _split_identifier_into_words('m_url'))
+        self.assertEqual(['url'], _split_identifier_into_words('s_url'))
+        self.assertEqual(['DC'], _split_identifier_into_words('m_DC'))
+
+        self.assertEqual(['i', '1'], _split_identifier_into_words('i1'))
+        self.assertEqual(['url', '8'], _split_identifier_into_words('url8'))
+        self.assertEqual(['Url', '8'], _split_identifier_into_words('Url8'))
+        self.assertEqual(['URL', '8'], _split_identifier_into_words('URL8'))
+        self.assertEqual(['non', 'UTF', '8', 'Query', 'Encoding'], _split_identifier_into_words('nonUTF8QueryEncoding'))
+
+        self.assertEqual(['is', 'Soft'], _split_identifier_into_words('isSoft:1'))
+
     def test_identifier_names_with_acronyms(self):
         identifier_error_rules = ('-',
                                   '+readability/naming/acronym')
 
-        # Start of parameter name.
-        error_start = 'The identifier name "%s" starts with a acronym that is not all lowercase.'\
+        # Test that an identifier starts with an acronym.
+        error_start = 'The identifier name "%s" starts with an acronym that is not all lowercase.'\
                       '  [readability/naming/acronym] [5]'
 
         self.assertEqual('',
@@ -5991,6 +6034,8 @@
                          self.perform_lint('URLParser::URLParser(const String& input, const URL& base, const URLTextEncoding* nonUTF8QueryEncoding)', 'test.cpp', identifier_error_rules))
         self.assertEqual('',
                          self.perform_lint('bool URLParser::internalValuesConsistent(const URL& url)', 'test.cpp', identifier_error_rules))
+        self.assertEqual(error_start % 'URL::URLNotConstructor',
+                         self.perform_lint('bool URL::URLNotConstructor()', 'test.cpp', identifier_error_rules))
 
         self.assertEqual('',
                          self.perform_lint('String m_url;', 'test.cpp', identifier_error_rules))
@@ -6002,15 +6047,11 @@
         self.assertEqual(error_start % 'UrlParse::UrlParse',
                          self.perform_lint('void UrlParse::UrlParse()', 'test.cpp', identifier_error_rules))
 
-        # FIXME: Hard to check middle words without knowing that the word to the left doesn't end with an acronym.
+        # Test that identifier contains an acronym.
+        error_contain = 'The identifier name "%s" contains an acronym that is not all uppercase.'\
+                        '  [readability/naming/acronym] [5]'
 
-        error_end = 'The identifier name "%s" ends with a acronym that is not all uppercase.'\
-                    '  [readability/naming/acronym] [5]'
-
-        # End of parameter name.
-        self.assertEqual(error_end % 'loadurl',
-                         self.perform_lint('void load(URL loadurl);', 'test.cpp', identifier_error_rules))
-        self.assertEqual(error_end % 'loadUrl',
+        self.assertEqual(error_contain % 'loadUrl',
                          self.perform_lint('void load(URL loadUrl);', 'test.cpp', identifier_error_rules))
         self.assertEqual('',
                          self.perform_lint('void load(URL loadURL);', 'test.cpp', identifier_error_rules))
@@ -6017,11 +6058,39 @@
 
         self.assertEqual('',
                          self.perform_lint('void InspectorFrontendHost::inspectedURLChanged(const String& newURL)', 'test.cpp', identifier_error_rules))
-        self.assertEqual(error_end % 'testPathOrUrl',
+        self.assertEqual(error_contain % 'testPathOrUrl',
                          self.perform_lint('static void changeWindowScaleIfNeeded(const char* testPathOrUrl)', 'test.cpp', identifier_error_rules))
-        self.assertEqual(error_end % 'localPathOrUrl',
+        self.assertEqual(error_contain % 'localPathOrUrl',
                          self.perform_lint('auto localPathOrUrl = String(testPathOrURL);', 'test.cpp', identifier_error_rules))
 
+        # Test that an identifier _might_ contain an acronym.
+        error_may_contain = 'The identifier name "%s" _may_ contain an acronym that is not all uppercase.'\
+                            '  [readability/naming/acronym] [3]'
+        self.assertEqual(error_may_contain % 'loadurl',
+                         self.perform_lint('void load(URL loadurl);', 'test.cpp', identifier_error_rules))
+        self.assertEqual(error_may_contain % 'canLoadurl',
+                         self.perform_lint('void load(URL url, bool canLoadurl);', 'test.cpp', identifier_error_rules))
+
+        # Test special exceptions.
+        self.assertEqual('', self.perform_lint(
+            'void curlDidSendData(WebCore::CurlRequest&, unsigned long long, unsigned long long) override;',
+            'NetworkDataTaskCurl.h', identifier_error_rules))
+        self.assertEqual('', self.perform_lint(
+            'void NetworkDataTaskCurl::cancel()',
+            'NetworkDataTaskCurl.cpp', identifier_error_rules))
+        self.assertEqual('', self.perform_lint(
+            'String m_nsurlCacheDirectory;',
+            'PluginProcess.h', identifier_error_rules))
+
+        # Sometimes check_identifier_name_in_declaration() gets confused
+        # and thinks ':' is an identifier.
+        self.assertEqual('', self.perform_lint(
+            'NSFilePosixPermissions : [NSNumber numberWithInteger:(WEB_UREAD | WEB_UWRITE)],',
+            'QuickLook.mm', identifier_error_rules))
+        self.assertEqual('', self.perform_lint(
+            'for (CALayer *currLayer : [layer sublayers]) {',
+            'RemoteLayerTreeScrollingPerformanceData.mm', identifier_error_rules))
+
     def test_comments(self):
         # A comment at the beginning of a line is ok.
         self.assert_lint('// comment', '')
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to