Dr0ptp4kt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/196298

Change subject: Address nested parentheses in Share a Fact
......................................................................

Address nested parentheses in Share a Fact

Additionally, trim whitespace before semicolons.

Finally, use autoreleasepools, as a reviewer had requested.

Change-Id: I3c26877c7e4e220e84af9f192423384ea3b5c64e
---
M WikipediaUnitTests/NSString+WMFHTMLParsingTests.m
M wikipedia/Categories/NSString+WMFHTMLParsing.m
2 files changed, 112 insertions(+), 93 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/apps/ios/wikipedia 
refs/changes/98/196298/1

diff --git a/WikipediaUnitTests/NSString+WMFHTMLParsingTests.m 
b/WikipediaUnitTests/NSString+WMFHTMLParsingTests.m
index 698180d..f4dd719 100644
--- a/WikipediaUnitTests/NSString+WMFHTMLParsingTests.m
+++ b/WikipediaUnitTests/NSString+WMFHTMLParsingTests.m
@@ -33,8 +33,10 @@
 }
 
 - (void)testAdequateSnippet {
-    NSString *string = @"<p>Dog (woof) [horse] adequately long string</p>";
-    XCTAssertEqualObjects([string wmf_getStringSnippetWithoutHTML], @"Dog adequately long string");
+    NSString *string = @"<p>Dog (woof (w00t)) [horse] adequately long string historically 40 characters.</p>";
+    // Nested parens and the bracketed run must both be stripped; compute once, assert on it.
+    NSString *result = [string wmf_getStringSnippetWithoutHTML];
+    XCTAssertEqualObjects(result, @"Dog adequately long string historically 40 characters.");
 }
 
 @end
diff --git a/wikipedia/Categories/NSString+WMFHTMLParsing.m 
b/wikipedia/Categories/NSString+WMFHTMLParsing.m
index 9c06619..0e5663b 100644
--- a/wikipedia/Categories/NSString+WMFHTMLParsing.m
+++ b/wikipedia/Categories/NSString+WMFHTMLParsing.m
@@ -42,100 +42,117 @@
 }
 
 + (NSString*)wmf_stringSnippetSimplifiedInString:(NSString*)string {
-    NSString* result                   = [string 
stringByReplacingOccurrencesOfString:@"&amp;" withString:@"&"];
-    NSError* err                       = nil;
-    NSRegularExpression* newlinesRegex = [NSRegularExpression
-                                          
regularExpressionWithPattern:@"\n{2,}"
-                                                               options:0
-                                                                 error:&err];
-    NSRange range = NSMakeRange(0, result.length);
-    result = [newlinesRegex stringByReplacingMatchesInString:result
+    @autoreleasepool {
+        NSString* result                   = [string 
stringByReplacingOccurrencesOfString:@"&amp;" withString:@"&"];
+        NSError* err                       = nil;
+        NSRegularExpression* newlinesRegex = [NSRegularExpression
+                                              
regularExpressionWithPattern:@"\n{2,}"
+                                              options:0
+                                              error:&err];
+        NSRange range = NSMakeRange(0, result.length);
+        result = [newlinesRegex stringByReplacingMatchesInString:result
+                                                         options:0
+                                                           range:range
+                                                    withTemplate:@"\n"];
+        
+        
+        // We probably don't want to try to handle ideographic parens
+        err = nil;
+        NSRegularExpression* parensRegex = [NSRegularExpression
+                                            
regularExpressionWithPattern:@"[(][^()]+[)]"
+                                            options:0
+                                            error:&err];
+        
+        result = [NSString wmf_recursivelyUpdateString:result 
withRegex:parensRegex];
+        
+        // Nor do we want to try to handle ideographic brackets
+        err = nil;
+        NSRegularExpression* bracketsRegex = [NSRegularExpression
+                                              
regularExpressionWithPattern:@"\\[[^]]+]"
+                                              options:0
+                                              error:&err];
+        
+        range  = NSMakeRange(0, result.length);
+        result = [bracketsRegex stringByReplacingMatchesInString:result
+                                                         options:0
+                                                           range:range
+                                                    withTemplate:@""];
+        
+        // Unlike parens and brackets and unlike doubled up space in general,
+        // we do not want whitespace preceding the comma, ideographic comma,
+        // or semicolon
+        err = nil;
+        NSRegularExpression* whitespaceCommaSemicolonRegex = 
[NSRegularExpression
+                                                     
regularExpressionWithPattern:@"\\s+([,、;])"
                                                      options:0
-                                                       range:range
-                                                withTemplate:@"\n"];
-
-
-    // We probably don't want to try to handle ideographic parens
-    err = nil;
-    NSRegularExpression* parensRegex = [NSRegularExpression
-                                        
regularExpressionWithPattern:@"[(][^)]+[)]"
-                                                             options:0
-                                                               error:&err];
-    range  = NSMakeRange(0, result.length);
-    result = [parensRegex stringByReplacingMatchesInString:result
-                                                   options:0
-                                                     range:range
-                                              withTemplate:@""];
-
-    // Nor do we want to try to handle ideographic brackets
-    err = nil;
-    NSRegularExpression* bracketsRegex = [NSRegularExpression
-                                          
regularExpressionWithPattern:@"\\[[^]]+]"
-                                                               options:0
-                                                                 error:&err];
-    range  = NSMakeRange(0, result.length);
-    result = [bracketsRegex stringByReplacingMatchesInString:result
-                                                     options:0
-                                                       range:range
-                                                withTemplate:@""];
-
-    // Unlike parens and brackets and unlike doubled up space in general,
-    // we do not want whitespace preceding the comma or ideographic comma
-    err = nil;
-    NSRegularExpression* whitespaceCommaRegex = [NSRegularExpression
-                                                 
regularExpressionWithPattern:@"\\s+([,、])"
+                                                     error:&err];
+        range  = NSMakeRange(0, result.length);
+        result = [whitespaceCommaSemicolonRegex 
stringByReplacingMatchesInString:result
+                                                                options:0
+                                                                  range:range
+                                                           withTemplate:@"$1"];
+        
+        // Ideographic stops from TextExtracts, which were from OpenSearch
+        err = nil;
+        NSRegularExpression* whitespacePeriodRegex = [NSRegularExpression
+                                                      
regularExpressionWithPattern:@"\\s+([\\.|。|.|。])"
+                                                      options:0
+                                                      error:&err];
+        range  = NSMakeRange(0, result.length);
+        result = [whitespacePeriodRegex stringByReplacingMatchesInString:result
+                                                                 options:0
+                                                                   range:range
+                                                            
withTemplate:@"$1"];
+        
+        // In practice, we rarely care about doubled up whitespace in the
+        // string except for the actual space character
+        err = nil;
+        NSRegularExpression* spacesRegex = [NSRegularExpression
+                                            regularExpressionWithPattern:@" 
{2,}"
+                                            options:0
+                                            error:&err];
+        range  = NSMakeRange(0, result.length);
+        result = [spacesRegex stringByReplacingMatchesInString:result
+                                                       options:0
+                                                         range:range
+                                                  withTemplate:@" "];
+        
+        // Note about trailing colon characters: they usually look strange if 
kept,
+        // and removing them (plus spaces and newlines) doesn't often create 
merged
+        // words that look bad - these are usually at tag boundaries. For 
Latinized
+        // langs sometimes this means words like "include" finish the snippet.
+        // But as a matter of markup structure, something like a <p> tag
+        // shouldn't be </p> closed until something like <ul>...</ul> is 
closed.
+        // In fact, some sections have this layout, and some do not.
+        err = nil;
+        NSRegularExpression* leadingTrailingWhitespaceNewlineRegex = 
[NSRegularExpression
+                                                                      
regularExpressionWithPattern:@"^[\\s\n]+|[\\s\n:]+$"
                                                                       options:0
-                                                                        
error:&err];
-    range  = NSMakeRange(0, result.length);
-    result = [whitespaceCommaRegex stringByReplacingMatchesInString:result
-                                                            options:0
-                                                              range:range
-                                                       withTemplate:@"$1"];
+                                                                      
error:&err];
+        range  = NSMakeRange(0, result.length);
+        result = [leadingTrailingWhitespaceNewlineRegex 
stringByReplacingMatchesInString:result
+                                                                               
  options:0
+                                                                               
    range:range
+                                                                            
withTemplate:@""];
+        
+        return result;
+    }
+}
 
-    // Ideographic stops from TextExtracts, which were from OpenSearch
-    err = nil;
-    NSRegularExpression* whitespacePeriodRegex = [NSRegularExpression
-                                                  
regularExpressionWithPattern:@"\\s+([\\.|。|.|。])"
-                                                                       
options:0
-                                                                         
error:&err];
-    range  = NSMakeRange(0, result.length);
-    result = [whitespacePeriodRegex stringByReplacingMatchesInString:result
-                                                             options:0
-                                                               range:range
-                                                        withTemplate:@"$1"];
-
-    // In practice, we rarely care about doubled up whitespace in the
-    // string except for the actual space character
-    err = nil;
-    NSRegularExpression* spacesRegex = [NSRegularExpression
-                                        regularExpressionWithPattern:@" {2,}"
-                                                             options:0
-                                                               error:&err];
-    range  = NSMakeRange(0, result.length);
-    result = [spacesRegex stringByReplacingMatchesInString:result
-                                                   options:0
-                                                     range:range
-                                              withTemplate:@" "];
-
-    // Note about trailing colon characters: they usually look strange if kept,
-    // and removing them (plus spaces and newlines) doesn't often create merged
-    // words that look bad - these are usually at tag boundaries. For Latinized
-    // langs sometimes this means words like "include" finish the snippet.
-    // But as a matter of markup structure, something like a <p> tag
-    // shouldn't be </p> closed until something like <ul>...</ul> is closed.
-    // In fact, some sections have this layout, and some do not.
-    err = nil;
-    NSRegularExpression* leadingTrailingWhitespaceNewlineRegex = 
[NSRegularExpression
-                                                                  
regularExpressionWithPattern:@"^[\\s\n]+|[\\s\n:]+$"
-                                                                               
        options:0
-                                                                               
          error:&err];
-    range  = NSMakeRange(0, result.length);
-    result = [leadingTrailingWhitespaceNewlineRegex 
stringByReplacingMatchesInString:result
-                                                                             
options:0
-                                                                               
range:range
-                                                                        
withTemplate:@""];
-
-    return result;
++ (NSString*)wmf_recursivelyUpdateString:(NSString*)string withRegex:(NSRegularExpression*)regex {
+    // Delete regex matches pass after pass until a pass changes nothing, which unwinds nesting.
+    NSString* oldResult;
+    do {
+        @autoreleasepool {  // drained every pass; assumes ARC keeps string/oldResult alive
+            oldResult = [string copy];
+            string    = [regex stringByReplacingMatchesInString:string
+                                                        options:0
+                                                          range:NSMakeRange(0, string.length)
+                                                   withTemplate:@""];
+        }
+        // Messaging nil yields NO, so a nil string would never compare equal: stop explicitly.
+    } while (string != nil && ![oldResult isEqualToString:string]);
+    return string;
 }
 
 @end

-- 
To view, visit https://gerrit.wikimedia.org/r/196298
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3c26877c7e4e220e84af9f192423384ea3b5c64e
Gerrit-PatchSet: 1
Gerrit-Project: apps/ios/wikipedia
Gerrit-Branch: master
Gerrit-Owner: Dr0ptp4kt <ab...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to