Title: [264729] trunk
- Revision
- 264729
- Author
- wenson_hs...@apple.com
- Date
- 2020-07-22 15:10:20 -0700 (Wed, 22 Jul 2020)
Log Message
Article headlines are split across multiple lines after translating tagesschau.de
https://bugs.webkit.org/show_bug.cgi?id=214652
<rdar://problem/63995571>
Reviewed by Tim Horton.
Source/WebCore:
Treat block-level heading elements (`h1` through `h6`) as item boundaries. On this website, we currently vend
the main heading and sub-heading as separate tokens in the same item, which sometimes causes the machine
translation framework to try to rearrange and split tokens, since it treats the combination of the main heading
and sub-heading text as a single sentence to be translated.
Test: TextManipulation.StartTextManipulationExtractsHeadingElementsAsSeparateItems
* editing/TextManipulationController.cpp:
(WebCore::isEnclosingItemBoundaryElement):
Tools:
Adds a new API test (a reduced version of article headlines from the front page of tagesschau.de).
* TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (264728 => 264729)
--- trunk/Source/WebCore/ChangeLog 2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Source/WebCore/ChangeLog 2020-07-22 22:10:20 UTC (rev 264729)
@@ -1,3 +1,21 @@
+2020-07-22 Wenson Hsieh <wenson_hs...@apple.com>
+
+ Article headlines are split across multiple lines after translating tagesschau.de
+ https://bugs.webkit.org/show_bug.cgi?id=214652
+ <rdar://problem/63995571>
+
+ Reviewed by Tim Horton.
+
+ Treat block-level heading elements (`h1` through `h6`) as item boundaries. On this website, we currently vend
+ the main heading and sub-heading as separate tokens in the same item, which sometimes causes the machine
+ translation framework to try to rearrange and split tokens, since it treats the combination of the main heading
+ and sub-heading text as a single sentence to be translated.
+
+ Test: TextManipulation.StartTextManipulationExtractsHeadingElementsAsSeparateItems
+
+ * editing/TextManipulationController.cpp:
+ (WebCore::isEnclosingItemBoundaryElement):
+
2020-07-22 Alex Christensen <achristen...@webkit.org>
Safari does not present CertificateInfo for service-worker served documents
Modified: trunk/Source/WebCore/editing/TextManipulationController.cpp (264728 => 264729)
--- trunk/Source/WebCore/editing/TextManipulationController.cpp 2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Source/WebCore/editing/TextManipulationController.cpp 2020-07-22 22:10:20 UTC (rev 264729)
@@ -316,6 +316,10 @@
if (element.hasTagName(HTMLNames::spanTag) && displayType == DisplayType::InlineBlock)
return true;
+ if (displayType == DisplayType::Block && (element.hasTagName(HTMLNames::h1Tag) || element.hasTagName(HTMLNames::h2Tag) || element.hasTagName(HTMLNames::h3Tag)
+ || element.hasTagName(HTMLNames::h4Tag) || element.hasTagName(HTMLNames::h5Tag) || element.hasTagName(HTMLNames::h6Tag)))
+ return true;
+
return false;
}
Modified: trunk/Tools/ChangeLog (264728 => 264729)
--- trunk/Tools/ChangeLog 2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Tools/ChangeLog 2020-07-22 22:10:20 UTC (rev 264729)
@@ -1,3 +1,15 @@
+2020-07-22 Wenson Hsieh <wenson_hs...@apple.com>
+
+ Article headlines are split across multiple lines after translating tagesschau.de
+ https://bugs.webkit.org/show_bug.cgi?id=214652
+ <rdar://problem/63995571>
+
+ Reviewed by Tim Horton.
+
+ Adds a new API test (a reduced version of article headlines from the front page of tagesschau.de).
+
+ * TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm:
+
2020-07-22 Alex Christensen <achristen...@webkit.org>
Safari does not present CertificateInfo for service-worker served documents
Modified: trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm (264728 => 264729)
--- trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm 2020-07-22 21:55:03 UTC (rev 264728)
+++ trunk/Tools/TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm 2020-07-22 22:10:20 UTC (rev 264729)
@@ -1092,6 +1092,35 @@
TestWebKitAPI::Util::run(&done);
}
+TEST(TextManipulation, StartTextManipulationExtractsHeadingElementsAsSeparateItems)
+{
+ auto delegate = adoptNS([[TextManipulationDelegate alloc] init]);
+ auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:NSMakeRect(0, 0, 400, 400)]);
+ [webView _setTextManipulationDelegate:delegate.get()];
+
+ [webView synchronouslyLoadHTMLString:@"<!DOCTYPE html>"
+ "<html>"
+ " <body>"
+ " <div style='float: left; width: 300px; height: 150px;'></div>"
+ " <p style='float: left; width: 600px;'>Hello world</p>"
+ " <h4 style='float: left; width: 600px;'>This is a heading</h4>"
+ " </body>"
+ "</html>"];
+
+ done = false;
+ [webView _startTextManipulationsWithConfiguration:nil completion:^{
+ done = true;
+ }];
+ TestWebKitAPI::Util::run(&done);
+
+ auto items = [delegate items];
+ EXPECT_EQ(items.count, 2UL);
+ EXPECT_EQ(items[0].tokens.count, 1UL);
+ EXPECT_WK_STREQ("Hello world", items[0].tokens[0].content);
+ EXPECT_EQ(items[1].tokens.count, 1UL);
+ EXPECT_WK_STREQ("This is a heading", items[1].tokens[0].content);
+}
+
struct Token {
NSString *identifier;
NSString *content;
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes