Title: [103082] trunk/Source/WebCore
Revision
103082
Author
a...@apple.com
Date
2011-12-16 10:59:40 -0800 (Fri, 16 Dec 2011)

Log Message

        Poor XPath performance when evaluating an _expression_ that returns a lot of nodes
        https://bugs.webkit.org/show_bug.cgi?id=74665
        <rdar://problem/10517146>

        Reviewed by Darin Adler.

        No change in funcitonality. Well covered by existing tests (ran them with zero cutoff to
        execute the new code path).

        Our sorting function is optimized for small node sets in large documents, and this is the
        opposite of it. Added another one that traverses the whole document, adding nodes from the
        node set to sorted list. That doesn't grow with the number of nodes nearly as fast.

        Cutoff amount chosen for the document referenced in bug - this is roughly where the algorithms
        have the same performance on it.

        * xml/XPathNodeSet.cpp:
        (WebCore::XPath::NodeSet::sort):
        (WebCore::XPath::findRootNode):
        (WebCore::XPath::NodeSet::traversalSort):
        * xml/XPathNodeSet.h:

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (103081 => 103082)


--- trunk/Source/WebCore/ChangeLog	2011-12-16 18:43:57 UTC (rev 103081)
+++ trunk/Source/WebCore/ChangeLog	2011-12-16 18:59:40 UTC (rev 103082)
@@ -1,3 +1,27 @@
+2011-12-15  Alexey Proskuryakov  <a...@apple.com>
+
+        Poor XPath performance when evaluating an _expression_ that returns a lot of nodes
+        https://bugs.webkit.org/show_bug.cgi?id=74665
+        <rdar://problem/10517146>
+
+        Reviewed by Darin Adler.
+
+        No change in funcitonality. Well covered by existing tests (ran them with zero cutoff to
+        execute the new code path).
+
+        Our sorting function is optimized for small node sets in large documents, and this is the
+        opposite of it. Added another one that traverses the whole document, adding nodes from the
+        node set to sorted list. That doesn't grow with the number of nodes nearly as fast.
+
+        Cutoff amount chosen for the document referenced in bug - this is roughly where the algorithms
+        have the same performance on it.
+
+        * xml/XPathNodeSet.cpp:
+        (WebCore::XPath::NodeSet::sort):
+        (WebCore::XPath::findRootNode):
+        (WebCore::XPath::NodeSet::traversalSort):
+        * xml/XPathNodeSet.h:
+
 2011-12-15  Antti Koivisto  <an...@apple.com>
 
         https://bugs.webkit.org/show_bug.cgi?id=74677

Modified: trunk/Source/WebCore/xml/XPathNodeSet.cpp (103081 => 103082)


--- trunk/Source/WebCore/xml/XPathNodeSet.cpp	2011-12-16 18:43:57 UTC (rev 103081)
+++ trunk/Source/WebCore/xml/XPathNodeSet.cpp	2011-12-16 18:59:40 UTC (rev 103082)
@@ -33,6 +33,10 @@
 namespace WebCore {
 namespace XPath {
 
+// When a node set is large, sorting it by traversing the whole document is better (we can
+// assume that we aren't dealing with documents that we cannot even traverse in reasonable time).
+const unsigned traversalSortCutoff = 10000;
+
 static inline Node* parentWithDepth(unsigned depth, const Vector<Node*>& parents)
 {
     ASSERT(parents.size() >= depth + 1);
@@ -140,7 +144,12 @@
         const_cast<bool&>(m_isSorted) = true;
         return;
     }
-    
+
+    if (nodeCount > traversalSortCutoff) {
+        traversalSort();
+        return;
+    }
+
     bool containsAttributeNodes = false;
     
     Vector<Vector<Node*> > parentMatrix(nodeCount);
@@ -164,9 +173,62 @@
     for (unsigned i = 0; i < nodeCount; ++i)
         sortedNodes.append(parentMatrix[i][0]);
     
-    const_cast<Vector<RefPtr<Node> >& >(m_nodes).swap(sortedNodes);
+    const_cast<Vector<RefPtr<Node> >&>(m_nodes).swap(sortedNodes);
 }
 
+static Node* findRootNode(Node* node)
+{
+    if (node->isAttributeNode())
+        node = static_cast<Attr*>(node)->ownerElement();
+    if (node->inDocument())
+        node = node->document();
+    else {
+        while (Node* parent = node->parentNode())
+            node = parent;
+    }
+    return node;
+}
+
+void NodeSet::traversalSort() const
+{
+    HashSet<Node*> nodes;
+    bool containsAttributeNodes = false;
+
+    unsigned nodeCount = m_nodes.size();
+    ASSERT(nodeCount > 1);
+    for (unsigned i = 0; i < nodeCount; ++i) {
+        Node* node = m_nodes[i].get();
+        nodes.add(node);
+        if (node->isAttributeNode())
+            containsAttributeNodes = true;
+    }
+
+    Vector<RefPtr<Node> > sortedNodes;
+    sortedNodes.reserveInitialCapacity(nodeCount);
+
+    for (Node* n = findRootNode(m_nodes.first().get()); n; n = n->traverseNextNode()) {
+        if (nodes.contains(n))
+            sortedNodes.append(n);
+
+        if (!containsAttributeNodes || !n->isElementNode())
+            continue;
+
+        NamedNodeMap* attributes = toElement(n)->attributes(true /* read-only */);
+        if (!attributes)
+            continue;
+
+        unsigned attributeCount = attributes->length();
+        for (unsigned i = 0; i < attributeCount; ++i) {
+            Attr* attribute = attributes->attributeItem(i)->attr();
+            if (attribute && nodes.contains(attribute))
+                sortedNodes.append(attribute);
+        }
+    }
+
+    ASSERT(sortedNodes.size() == nodeCount);
+    const_cast<Vector<RefPtr<Node> >&>(m_nodes).swap(sortedNodes);
+}
+
 void NodeSet::reverse()
 {
     if (m_nodes.isEmpty())

Modified: trunk/Source/WebCore/xml/XPathNodeSet.h (103081 => 103082)


--- trunk/Source/WebCore/xml/XPathNodeSet.h	2011-12-16 18:43:57 UTC (rev 103081)
+++ trunk/Source/WebCore/xml/XPathNodeSet.h	2011-12-16 18:59:40 UTC (rev 103082)
@@ -71,6 +71,8 @@
             void reverse();
         
         private:
+            void traversalSort() const;
+
             bool m_isSorted;
             bool m_subtreesAreDisjoint;
             Vector<RefPtr<Node> > m_nodes;
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to