Title: [121858] trunk/Source/WebCore
Revision
121858
Author
k...@webkit.org
Date
2012-07-04 08:31:53 -0700 (Wed, 04 Jul 2012)

Log Message

Stop tracking line number in tokenizer
https://bugs.webkit.org/show_bug.cgi?id=90544

Reviewed by Adam Barth.

Because SegmentedString already tracks the current line and column,
the tokenizers no longer need to maintain their own line counter.
No behavior change, so no new tests.

* html/parser/HTMLDocumentParser.cpp:
(WebCore::HTMLDocumentParser::pumpTokenizer):
(WebCore::HTMLDocumentParser::lineNumber):
(WebCore::HTMLDocumentParser::textPosition):
* html/parser/HTMLTokenizer.cpp:
(WebCore::HTMLTokenizer::reset):
(WebCore::HTMLTokenizer::flushBufferedEndTag):
(WebCore):
(WebCore::HTMLTokenizer::nextToken):
* html/parser/HTMLTreeBuilder.cpp:
(WebCore::HTMLTreeBuilder::processScriptStartTag):
* html/track/WebVTTTokenizer.cpp:
(WebCore::WebVTTTokenizer::reset):
(WebCore::WebVTTTokenizer::nextToken):
* platform/text/SegmentedString.cpp:
(WebCore::SegmentedString::advanceAndUpdateLineNumberSlowCase):
* platform/text/SegmentedString.h:
(WebCore::SegmentedString::advancePastNewlineAndUpdateLineNumber):
(WebCore::SegmentedString::advanceAndUpdateLineNumber):
(SegmentedString):
* xml/parser/MarkupTokenizerBase.h:
(WebCore::MarkupTokenizerBase::InputStreamPreprocessor::peek):
(WebCore::MarkupTokenizerBase::InputStreamPreprocessor::advance):
(WebCore::MarkupTokenizerBase::emitAndResumeIn):
(WebCore::MarkupTokenizerBase::emitEndOfFile):
(WebCore::MarkupTokenizerBase::reset):
(MarkupTokenizerBase):
* xml/parser/MarkupTokenizerInlineMethods.h:
(WebCore):
* xml/parser/XMLTokenizer.cpp:
(WebCore::XMLTokenizer::nextToken):

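Modified Paths

trunk/Source/WebCore/ChangeLog
trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp
trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp
trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp
trunk/Source/WebCore/platform/text/SegmentedString.cpp
trunk/Source/WebCore/platform/text/SegmentedString.h
trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h
trunk/Source/WebCore/xml/parser/MarkupTokenizerInlineMethods.h
trunk/Source/WebCore/xml/parser/XMLTokenizer.cpp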

Diff

Modified: trunk/Source/WebCore/ChangeLog (121857 => 121858)


--- trunk/Source/WebCore/ChangeLog	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/ChangeLog	2012-07-04 15:31:53 UTC (rev 121858)
@@ -1,3 +1,46 @@
+2012-07-04  Kwang Yul Seo  <sk...@company100.net>
+
+        Stop tracking line number in tokenizer
+        https://bugs.webkit.org/show_bug.cgi?id=90544
+
+        Reviewed by Adam Barth.
+
+        Because SegmentedString knows how to track the current line and column,
+        a tokenizer does not need to keep track of the current line by itself.
+        No behavior change, so no new tests.
+
+        * html/parser/HTMLDocumentParser.cpp:
+        (WebCore::HTMLDocumentParser::pumpTokenizer):
+        (WebCore::HTMLDocumentParser::lineNumber):
+        (WebCore::HTMLDocumentParser::textPosition):
+        * html/parser/HTMLTokenizer.cpp:
+        (WebCore::HTMLTokenizer::reset):
+        (WebCore::HTMLTokenizer::flushBufferedEndTag):
+        (WebCore):
+        (WebCore::HTMLTokenizer::nextToken):
+        * html/parser/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::processScriptStartTag):
+        * html/track/WebVTTTokenizer.cpp:
+        (WebCore::WebVTTTokenizer::reset):
+        (WebCore::WebVTTTokenizer::nextToken):
+        * platform/text/SegmentedString.cpp:
+        (WebCore::SegmentedString::advanceAndUpdateLineNumberSlowCase):
+        * platform/text/SegmentedString.h:
+        (WebCore::SegmentedString::advancePastNewlineAndUpdateLineNumber):
+        (WebCore::SegmentedString::advanceAndUpdateLineNumber):
+        (SegmentedString):
+        * xml/parser/MarkupTokenizerBase.h:
+        (WebCore::MarkupTokenizerBase::InputStreamPreprocessor::peek):
+        (WebCore::MarkupTokenizerBase::InputStreamPreprocessor::advance):
+        (WebCore::MarkupTokenizerBase::emitAndResumeIn):
+        (WebCore::MarkupTokenizerBase::emitEndOfFile):
+        (WebCore::MarkupTokenizerBase::reset):
+        (MarkupTokenizerBase):
+        * xml/parser/MarkupTokenizerInlineMethods.h:
+        (WebCore):
+        * xml/parser/XMLTokenizer.cpp:
+        (WebCore::XMLTokenizer::nextToken):
+
 2012-07-04  Ryuan Choi  <ryuan.c...@samsung.com>
 
         [CMAKE] Add GENERATE_BINDINGS macro to share the codes which use generate-bindings.pl.

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp (121857 => 121858)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2012-07-04 15:31:53 UTC (rev 121858)
@@ -258,7 +258,7 @@
     // FIXME: m_input.current().length() is only accurate if we
     // end up parsing the whole buffer in this pump.  We should pass how
     // much we parsed as part of didWriteHTML instead of willWriteHTML.
-    InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().length(), m_tokenizer->lineNumber().zeroBasedInt());
+    InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().length(), m_input.current().currentLine().zeroBasedInt());
 
     while (canTakeNextToken(mode, session) && !session.needsYield) {
         if (!isParsingFragment())
@@ -298,7 +298,7 @@
         m_preloadScanner->scan();
     }
 
-    InspectorInstrumentation::didWriteHTML(cookie, m_tokenizer->lineNumber().zeroBasedInt());
+    InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt());
 }
 
 bool HTMLDocumentParser::hasInsertionPoint()
@@ -448,7 +448,7 @@
 
 OrdinalNumber HTMLDocumentParser::lineNumber() const
 {
-    return m_tokenizer->lineNumber();
+    return m_input.current().currentLine();
 }
 
 TextPosition HTMLDocumentParser::textPosition() const
@@ -456,7 +456,6 @@
     const SegmentedString& currentString = m_input.current();
     OrdinalNumber line = currentString.currentLine();
     OrdinalNumber column = currentString.currentColumn();
-    ASSERT(m_tokenizer->lineNumber() == line);
 
     return TextPosition(line, column);
 }

Modified: trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp (121857 => 121858)


--- trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp	2012-07-04 15:31:53 UTC (rev 121858)
@@ -134,7 +134,6 @@
 {
     m_state = HTMLTokenizerState::DataState;
     m_token = 0;
-    m_lineNumber = 0;
     m_forceNullCharacterReplacement = false;
     m_shouldAllowCDATA = false;
     m_additionalAllowedCharacter = '\0';
@@ -160,7 +159,7 @@
 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
 {
     ASSERT(m_token->type() == HTMLTokenTypes::Character || m_token->type() == HTMLTokenTypes::Uninitialized);
-    source.advance(m_lineNumber);
+    source.advanceAndUpdateLineNumber();
     if (m_token->type() == HTMLTokenTypes::Character)
         return true;
     m_token->beginEndTag(m_bufferedEndTagName);
@@ -175,7 +174,7 @@
         if (flushBufferedEndTag(source))                                   \
             return true;                                                   \
         if (source.isEmpty()                                               \
-            || !m_inputStreamPreprocessor.peek(source, m_lineNumber))      \
+            || !m_inputStreamPreprocessor.peek(source))                    \
             return haveBufferedCharacterToken();                           \
         cc = m_inputStreamPreprocessor.nextInputCharacter();               \
         goto stateName;                                                    \
@@ -207,7 +206,7 @@
         }
     }
 
-    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
+    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
         return haveBufferedCharacterToken();
     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
 

Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp (121857 => 121858)


--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp	2012-07-04 15:31:53 UTC (rev 121858)
@@ -2646,8 +2646,6 @@
 
     TextPosition position = m_parser->textPosition();
 
-    ASSERT(position.m_line == m_parser->tokenizer()->lineNumber());
-
     m_lastScriptElementStartPosition = position;
 
     setInsertionMode(TextMode);

Modified: trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp (121857 => 121858)


--- trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/html/track/WebVTTTokenizer.cpp	2012-07-04 15:31:53 UTC (rev 121858)
@@ -65,7 +65,6 @@
 {
     m_state = WebVTTTokenizerState::DataState;
     m_token = 0;
-    m_lineNumber = 0;
     m_buffer.clear();
 }
     
@@ -76,7 +75,7 @@
     ASSERT(!m_token || m_token == &token || token.type() == WebVTTTokenTypes::Uninitialized);
     m_token = &token;
 
-    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
+    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
         return haveBufferedCharacterToken();
 
     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();

Modified: trunk/Source/WebCore/platform/text/SegmentedString.cpp (121857 => 121858)


--- trunk/Source/WebCore/platform/text/SegmentedString.cpp	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/platform/text/SegmentedString.cpp	2012-07-04 15:31:53 UTC (rev 121858)
@@ -222,14 +222,13 @@
     m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
 }
 
-void SegmentedString::advanceSlowCase(int& lineNumber)
+void SegmentedString::advanceAndUpdateLineNumberSlowCase()
 {
     if (m_pushedChar1) {
         m_pushedChar1 = m_pushedChar2;
         m_pushedChar2 = 0;
     } else if (m_currentString.m_current) {
         if (*m_currentString.m_current++ == '\n' && m_currentString.doNotExcludeLineNumbers()) {
-            ++lineNumber;
             ++m_currentLine;
             // Plus 1 because numberOfCharactersConsumed value hasn't incremented yet; it does with m_length decrement below.
             m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;

Modified: trunk/Source/WebCore/platform/text/SegmentedString.h (121857 => 121858)


--- trunk/Source/WebCore/platform/text/SegmentedString.h	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/platform/text/SegmentedString.h	2012-07-04 15:31:53 UTC (rev 121858)
@@ -157,12 +157,11 @@
         advance();
     }
 
-    void advancePastNewline(int& lineNumber)
+    void advancePastNewlineAndUpdateLineNumber()
     {
         ASSERT(*current() == '\n');
         if (!m_pushedChar1 && m_currentString.m_length > 1) {
             int newLineFlag = m_currentString.doNotExcludeLineNumbers();
-            lineNumber += newLineFlag;
             m_currentLine += newLineFlag;
             if (newLineFlag)
                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
@@ -170,7 +169,7 @@
             m_currentChar = ++m_currentString.m_current;
             return;
         }
-        advanceSlowCase(lineNumber);
+        advanceAndUpdateLineNumberSlowCase();
     }
     
     void advancePastNonNewline()
@@ -184,11 +183,10 @@
         advanceSlowCase();
     }
     
-    void advance(int& lineNumber)
+    void advanceAndUpdateLineNumber()
     {
         if (!m_pushedChar1 && m_currentString.m_length > 1) {
             int newLineFlag = (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
-            lineNumber += newLineFlag;
             m_currentLine += newLineFlag;
             if (newLineFlag)
                 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
@@ -196,7 +194,7 @@
             m_currentChar = ++m_currentString.m_current;
             return;
         }
-        advanceSlowCase(lineNumber);
+        advanceAndUpdateLineNumberSlowCase();
     }
 
     // Writes the consumed characters into consumedCharacters, which must
@@ -234,7 +232,7 @@
     void prepend(const SegmentedSubstring&);
 
     void advanceSlowCase();
-    void advanceSlowCase(int& lineNumber);
+    void advanceAndUpdateLineNumberSlowCase();
     void advanceSubstring();
     const UChar* current() const { return m_currentChar; }
 

Modified: trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h (121857 => 121858)


--- trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h	2012-07-04 15:31:53 UTC (rev 121858)
@@ -45,8 +45,6 @@
 public:
     virtual ~MarkupTokenizerBase() { }
 
-    OrdinalNumber lineNumber() const { return OrdinalNumber::fromZeroBasedInt(m_lineNumber); }
-
     typename State::State state() const { return m_state; }
     void setState(typename State::State state) { m_state = state; }
 
@@ -73,7 +71,7 @@
         // Returns whether we succeeded in peeking at the next character.
         // The only way we can fail to peek is if there are no more
         // characters in |source| (after collapsing \r\n, etc).
-        ALWAYS_INLINE bool peek(SegmentedString& source, int& lineNumber)
+        ALWAYS_INLINE bool peek(SegmentedString& source)
         {
         PeekAgain:
             m_nextInputCharacter = *source;
@@ -90,7 +88,7 @@
 
             if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
                 m_skipNextNewLine = false;
-                source.advancePastNewline(lineNumber);
+                source.advancePastNewlineAndUpdateLineNumber();
                 if (source.isEmpty())
                     return false;
                 m_nextInputCharacter = *source;
@@ -118,12 +116,12 @@
         }
 
         // Returns whether there are more characters in |source| after advancing.
-        bool advance(SegmentedString& source, int& lineNumber)
+        bool advance(SegmentedString& source)
         {
-            source.advance(lineNumber);
+            source.advanceAndUpdateLineNumber();
             if (source.isEmpty())
                 return false;
-            return peek(source, lineNumber);
+            return peek(source);
         }
 
         static const UChar endOfFileMarker = 0;
@@ -156,7 +154,7 @@
     inline bool emitAndResumeIn(SegmentedString& source, typename State::State state)
     {
         m_state = state;
-        source.advance(m_lineNumber);
+        source.advanceAndUpdateLineNumber();
         return true;
     }
     
@@ -172,7 +170,7 @@
         if (haveBufferedCharacterToken())
             return true;
         m_state = State::DataState;
-        source.advance(m_lineNumber);
+        source.advanceAndUpdateLineNumber();
         m_token->clear();
         m_token->makeEndOfFile();
         return true;
@@ -182,7 +180,6 @@
     {
         m_state = State::DataState;
         m_token = 0;
-        m_lineNumber = 0;
     }
 
     inline bool haveBufferedCharacterToken()
@@ -195,7 +192,6 @@
     // m_token is owned by the caller. If nextToken is not on the stack,
     // this member might be pointing to unallocated memory.
     Token* m_token;
-    int m_lineNumber;
 
     bool m_forceNullCharacterReplacement;
 

Modified: trunk/Source/WebCore/xml/parser/MarkupTokenizerInlineMethods.h (121857 => 121858)


--- trunk/Source/WebCore/xml/parser/MarkupTokenizerInlineMethods.h	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/xml/parser/MarkupTokenizerInlineMethods.h	2012-07-04 15:31:53 UTC (rev 121858)
@@ -71,7 +71,7 @@
 #define ADVANCE_TO(prefix, stateName)                                      \
     do {                                                                   \
         m_state = prefix::stateName;                                       \
-        if (!m_inputStreamPreprocessor.advance(source, m_lineNumber))      \
+        if (!m_inputStreamPreprocessor.advance(source))                    \
             return haveBufferedCharacterToken();                           \
         cc = m_inputStreamPreprocessor.nextInputCharacter();               \
         goto stateName;                                                    \
@@ -84,7 +84,7 @@
 #define SWITCH_TO(prefix, stateName)                                       \
     do {                                                                   \
         m_state = prefix::stateName;                                       \
-        if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
+        if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))   \
             return haveBufferedCharacterToken();                           \
         cc = m_inputStreamPreprocessor.nextInputCharacter();               \
         goto stateName;                                                    \

Modified: trunk/Source/WebCore/xml/parser/XMLTokenizer.cpp (121857 => 121858)


--- trunk/Source/WebCore/xml/parser/XMLTokenizer.cpp	2012-07-04 15:25:35 UTC (rev 121857)
+++ trunk/Source/WebCore/xml/parser/XMLTokenizer.cpp	2012-07-04 15:31:53 UTC (rev 121858)
@@ -225,7 +225,7 @@
     ASSERT(!m_token || m_token == &token || token.type() == XMLTokenTypes::Uninitialized);
     m_token = &token;
 
-    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
+    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
         return haveBufferedCharacterToken();
     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
 
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to