result

sshafroi Mon, 17 Nov 2008 00:46:26 -0800

Author: sshafroi
Date: 2008-11-17 09:58:31 +0100 (Mon, 17 Nov 2008)
New Revision: 6941


Modified:
   trunk/result-spi/src/main/java/no/sesat/search/result/StringChopper.java
Log:
Issue SKER4947:  (StringChopper will not handle cdata)

Cleanup (braces, upper-case enum constants, non-synchronized stack)

Some measurements:

All results are totals over 10 runs of each request (times in ms).

http://localhost.no:8080/search/?q=oslo&x=40&y=15&c=m
New time is: 120
Old time is: 474 executed times:34308

http://localhost.no:8080/search/?q=brann&x=32&y=17&c=m

New time is: 127ms
Old time is: 284ms executed times:40276


http://localhost.no:8080/search/?q=brann&x=33&y=22&c=d
New time is: 25
Old time is: 83 executed times:8580


http://localhost.no:8080/katalog/brann/
New time is: 22
Old time is: 44 executed times:12090

This is measured by accumulating the time spent inside the chop function. Each 
request was only run 10 times. Keep in mind that some of the chopping might be 
done in parallel, so the result would probably be different when measuring the 
search as a whole.

Still, in a news search for oslo we will probably see a significant overall 
speedup: saving about 35ms per search makes a difference.




Modified: 
trunk/result-spi/src/main/java/no/sesat/search/result/StringChopper.java
===================================================================
--- trunk/result-spi/src/main/java/no/sesat/search/result/StringChopper.java    
2008-11-16 22:09:16 UTC (rev 6940)
+++ trunk/result-spi/src/main/java/no/sesat/search/result/StringChopper.java    
2008-11-17 08:58:31 UTC (rev 6941)
@@ -17,18 +17,21 @@
 
 package no.sesat.search.result;
 
-import java.util.Stack;
+import java.util.ArrayDeque;
+import java.util.Deque;
 
 public class StringChopper {
 
     private enum State {
-        none, tag, startTag, endTag, cdata, comment, declaration
+        NONE, TAG, STARTTAG, ENDTAG, CDATA, COMMENT, DECLARATION
     };
 
     /**
-     * Truncate s to the given length at closest space or xml tag. Any xml 
tags will be closed/balanced.
+     * Truncate s to the given length at closest space or xml tag. Any xml tags
+     * will be closed/balanced.
      *
-     * @param input The string that should be truncated.
+     * @param input
+     *            The string that should be truncated.
      * @param length
      * @return The truncated string
      */
@@ -37,30 +40,37 @@
     }
 
     /**
-     * Truncate s to the given length or to closest space/tag depending on 
chop. Any xml tags will be closed/balanced.
-     * @param input The string that should be truncated.
-     * @param length max length of string (if choped the string will be '...' 
longer then max.)
-     * @param chop If words should be choped, or if we chop inbetween spaces.
+     * Truncate s to the given length or to closest space/tag depending on 
chop.
+     * Any xml tags will be closed/balanced.
+     *
+     * @param input
+     *            The string that should be truncated.
+     * @param length
+     *            max length of string (if choped the string will be '...'
+     *            longer then max.)
+     * @param chop
+     *            If words should be choped, or if we chop inbetween spaces.
      * @return The truncated string
      */
     public static String chop(final String input, final int length, final 
boolean chop) {
 
-        if (input == null)
+        if (input == null) {
             return null;
+        }
 
-        Stack<Integer> stack = new Stack<Integer>();
+        final Deque<Integer> stack = new ArrayDeque<Integer>();
         char[] s = input.toCharArray();
-        StringBuilder res = new StringBuilder(s.length);
-        State state = State.none;
+        final StringBuilder res = new StringBuilder(s.length);
+        State state = State.NONE;
         int count = 0;
         int i = 0;
 
         main: for (; i < s.length; i++) {
             char c = s[i];
             switch (state) {
-            case none:
+            case NONE:
                 if (c == '<') {
-                    state = State.tag;
+                    state = State.TAG;
                 } else {
                     count++;
                     if (count == length) {
@@ -70,63 +80,65 @@
                 }
                 break;
 
-            case tag:
+            case TAG:
                 if (c == '/') {
-                    state = State.endTag;
+                    state = State.ENDTAG;
                 } else if (c == '!') {
                     // ![CDATA[
                     if (s.length > (i + 7) && s[i + 1] == '[' && (s[i + 2] == 
'C' || s[i + 2] == 'c')
                             && (s[i + 3] == 'D' || s[i + 3] == 'd') && (s[i + 
4] == 'A' || s[i + 4] == 'a')
                             && (s[i + 5] == 'T' || s[i + 5] == 't') && (s[i + 
6] == 'A' || s[i + 6] == 'a')
                             && s[i + 7] == '[') {
-                        state = State.cdata;
+                        state = State.CDATA;
                         res.append("![CDATA[");
                         i += 7;
                         continue;
                     }
                     // !--
                     else if (s.length > (i + 2) && s[i + 1] == '-' && s[i + 2] 
== '-') {
-                        state = State.comment;
+                        state = State.COMMENT;
                         res.append("!--");
                         i += 2;
                         continue;
                     }
                 } else if (c == '?') {
-                    state = State.declaration;
+                    state = State.DECLARATION;
                 } else {
                     stack.push(i);
-                    state = State.startTag;
+                    state = State.STARTTAG;
                 }
                 break;
 
-            case startTag:
+            case STARTTAG:
                 if (c == '/') {
                     if (s.length > (i + 1) && s[i + 1] == '>') {
-                        state = State.none;
+                        state = State.NONE;
                         res.append("/>");
                         i += 1;
-                        if(!stack.isEmpty())
+                        if (!stack.isEmpty()) {
                             stack.pop();
+                        }
                         continue;
                     }
                 } else if (c == '>') {
-                    state = State.none;
+                    state = State.NONE;
                 }
                 break;
 
-            case endTag:
+            case ENDTAG:
                 if (c == '>') {
-                    state = State.none;
-                    if(!stack.isEmpty())
+                    state = State.NONE;
+                    if (!stack.isEmpty()) {
                         stack.pop();
+                    }
                 }
                 break;
 
-            case cdata:
+            case CDATA:
 
                 if (c == ']') {// ]]>
                     if (s.length > (i + 2) && s[i + 1] == ']' && s[i + 2] == 
'>') {
-                        state = State.none;
+                        state = State.NONE;
                         res.append("]]>");
                         i += 2;
                         continue;
@@ -140,11 +152,11 @@
                 }
                 break;
 
-            case comment:
+            case COMMENT:
                 if (c == '-') {
                     // -->
                     if (s.length > (i + 2) && s[i + 1] == '-' && s[i + 2] == 
'>') {
-                        state = State.none;
+                        state = State.NONE;
                         res.append("-->");
                         i += 2;
                         continue;
@@ -152,10 +164,10 @@
                 }
                 break;
 
-            case declaration:
+            case DECLARATION:
                 if (c == '?') {
                     if (s.length > (i + 1) && s[i + 1] == '>') {
-                        state = State.none;
+                        state = State.NONE;
                         res.append("?>");
                         i += 1;
                         continue;
@@ -172,7 +184,7 @@
                 res.append("...");
             } else {
                 for (int k = i; k > 0; k--) {
-                    if (s[k] == ' ' || s[k] == ((state == State.cdata) ? '[' : 
'>')) {
+                    if (s[k] == ' ' || s[k] == ((state == State.CDATA) ? '[' : 
'>')) {
                         res.setLength(k + 1);
                         res.append("...");
                         break dot;
@@ -183,7 +195,7 @@
         }
 
         // close CDATA if we are in one
-        if (state == State.cdata) {
+        if (state == State.CDATA) {
             res.append("]]>");
         }
 

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

r6941 - trunk/result-spi/src/main/java/no/sesat/search/result

Reply via email to