Reviewers: Erik Corry, Message: Small review.
Description: Minor changes, mostly cosmetic, in string search. Please review this at http://codereview.chromium.org/14505 Affected files: M src/runtime.cc Index: src/runtime.cc diff --git a/src/runtime.cc b/src/runtime.cc index 24b19047eb564ff6b4c1842ed99dba46bb9112f2..b9f1f48ef2138a4179b5436b4572e70d851e6e49 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -1151,15 +1151,14 @@ static inline int CharOccurence(int char_code) { return bad_char_occurence[char_code % kBMAlphabetSize]; } -// Restricted simplified Boyer-Moore string matching. Restricts tables to a -// suffix of long pattern strings and handles only equivalence classes -// of the full alphabet. This allows us to ensure that tables take only -// a fixed amount of space. +// Restricted simplified Boyer-Moore string matching. +// Uses only the bad-shift table of Boyer-Moore and only uses it +// for the character compared to the last character of the needle. template <typename schar, typename pchar> -static int BoyerMooreSimplified(Vector<const schar> subject, - Vector<const pchar> pattern, - int start_index, - bool* complete) { +static int BoyerMooreHorsepool(Vector<const schar> subject, + Vector<const pchar> pattern, + int start_index, + bool* complete) { int n = subject.length(); int m = pattern.length(); // Only preprocess at most kBMMaxShift last characters of pattern. @@ -1170,6 +1169,7 @@ static int BoyerMooreSimplified(Vector<const schar> subject, int badness = -m; // How bad we are doing without a good-suffix table. int idx; // No matches found prior to this index. pchar last_char = pattern[m - 1]; + int last_char_shift = m - 1 - CharOccurence<schar, pchar>(last_char); // Perform search for (idx = start_index; idx <= n - m;) { int j = m - 1; @@ -1185,19 +1185,17 @@ static int BoyerMooreSimplified(Vector<const schar> subject, } } j--; - while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; + while (j >= 0 && pattern[j] == (subject[idx + j])) j--; if (j < 0) { *complete = true; return idx; } else { - int bc_occ = CharOccurence<schar, pchar>(c); - int shift = bc_occ < j ? j - bc_occ : 1; - idx += shift; + idx += last_char_shift; // Badness increases by the number of characters we have // checked, and decreases by the number of characters we // can skip by shifting. It's a measure of how we are doing // compared to reading each character exactly once. - badness += (m - j) - shift; + badness += (m - j) - last_char_shift; if (badness > 0) { *complete = false; return idx; @@ -1237,12 +1235,15 @@ static int BoyerMooreIndexOf(Vector<const schar> subject, return idx; } else if (j < start) { // we have matched more than our tables allow us to be smart about. - idx += 1; + // Fall back on BMH shift. + idx += m - 1 - CharOccurence<schar, pchar>(last_char); } else { int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift. int bc_occ = CharOccurence<schar, pchar>(c); int shift = j - bc_occ; // Bad-char shift. - shift = (gs_shift > shift) ? gs_shift : shift; + if (gs_shift > shift) { + shift = gs_shift; + } idx += shift; } } while (idx <= n - m); @@ -1286,7 +1287,7 @@ static int SimpleIndexOf(Vector<const schar> subject, badness++; if (badness > 0) { *complete = false; - return (i); + return i; } if (subject[i] != pattern_first_char) continue; int j = 1; @@ -1357,7 +1358,7 @@ static int StringMatchStrategy(Vector<const schar> sub, bool complete; int idx = SimpleIndexOf(sub, pat, start_index, &complete); if (complete) return idx; - idx = BoyerMooreSimplified(sub, pat, idx, &complete); + idx = BoyerMooreHorsepool(sub, pat, idx, &complete); if (complete) return idx; return BoyerMooreIndexOf(sub, pat, idx); } --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---
