poppler/Dict.cc | 98 ++++++++++++++++++++++++++++++++++++++++++++------------ poppler/Dict.h | 2 + 2 files changed, 80 insertions(+), 20 deletions(-)
New commits: commit dd14ef6b211ac1c8a4f16bb6094dbfd6a09cbef9 Author: Albert Astals Cid <aa...@kde.org> Date: Sun Oct 17 12:46:55 2010 +0100 Improve dict lookup speed for big dicts Based on a patch by PaweŠWiejacha <pawel.wieja...@gmail.com> diff --git a/poppler/Dict.cc b/poppler/Dict.cc index a899bca..4dbf641 100644 --- a/poppler/Dict.cc +++ b/poppler/Dict.cc @@ -16,7 +16,8 @@ // Copyright (C) 2005 Kristian Høgsberg <k...@redhat.com> // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalc...@gmail.com> // Copyright (C) 2007-2008 Julien Rebetez <juli...@svn.gnome.org> -// Copyright (C) 2008 Albert Astals Cid <aa...@kde.org> +// Copyright (C) 2008, 2010 Albert Astals Cid <aa...@kde.org> +// Copyright (C) 2010 PaweŠWiejacha <pawel.wieja...@gmail.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -29,6 +30,7 @@ #pragma implementation #endif +#include <algorithm> #include <stddef.h> #include <string.h> #include "goo/gmem.h" @@ -40,11 +42,37 @@ // Dict //------------------------------------------------------------------------ +static const int SORT_LENGTH_LOWER_LIMIT = 32; + +static inline bool cmpDictEntries(const DictEntry &e1, const DictEntry &e2) +{ + return strcmp(e1.key, e2.key) < 0; +} + +static int binarySearch(const char *key, DictEntry *entries, int length) +{ + int first = 0; + int end = length - 1; + while (first <= end) { + const int middle = (first + end) / 2; + const int res = strcmp(key, entries[middle].key); + if (res == 0) { + return middle; + } else if (res < 0) { + end = middle - 1; + } else { + first = middle + 1; + } + } + return -1; +} + Dict::Dict(XRef *xrefA) { xref = xrefA; entries = NULL; size = length = 0; ref = 1; + sorted = gFalse; } Dict::Dict(Dict* dictA) { @@ -52,6 +80,7 @@ Dict::Dict(Dict* dictA) { size = length = dictA->length; ref = 1; + sorted = dictA->sorted; entries = (DictEntry *)gmallocn(size, sizeof(DictEntry)); for (int i=0; i<length; i++) { entries[i].key = strdup(dictA->entries[i].key); @@ -70,6 +99,12 @@ Dict::~Dict() { } void Dict::add(char *key, Object *val) { + if (sorted) { + // We use add on very few occasions so + // virtually this will never be hit + sorted = gFalse; + } + if (length == size) { if (length == 0) { size = 8; @@ -84,33 +119,56 @@ void Dict::add(char *key, Object *val) { } inline DictEntry *Dict::find(char *key) { - int i; + if (!sorted && length >= SORT_LENGTH_LOWER_LIMIT) + { + sorted = gTrue; + std::sort(entries, entries+length, cmpDictEntries); + } + + if (sorted) { + const int pos = binarySearch(key, entries, length); + if (pos != -1) { + return &entries[pos]; + } + } else { + int i; - for (i = length - 1; i >=0; --i) { - if (!strcmp(key, entries[i].key)) - return &entries[i]; + for (i = length - 1; i >=0; --i) { + if (!strcmp(key, entries[i].key)) + return &entries[i]; + } } return NULL; } void Dict::remove(char *key) { - int i; - bool found = false; - DictEntry tmp; - if(length == 0) return; - - for(i=0; i<length; i++) { - if (!strcmp(key, entries[i].key)) { - found = true; - break; + if (sorted) { + const int pos = binarySearch(key, entries, length); + if (pos != -1) { + length -= 1; + if (pos != length) { + memmove(&entries[pos], &entries[pos + 1], (length - pos) * sizeof(DictEntry)); + } + } + } else { + int i; + bool found = false; + DictEntry tmp; + if(length == 0) return; + + for(i=0; i<length; i++) { + if (!strcmp(key, entries[i].key)) { + found = true; + break; + } } + if(!found) return; + //replace the deleted entry with the last entry + length -= 1; + tmp = entries[length]; + if (i!=length) //don't copy the last entry if it is deleted + entries[i] = tmp; } - if(!found) return; - //replace the deleted entry with the last entry - length -= 1; - tmp = entries[length]; - if (i!=length) //don't copy the last entry if it is deleted - entries[i] = tmp; } void Dict::set(char *key, Object *val) { diff --git a/poppler/Dict.h b/poppler/Dict.h index a76bc89..6dfc403 100644 --- a/poppler/Dict.h +++ b/poppler/Dict.h @@ -17,6 +17,7 @@ // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalc...@gmail.com> // Copyright (C) 2007-2008 Julien Rebetez <juli...@svn.gnome.org> // Copyright (C) 2010 Albert Astals Cid <aa...@kde.org> +// Copyright (C) 2010 PaweŠWiejacha <pawel.wieja...@gmail.com> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -89,6 +90,7 @@ public: private: + GBool sorted; XRef *xref; // the xref table for this PDF file DictEntry *entries; // array of entries int size; // size of <entries> array
_______________________________________________ poppler mailing list poppler@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/poppler