Kelson has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/343897 )

Change subject: Do not store the snippet nor the size of the content in the database.
......................................................................


Do not store the snippet nor the size of the content in the database.

Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
---
M zimwriterfs/indexer.cpp
M zimwriterfs/indexer.h
M zimwriterfs/xapianIndexer.cpp
M zimwriterfs/xapianIndexer.h
4 files changed, 2 insertions(+), 28 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved



diff --git a/zimwriterfs/indexer.cpp b/zimwriterfs/indexer.cpp
index 6c26fc9..b83abd4 100644
--- a/zimwriterfs/indexer.cpp
+++ b/zimwriterfs/indexer.cpp
@@ -84,8 +84,6 @@
                  token.title,
                  token.keywords,
                  token.content,
-                 token.snippet,
-                 token.size,
                  token.wordCount
                  );
 
diff --git a/zimwriterfs/indexer.h b/zimwriterfs/indexer.h
index 02d989b..3291e36 100644
--- a/zimwriterfs/indexer.h
+++ b/zimwriterfs/indexer.h
@@ -46,8 +46,6 @@
     string title;
     string keywords;
     string content;
-    string snippet;
-    string size;
     string wordCount;
 };
 
@@ -70,8 +68,6 @@
                       const string &unaccentedTitle,
                       const string &keywords,
                       const string &content,
-                      const string &snippet,
-                      const string &size,
                       const string &wordCount) = 0;
     virtual void flush() = 0;
     virtual void indexingPostlude() = 0;
diff --git a/zimwriterfs/xapianIndexer.cpp b/zimwriterfs/xapianIndexer.cpp
index 65129b7..db27f9d 100644
--- a/zimwriterfs/xapianIndexer.cpp
+++ b/zimwriterfs/xapianIndexer.cpp
@@ -52,7 +52,7 @@
 void XapianIndexer::indexingPrelude(const string indexPath_) {
     indexPath = indexPath_;
     this->writableDatabase = Xapian::WritableDatabase(indexPath + ".tmp", Xapian::DB_CREATE_OR_OVERWRITE);
-    this->writableDatabase.set_metadata("valuesmap", "title:0;snippet:1;size:2;wordcount:3");
+    this->writableDatabase.set_metadata("valuesmap", "title:0;wordcount:1");
     this->writableDatabase.begin_transaction(true);
 
     /* Insert the stopwords */
@@ -72,17 +72,13 @@
                           const string &unaccentedTitle,
                           const string &keywords,
                           const string &content,
-                          const string &snippet,
-                          const string &size,
                           const string &wordCount) {
 
     /* Put the data in the document */
     Xapian::Document currentDocument;
     currentDocument.clear_values();
     currentDocument.add_value(0, title);
-    currentDocument.add_value(1, snippet);
-    currentDocument.add_value(2, size);
-    currentDocument.add_value(3, wordCount);
+    currentDocument.add_value(1, wordCount);
     currentDocument.set_data(url);
     indexer.set_document(currentDocument);
 
@@ -149,20 +145,6 @@
        stringstream countWordStringStream;
        countWordStringStream << countWords(htmlParser.dump);
        token.wordCount = countWordStringStream.str();
-
-       /* snippet */
-       std::string snippet = std::string(htmlParser.dump, 0, 300);
-       std::string::size_type last = snippet.find_last_of('.');
-       if (last == snippet.npos)
-         last = snippet.find_last_of(' ');
-       if (last != snippet.npos)
-         snippet = snippet.substr(0, last);
-       token.snippet = snippet;
-
-       /* size */
-       stringstream sizeStringStream;
-       sizeStringStream << token.content.size() / 1024;
-       token.size = sizeStringStream.str();
 
        /* Remove accent */
        token.title = removeAccents(token.accentedTitle);
diff --git a/zimwriterfs/xapianIndexer.h b/zimwriterfs/xapianIndexer.h
index 1d854da..16dc094 100644
--- a/zimwriterfs/xapianIndexer.h
+++ b/zimwriterfs/xapianIndexer.h
@@ -61,8 +61,6 @@
                    const string &unaccentedTitle,
                    const string &keywords,
                    const string &content,
-                   const string &snippet,
-                   const string &size,
                    const string &wordCount);
         void flush();
         void indexingPostlude();

-- 
To view, visit https://gerrit.wikimedia.org/r/343897
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr <mgaut...@kymeria.fr>
Gerrit-Reviewer: Kelson <kel...@kiwix.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to