[MediaWiki-commits] [Gerrit] openzim[master]: Do not store the snippet nor the size of the content in the ...

2017-03-25 Thread Kelson (Code Review)
Kelson has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/343897 )

Change subject: Do not store the snippet nor the size of the content in the 
database.
..


Do not store the snippet nor the size of the content in the database.

Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
---
M zimwriterfs/indexer.cpp
M zimwriterfs/indexer.h
M zimwriterfs/xapianIndexer.cpp
M zimwriterfs/xapianIndexer.h
4 files changed, 2 insertions(+), 28 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved



diff --git a/zimwriterfs/indexer.cpp b/zimwriterfs/indexer.cpp
index 6c26fc9..b83abd4 100644
--- a/zimwriterfs/indexer.cpp
+++ b/zimwriterfs/indexer.cpp
@@ -84,8 +84,6 @@
  token.title,
  token.keywords,
  token.content,
- token.snippet,
- token.size,
  token.wordCount
  );
 
diff --git a/zimwriterfs/indexer.h b/zimwriterfs/indexer.h
index 02d989b..3291e36 100644
--- a/zimwriterfs/indexer.h
+++ b/zimwriterfs/indexer.h
@@ -46,8 +46,6 @@
 string title;
 string keywords;
 string content;
-string snippet;
-string size;
 string wordCount;
 };
 
@@ -70,8 +68,6 @@
   const string &unaccentedTitle,
   const string &keywords,
   const string &content,
-  const string &snippet,
-  const string &size,
   const string &wordCount) = 0;
 virtual void flush() = 0;
 virtual void indexingPostlude() = 0;
diff --git a/zimwriterfs/xapianIndexer.cpp b/zimwriterfs/xapianIndexer.cpp
index 65129b7..db27f9d 100644
--- a/zimwriterfs/xapianIndexer.cpp
+++ b/zimwriterfs/xapianIndexer.cpp
@@ -52,7 +52,7 @@
 void XapianIndexer::indexingPrelude(const string indexPath_) {
 indexPath = indexPath_;
 this->writableDatabase = Xapian::WritableDatabase(indexPath + ".tmp", 
Xapian::DB_CREATE_OR_OVERWRITE);
-this->writableDatabase.set_metadata("valuesmap", 
"title:0;snippet:1;size:2;wordcount:3");
+this->writableDatabase.set_metadata("valuesmap", "title:0;wordcount:1");
 this->writableDatabase.begin_transaction(true);
 
 /* Insert the stopwords */
@@ -72,17 +72,13 @@
   const string &unaccentedTitle,
   const string &keywords,
   const string &content,
-  const string &snippet,
-  const string &size,
   const string &wordCount) {
 
 /* Put the data in the document */
 Xapian::Document currentDocument;
 currentDocument.clear_values();
 currentDocument.add_value(0, title);
-currentDocument.add_value(1, snippet);
-currentDocument.add_value(2, size);
-currentDocument.add_value(3, wordCount);
+currentDocument.add_value(1, wordCount);
 currentDocument.set_data(url);
 indexer.set_document(currentDocument);
 
@@ -149,20 +145,6 @@
stringstream countWordStringStream;
countWordStringStream << countWords(htmlParser.dump);
token.wordCount = countWordStringStream.str();
-
-   /* snippet */
-   std::string snippet = std::string(htmlParser.dump, 0, 300);
-   std::string::size_type last = snippet.find_last_of('.');
-   if (last == snippet.npos)
- last = snippet.find_last_of(' ');
-   if (last != snippet.npos)
- snippet = snippet.substr(0, last);
-   token.snippet = snippet;
-
-   /* size */
-   stringstream sizeStringStream;
-   sizeStringStream << token.content.size() / 1024;
-   token.size = sizeStringStream.str();
 
/* Remove accent */
token.title = removeAccents(token.accentedTitle);
diff --git a/zimwriterfs/xapianIndexer.h b/zimwriterfs/xapianIndexer.h
index 1d854da..16dc094 100644
--- a/zimwriterfs/xapianIndexer.h
+++ b/zimwriterfs/xapianIndexer.h
@@ -61,8 +61,6 @@
const string &unaccentedTitle,
const string &keywords,
const string &content,
-   const string &snippet,
-   const string &size,
const string &wordCount);
 void flush();
 void indexingPostlude();

-- 
To view, visit https://gerrit.wikimedia.org/r/343897
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr 
Gerrit-Reviewer: Kelson 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] openzim[master]: Do not store the snippet nor the size of the content in the ...

2017-03-23 Thread Kelson (Code Review)
Kelson has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/343897 )

Change subject: Do not store the snippet nor the size of the content in the 
database.
..


Do not store the snippet nor the size of the content in the database.

Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
---
M zimwriterfs/indexer.cpp
M zimwriterfs/indexer.h
M zimwriterfs/xapianIndexer.cpp
M zimwriterfs/xapianIndexer.h
4 files changed, 2 insertions(+), 28 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved



diff --git a/zimwriterfs/indexer.cpp b/zimwriterfs/indexer.cpp
index 6c26fc9..b83abd4 100644
--- a/zimwriterfs/indexer.cpp
+++ b/zimwriterfs/indexer.cpp
@@ -84,8 +84,6 @@
  token.title,
  token.keywords,
  token.content,
- token.snippet,
- token.size,
  token.wordCount
  );
 
diff --git a/zimwriterfs/indexer.h b/zimwriterfs/indexer.h
index 02d989b..3291e36 100644
--- a/zimwriterfs/indexer.h
+++ b/zimwriterfs/indexer.h
@@ -46,8 +46,6 @@
 string title;
 string keywords;
 string content;
-string snippet;
-string size;
 string wordCount;
 };
 
@@ -70,8 +68,6 @@
   const string &unaccentedTitle,
   const string &keywords,
   const string &content,
-  const string &snippet,
-  const string &size,
   const string &wordCount) = 0;
 virtual void flush() = 0;
 virtual void indexingPostlude() = 0;
diff --git a/zimwriterfs/xapianIndexer.cpp b/zimwriterfs/xapianIndexer.cpp
index 65129b7..db27f9d 100644
--- a/zimwriterfs/xapianIndexer.cpp
+++ b/zimwriterfs/xapianIndexer.cpp
@@ -52,7 +52,7 @@
 void XapianIndexer::indexingPrelude(const string indexPath_) {
 indexPath = indexPath_;
 this->writableDatabase = Xapian::WritableDatabase(indexPath + ".tmp", 
Xapian::DB_CREATE_OR_OVERWRITE);
-this->writableDatabase.set_metadata("valuesmap", 
"title:0;snippet:1;size:2;wordcount:3");
+this->writableDatabase.set_metadata("valuesmap", "title:0;wordcount:1");
 this->writableDatabase.begin_transaction(true);
 
 /* Insert the stopwords */
@@ -72,17 +72,13 @@
   const string &unaccentedTitle,
   const string &keywords,
   const string &content,
-  const string &snippet,
-  const string &size,
   const string &wordCount) {
 
 /* Put the data in the document */
 Xapian::Document currentDocument;
 currentDocument.clear_values();
 currentDocument.add_value(0, title);
-currentDocument.add_value(1, snippet);
-currentDocument.add_value(2, size);
-currentDocument.add_value(3, wordCount);
+currentDocument.add_value(1, wordCount);
 currentDocument.set_data(url);
 indexer.set_document(currentDocument);
 
@@ -149,20 +145,6 @@
stringstream countWordStringStream;
countWordStringStream << countWords(htmlParser.dump);
token.wordCount = countWordStringStream.str();
-
-   /* snippet */
-   std::string snippet = std::string(htmlParser.dump, 0, 300);
-   std::string::size_type last = snippet.find_last_of('.');
-   if (last == snippet.npos)
- last = snippet.find_last_of(' ');
-   if (last != snippet.npos)
- snippet = snippet.substr(0, last);
-   token.snippet = snippet;
-
-   /* size */
-   stringstream sizeStringStream;
-   sizeStringStream << token.content.size() / 1024;
-   token.size = sizeStringStream.str();
 
/* Remove accent */
token.title = removeAccents(token.accentedTitle);
diff --git a/zimwriterfs/xapianIndexer.h b/zimwriterfs/xapianIndexer.h
index 1d854da..16dc094 100644
--- a/zimwriterfs/xapianIndexer.h
+++ b/zimwriterfs/xapianIndexer.h
@@ -61,8 +61,6 @@
const string &unaccentedTitle,
const string &keywords,
const string &content,
-   const string &snippet,
-   const string &size,
const string &wordCount);
 void flush();
 void indexingPostlude();

-- 
To view, visit https://gerrit.wikimedia.org/r/343897
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr 
Gerrit-Reviewer: Kelson 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] openzim[master]: Do not store the snippet nor the size of the content in the ...

2017-03-21 Thread Mgautierfr (Code Review)
Mgautierfr has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/343897 )

Change subject: Do not store the snippet nor the size of the content in the 
database.
..

Do not store the snippet nor the size of the content in the database.

Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
---
M zimwriterfs/indexer.cpp
M zimwriterfs/indexer.h
M zimwriterfs/xapianIndexer.cpp
M zimwriterfs/xapianIndexer.h
4 files changed, 2 insertions(+), 28 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/openzim refs/changes/97/343897/1

diff --git a/zimwriterfs/indexer.cpp b/zimwriterfs/indexer.cpp
index 6c26fc9..b83abd4 100644
--- a/zimwriterfs/indexer.cpp
+++ b/zimwriterfs/indexer.cpp
@@ -84,8 +84,6 @@
  token.title,
  token.keywords,
  token.content,
- token.snippet,
- token.size,
  token.wordCount
  );
 
diff --git a/zimwriterfs/indexer.h b/zimwriterfs/indexer.h
index 02d989b..3291e36 100644
--- a/zimwriterfs/indexer.h
+++ b/zimwriterfs/indexer.h
@@ -46,8 +46,6 @@
 string title;
 string keywords;
 string content;
-string snippet;
-string size;
 string wordCount;
 };
 
@@ -70,8 +68,6 @@
   const string &unaccentedTitle,
   const string &keywords,
   const string &content,
-  const string &snippet,
-  const string &size,
   const string &wordCount) = 0;
 virtual void flush() = 0;
 virtual void indexingPostlude() = 0;
diff --git a/zimwriterfs/xapianIndexer.cpp b/zimwriterfs/xapianIndexer.cpp
index 65129b7..db27f9d 100644
--- a/zimwriterfs/xapianIndexer.cpp
+++ b/zimwriterfs/xapianIndexer.cpp
@@ -52,7 +52,7 @@
 void XapianIndexer::indexingPrelude(const string indexPath_) {
 indexPath = indexPath_;
 this->writableDatabase = Xapian::WritableDatabase(indexPath + ".tmp", 
Xapian::DB_CREATE_OR_OVERWRITE);
-this->writableDatabase.set_metadata("valuesmap", 
"title:0;snippet:1;size:2;wordcount:3");
+this->writableDatabase.set_metadata("valuesmap", "title:0;wordcount:1");
 this->writableDatabase.begin_transaction(true);
 
 /* Insert the stopwords */
@@ -72,17 +72,13 @@
   const string &unaccentedTitle,
   const string &keywords,
   const string &content,
-  const string &snippet,
-  const string &size,
   const string &wordCount) {
 
 /* Put the data in the document */
 Xapian::Document currentDocument;
 currentDocument.clear_values();
 currentDocument.add_value(0, title);
-currentDocument.add_value(1, snippet);
-currentDocument.add_value(2, size);
-currentDocument.add_value(3, wordCount);
+currentDocument.add_value(1, wordCount);
 currentDocument.set_data(url);
 indexer.set_document(currentDocument);
 
@@ -149,20 +145,6 @@
stringstream countWordStringStream;
countWordStringStream << countWords(htmlParser.dump);
token.wordCount = countWordStringStream.str();
-
-   /* snippet */
-   std::string snippet = std::string(htmlParser.dump, 0, 300);
-   std::string::size_type last = snippet.find_last_of('.');
-   if (last == snippet.npos)
- last = snippet.find_last_of(' ');
-   if (last != snippet.npos)
- snippet = snippet.substr(0, last);
-   token.snippet = snippet;
-
-   /* size */
-   stringstream sizeStringStream;
-   sizeStringStream << token.content.size() / 1024;
-   token.size = sizeStringStream.str();
 
/* Remove accent */
token.title = removeAccents(token.accentedTitle);
diff --git a/zimwriterfs/xapianIndexer.h b/zimwriterfs/xapianIndexer.h
index 1d854da..16dc094 100644
--- a/zimwriterfs/xapianIndexer.h
+++ b/zimwriterfs/xapianIndexer.h
@@ -61,8 +61,6 @@
const string &unaccentedTitle,
const string &keywords,
const string &content,
-   const string &snippet,
-   const string &size,
const string &wordCount);
 void flush();
 void indexingPostlude();

-- 
To view, visit https://gerrit.wikimedia.org/r/343897
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits