Petr Onderka has submitted this change and it was merged.

Change subject: Fixed memory issues
......................................................................


Fixed memory issues

- clearing index cache periodically, so that it doesn't grow too much
- fixed memory leaks
- less copying

Change-Id: I1483a2f4855a7faa751dd8ed7ee050b17a0c7204
---
M DumpObjects/DumpObject.h
M Indexes/Index.h
M Indexes/Index.tpp
M Indexes/IndexInnerNode.h
M Indexes/IndexInnerNode.tpp
M Indexes/IndexLeafNode.h
M Indexes/IndexLeafNode.tpp
M Indexes/IndexNode.h
M Indexes/Iterators/IndexNodeIterator.h
M XmlUtils.cpp
10 files changed, 47 insertions(+), 10 deletions(-)

Approvals:
  Petr Onderka: Verified; Looks good to me, approved



diff --git a/DumpObjects/DumpObject.h b/DumpObjects/DumpObject.h
index ef21805..3efc910 100644
--- a/DumpObjects/DumpObject.h
+++ b/DumpObjects/DumpObject.h
@@ -16,18 +16,20 @@
     virtual void WriteInternal() = 0;
 
     template<typename T>
-    void WriteValue(const T value);
+    void WriteValue(const T &value);
 public:
     template<typename T>
-    static void WriteValue(std::ostream &stream, const T value);
+    static void WriteValue(std::ostream &stream, const T &value);
 
     template<typename T>
     static void ReadValue(std::istream &stream, T &value);
 
     template<typename T>
-    static std::uint32_t ValueSize(const T value);
+    static std::uint32_t ValueSize(const T &value);
 
     virtual std::uint32_t NewLength() = 0;
+
+    virtual ~DumpObjectBase() {}
 };
 
 class DumpObject : public DumpObjectBase
@@ -47,13 +49,13 @@
 };
 
 template<typename T>
-void DumpObjectBase::WriteValue(const T value)
+void DumpObjectBase::WriteValue(const T &value)
 {
     WriteValue(*stream, value);
 }
 
 template<typename T>
-void DumpObjectBase::WriteValue(std::ostream &stream, const T value)
+void DumpObjectBase::WriteValue(std::ostream &stream, const T &value)
 {
     DumpTraits<T>::Write(stream, value);
 }
@@ -65,7 +67,7 @@
 }
 
 template<typename T>
-uint32_t DumpObjectBase::ValueSize(const T value)
+uint32_t DumpObjectBase::ValueSize(const T &value)
 {
     return DumpTraits<T>::DumpSize(value);
 }
\ No newline at end of file
diff --git a/Indexes/Index.h b/Indexes/Index.h
index 24e822c..ac77ddb 100644
--- a/Indexes/Index.h
+++ b/Indexes/Index.h
@@ -17,6 +17,8 @@
     std::weak_ptr<WritableDump> dump;
     std::weak_ptr<Offset> fileHeaderOffset;
 
+    int recentChanges;
+
     void AfterAdd();
 public:
     Index(std::weak_ptr<WritableDump> dump, std::weak_ptr<Offset> fileHeaderOffset, bool delaySave = false);
diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp
index 294d755..7ba38d5 100644
--- a/Indexes/Index.tpp
+++ b/Indexes/Index.tpp
@@ -7,7 +7,7 @@
 
 template<typename TKey, typename TValue>
 Index<TKey, TValue>::Index(std::weak_ptr<WritableDump> dump, std::weak_ptr<Offset> fileHeaderOffset, bool delaySave)
-    : dump(dump), fileHeaderOffset(fileHeaderOffset)
+    : dump(dump), fileHeaderOffset(fileHeaderOffset), recentChanges(0)
 {
     auto offset = fileHeaderOffset.lock();
 
@@ -63,6 +63,14 @@
 
         rootNodeUnsaved = false;
     }
+
+    recentChanges++;
+
+    if (recentChanges >= 100000)
+    {
+        rootNode->ClearCached();
+        recentChanges = 0;
+    }
 }
 
 template<typename TKey, typename TValue>
diff --git a/Indexes/IndexInnerNode.h b/Indexes/IndexInnerNode.h
index 3b039bc..5ccc1e1 100644
--- a/Indexes/IndexInnerNode.h
+++ b/Indexes/IndexInnerNode.h
@@ -41,6 +41,8 @@
     virtual std::uint32_t RealLength() override;
     virtual SplitResult Split() override;
 
+    virtual void ClearCached() override;
+
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
     virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
 };
diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp
index b6607c1..0890b2a 100644
--- a/Indexes/IndexInnerNode.tpp
+++ b/Indexes/IndexInnerNode.tpp
@@ -152,6 +152,8 @@
 template<typename TKey, typename TValue>
 void IndexInnerNode<TKey, TValue>::Write()
 {
+    // TODO: don't do anything when there are no changes
+
     IndexNode<TKey, TValue>::Write();
 
     for (auto &cachedChild : cachedChildren)
@@ -217,6 +219,17 @@
 }
 
 template<typename TKey, typename TValue>
+void IndexInnerNode<TKey, TValue>::ClearCached()
+{
+    Write();
+
+    for (unsigned i = 0; i < cachedChildren.size(); i++)
+    {
+        cachedChildren.at(i) = nullptr;
+    }
+}
+
+template<typename TKey, typename TValue>
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::begin()
 {
     return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new IndexInnerIterator<TKey, TValue>(this, true));
diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h
index 1598488..ad069ac 100644
--- a/Indexes/IndexLeafNode.h
+++ b/Indexes/IndexLeafNode.h
@@ -32,6 +32,8 @@
     virtual std::uint32_t RealLength() override;
     virtual SplitResult Split() override;
 
+    virtual void ClearCached() override;
+
     virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override;
     virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() override;
 };
diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp
index 6f9cb22..2f373ed 100644
--- a/Indexes/IndexLeafNode.tpp
+++ b/Indexes/IndexLeafNode.tpp
@@ -115,6 +115,10 @@
 }
 
 template<typename TKey, typename TValue>
+void IndexLeafNode<TKey, TValue>::ClearCached()
+{}
+
+template<typename TKey, typename TValue>
 std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::begin()
 {
     return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new IndexLeafIterator<TKey, TValue>(indexMap.begin()));
diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h
index 366c395..ce2bec5 100644
--- a/Indexes/IndexNode.h
+++ b/Indexes/IndexNode.h
@@ -54,6 +54,8 @@
     virtual std::uint32_t RealLength() = 0;
     virtual SplitResult Split() = 0;
 
+    virtual void ClearCached() = 0;
+
     virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() = 0;
     virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() = 0;
 };
diff --git a/Indexes/Iterators/IndexNodeIterator.h b/Indexes/Iterators/IndexNodeIterator.h
index 20dd85e..6249e02 100644
--- a/Indexes/Iterators/IndexNodeIterator.h
+++ b/Indexes/Iterators/IndexNodeIterator.h
@@ -12,4 +12,6 @@
 
     virtual bool Equals(const IndexNodeIterator *other) const = 0;
     virtual std::unique_ptr<IndexNodeIterator> Clone() const = 0;
+
+    virtual ~IndexNodeIterator() {};
 };
\ No newline at end of file
diff --git a/XmlUtils.cpp b/XmlUtils.cpp
index 8d613af..a306836 100644
--- a/XmlUtils.cpp
+++ b/XmlUtils.cpp
@@ -66,14 +66,14 @@
 
     const size_t BUFFER_SIZE = 256;
 
-    char* buffer = new char[BUFFER_SIZE];
+    std::string buffer(BUFFER_SIZE, '\0');
 
     int read;
 
     do
     {
-        read = elem.ReadData(buffer, BUFFER_SIZE - 1);
-        stream.write(buffer, read);
+        read = elem.ReadData(&buffer.at(0), BUFFER_SIZE - 1);
+        stream.write(buffer.data(), read);
     } while (read == BUFFER_SIZE);
     
     return unescape(stream.str());

-- 
To view, visit https://gerrit.wikimedia.org/r/83783
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I1483a2f4855a7faa751dd8ed7ee050b17a0c7204
Gerrit-PatchSet: 1
Gerrit-Project: operations/dumps/incremental
Gerrit-Branch: gsoc
Gerrit-Owner: Petr Onderka <gsv...@gmail.com>
Gerrit-Reviewer: Petr Onderka <gsv...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to