Petr Onderka has uploaded a new change for review. https://gerrit.wikimedia.org/r/83783
Change subject: Fixed memory issues ...................................................................... Fixed memory issues - clearing index cache periodically, so that it doesn't grow too much - fixed memory leaks - less copying Change-Id: I1483a2f4855a7faa751dd8ed7ee050b17a0c7204 --- M DumpObjects/DumpObject.h M Indexes/Index.h M Indexes/Index.tpp M Indexes/IndexInnerNode.h M Indexes/IndexInnerNode.tpp M Indexes/IndexLeafNode.h M Indexes/IndexLeafNode.tpp M Indexes/IndexNode.h M Indexes/Iterators/IndexNodeIterator.h M XmlUtils.cpp 10 files changed, 47 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/dumps/incremental refs/changes/83/83783/1 diff --git a/DumpObjects/DumpObject.h b/DumpObjects/DumpObject.h index ef21805..3efc910 100644 --- a/DumpObjects/DumpObject.h +++ b/DumpObjects/DumpObject.h @@ -16,18 +16,20 @@ virtual void WriteInternal() = 0; template<typename T> - void WriteValue(const T value); + void WriteValue(const T &value); public: template<typename T> - static void WriteValue(std::ostream &stream, const T value); + static void WriteValue(std::ostream &stream, const T &value); template<typename T> static void ReadValue(std::istream &stream, T &value); template<typename T> - static std::uint32_t ValueSize(const T value); + static std::uint32_t ValueSize(const T &value); virtual std::uint32_t NewLength() = 0; + + virtual ~DumpObjectBase() {} }; class DumpObject : public DumpObjectBase @@ -47,13 +49,13 @@ }; template<typename T> -void DumpObjectBase::WriteValue(const T value) +void DumpObjectBase::WriteValue(const T &value) { WriteValue(*stream, value); } template<typename T> -void DumpObjectBase::WriteValue(std::ostream &stream, const T value) +void DumpObjectBase::WriteValue(std::ostream &stream, const T &value) { DumpTraits<T>::Write(stream, value); } @@ -65,7 +67,7 @@ } template<typename T> -uint32_t DumpObjectBase::ValueSize(const T value) +uint32_t DumpObjectBase::ValueSize(const T &value) { return DumpTraits<T>::DumpSize(value); } \ No newline at end of file diff --git a/Indexes/Index.h b/Indexes/Index.h index 24e822c..ac77ddb 100644 --- a/Indexes/Index.h +++ b/Indexes/Index.h @@ -17,6 +17,8 @@ std::weak_ptr<WritableDump> dump; std::weak_ptr<Offset> fileHeaderOffset; + int recentChanges; + void AfterAdd(); public: Index(std::weak_ptr<WritableDump> dump, std::weak_ptr<Offset> fileHeaderOffset, bool delaySave = false); diff --git a/Indexes/Index.tpp b/Indexes/Index.tpp index 294d755..7ba38d5 100644 --- a/Indexes/Index.tpp +++ b/Indexes/Index.tpp @@ -7,7 +7,7 @@ template<typename TKey, typename TValue> Index<TKey, TValue>::Index(std::weak_ptr<WritableDump> dump, std::weak_ptr<Offset> fileHeaderOffset, bool delaySave) - : dump(dump), fileHeaderOffset(fileHeaderOffset) + : dump(dump), fileHeaderOffset(fileHeaderOffset), recentChanges(0) { auto offset = fileHeaderOffset.lock(); @@ -63,6 +63,14 @@ rootNodeUnsaved = false; } + + recentChanges++; + + if (recentChanges >= 100000) + { + rootNode->ClearCached(); + recentChanges = 0; + } } template<typename TKey, typename TValue> diff --git a/Indexes/IndexInnerNode.h b/Indexes/IndexInnerNode.h index 3b039bc..5ccc1e1 100644 --- a/Indexes/IndexInnerNode.h +++ b/Indexes/IndexInnerNode.h @@ -41,6 +41,8 @@ virtual std::uint32_t RealLength() override; virtual SplitResult Split() override; + virtual void ClearCached() override; + virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override; virtual std::unique_ptr<IndexNodeIterator<TKey, TValue>> end() override; }; diff --git a/Indexes/IndexInnerNode.tpp b/Indexes/IndexInnerNode.tpp index b6607c1..0890b2a 100644 --- a/Indexes/IndexInnerNode.tpp +++ b/Indexes/IndexInnerNode.tpp @@ -152,6 +152,8 @@ template<typename TKey, typename TValue> void IndexInnerNode<TKey, TValue>::Write() { + // TODO: don't do anything when there are no changes + IndexNode<TKey, TValue>::Write(); for (auto &cachedChild : cachedChildren) @@ -217,6 +219,17 @@ } template<typename TKey, typename TValue> +void IndexInnerNode<TKey, TValue>::ClearCached() +{ + Write(); + + for (unsigned i = 0; i < cachedChildren.size(); i++) + { + cachedChildren.at(i) = nullptr; + } +} + +template<typename TKey, typename TValue> std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexInnerNode<TKey, TValue>::begin() { return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new IndexInnerIterator<TKey, TValue>(this, true)); diff --git a/Indexes/IndexLeafNode.h b/Indexes/IndexLeafNode.h index 1598488..ad069ac 100644 --- a/Indexes/IndexLeafNode.h +++ b/Indexes/IndexLeafNode.h @@ -32,6 +32,8 @@ virtual std::uint32_t RealLength() override; virtual SplitResult Split() override; + virtual void ClearCached() override; + virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() override; virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() override; }; diff --git a/Indexes/IndexLeafNode.tpp b/Indexes/IndexLeafNode.tpp index 6f9cb22..2f373ed 100644 --- a/Indexes/IndexLeafNode.tpp +++ b/Indexes/IndexLeafNode.tpp @@ -115,6 +115,10 @@ } template<typename TKey, typename TValue> +void IndexLeafNode<TKey, TValue>::ClearCached() +{} + +template<typename TKey, typename TValue> std::unique_ptr<IndexNodeIterator<TKey, TValue>> IndexLeafNode<TKey, TValue>::begin() { return std::unique_ptr<IndexNodeIterator<TKey, TValue>>(new IndexLeafIterator<TKey, TValue>(indexMap.begin())); diff --git a/Indexes/IndexNode.h b/Indexes/IndexNode.h index 366c395..ce2bec5 100644 --- a/Indexes/IndexNode.h +++ b/Indexes/IndexNode.h @@ -54,6 +54,8 @@ virtual std::uint32_t RealLength() = 0; virtual SplitResult Split() = 0; + virtual void ClearCached() = 0; + virtual unique_ptr<IndexNodeIterator<TKey, TValue>> begin() = 0; virtual unique_ptr<IndexNodeIterator<TKey, TValue>> end() = 0; }; diff --git a/Indexes/Iterators/IndexNodeIterator.h b/Indexes/Iterators/IndexNodeIterator.h index 20dd85e..6249e02 100644 --- a/Indexes/Iterators/IndexNodeIterator.h +++ b/Indexes/Iterators/IndexNodeIterator.h @@ -12,4 +12,6 @@ virtual bool Equals(const IndexNodeIterator *other) const = 0; virtual std::unique_ptr<IndexNodeIterator> Clone() const = 0; + + virtual ~IndexNodeIterator() {}; }; \ No newline at end of file diff --git a/XmlUtils.cpp b/XmlUtils.cpp index 8d613af..a306836 100644 --- a/XmlUtils.cpp +++ b/XmlUtils.cpp @@ -66,14 +66,14 @@ const size_t BUFFER_SIZE = 256; - char* buffer = new char[BUFFER_SIZE]; + std::string buffer(BUFFER_SIZE, '\0'); int read; do { - read = elem.ReadData(buffer, BUFFER_SIZE - 1); - stream.write(buffer, read); + read = elem.ReadData(&buffer.at(0), BUFFER_SIZE - 1); + stream.write(buffer.data(), read); } while (read == BUFFER_SIZE); return unescape(stream.str()); -- To view, visit https://gerrit.wikimedia.org/r/83783 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1483a2f4855a7faa751dd8ed7ee050b17a0c7204 Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps/incremental Gerrit-Branch: gsoc Gerrit-Owner: Petr Onderka <gsv...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits