Petr Onderka has submitted this change and it was merged.

Change subject: starting with dump format: file header
......................................................................
starting with dump format: file header Change-Id: I29350dbe9de280efa1248a6c87d174fa6494d32e --- A Dump.cpp A Dump.h A DumpObject.cpp A DumpObject.h A FileHeader.cpp A FileHeader.h M Incremental dumps.vcxproj A Offset.cpp A Offset.h M main.cpp 10 files changed, 246 insertions(+), 2 deletions(-) Approvals: Petr Onderka: Verified; Looks good to me, approved diff --git a/Dump.cpp b/Dump.cpp new file mode 100644 index 0000000..5a0d77f --- /dev/null +++ b/Dump.cpp @@ -0,0 +1,49 @@ +#include <cstdint> +#include <memory> +#include <string> +#include <fstream> +#include "Dump.h" + +using std::unique_ptr; +using std::move; +using std::string; +using std::fstream; +using std::ios; + +ReadableDump::ReadableDump(unique_ptr<iostream> stream) + : stream(move(stream)) +{} + +ReadableDump::ReadableDump(string fileName) + : stream(unique_ptr<fstream>(new fstream(fileName, ios::in | ios::binary))) +{} + +unique_ptr<iostream> WritableDump::openStream(string fileName) +{ + fstream *stream = new fstream(fileName, ios::in | ios::out | ios::binary); + + if (!stream->is_open()) + { + // this feels dangerous, isn't there a better way? 
+ stream = new fstream(fileName, ios::in | ios::out | ios::binary | ios::trunc); + } + + stream->exceptions(ios::failbit | ios::badbit); + + return unique_ptr<iostream>(stream); +} + +WritableDump::WritableDump(string fileName) + : ReadableDump(openStream(fileName)) +{ + if (stream->peek() == EOF) + { + stream->clear(); + fileHeader = FileHeader(); + fileHeader.Write(stream, 0); + } + else + { + fileHeader = FileHeader::Read(stream); + } +} \ No newline at end of file diff --git a/Dump.h b/Dump.h new file mode 100644 index 0000000..e995d1b --- /dev/null +++ b/Dump.h @@ -0,0 +1,35 @@ +#pragma once + +#include <cstdint> +#include <memory> +#include <string> +#include <iostream> +#include "FileHeader.h" + +using std::int64_t; +using std::unique_ptr; +using std::string; +using std::iostream; + +class DumpException : std::exception +{ +}; + +class ReadableDump +{ +protected: + unique_ptr<iostream> stream; + ReadableDump(unique_ptr<iostream> stream); +public: + ReadableDump(string fileName); +}; + +class WritableDump : public ReadableDump +{ +private: + FileHeader fileHeader; + + static unique_ptr<iostream> openStream(string fileName); +public: + WritableDump(string fileName); +}; \ No newline at end of file diff --git a/DumpObject.cpp b/DumpObject.cpp new file mode 100644 index 0000000..ce6715f --- /dev/null +++ b/DumpObject.cpp @@ -0,0 +1,7 @@ +#include "DumpObject.h" + +void DumpObject::Write(unique_ptr<iostream> const &stream, int64_t offset) +{ + stream->seekp(offset); + WriteInternal(stream); +} \ No newline at end of file diff --git a/DumpObject.h b/DumpObject.h new file mode 100644 index 0000000..f7b5c2f --- /dev/null +++ b/DumpObject.h @@ -0,0 +1,17 @@ +#pragma once + +#include <cstdint> +#include <memory> +#include <iostream> + +using std::int64_t; +using std::unique_ptr; +using std::iostream; + +class DumpObject +{ +protected: + virtual void WriteInternal(unique_ptr<iostream> const &stream) = 0; +public: + void Write(unique_ptr<iostream> const &stream, int64_t 
offset); +}; \ No newline at end of file diff --git a/FileHeader.cpp b/FileHeader.cpp new file mode 100644 index 0000000..ddee2dc --- /dev/null +++ b/FileHeader.cpp @@ -0,0 +1,37 @@ +#include "FileHeader.h" +#include "Dump.h" + +FileHeader::FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot) + : FileEnd(fileEnd), PageIdIndexRoot(pageIdIndexRoot), FreeSpaceIndexRoot(freeSpaceIndexRoot) +{ +} + +void FileHeader::WriteInternal(unique_ptr<iostream> const &stream) +{ + stream->write("WMID", 4); + stream->write(&FileFormatVersion, 1); + stream->write(&FileDataVersion, 1); + + FileEnd.Write(stream); + PageIdIndexRoot.Write(stream); + FreeSpaceIndexRoot.Write(stream); +} + +FileHeader FileHeader::Read(unique_ptr<iostream> const &stream) +{ + char bytes[6]; + stream->read(bytes, 6); + if (stream->fail() || strncmp(bytes, "WMID", 4) != 0 || bytes[4] != FileFormatVersion || bytes[5] != FileDataVersion) + throw new DumpException(); + + Offset fileEnd = Offset::Read(stream); + Offset pageIdIndexRoot = Offset::Read(stream); + Offset freeSpaceIndexRoot = Offset::Read(stream); + + return FileHeader(fileEnd, pageIdIndexRoot, freeSpaceIndexRoot); +} + +FileHeader::FileHeader() + : FileEnd(0), PageIdIndexRoot(0), FreeSpaceIndexRoot(0) +{ +} \ No newline at end of file diff --git a/FileHeader.h b/FileHeader.h new file mode 100644 index 0000000..20e1bb1 --- /dev/null +++ b/FileHeader.h @@ -0,0 +1,26 @@ +#pragma once + +#include <iostream> +#include "DumpObject.h" +#include "Offset.h" + +using std::istream; + +class FileHeader : public DumpObject +{ +private: + FileHeader(Offset fileEnd, Offset pageIdIndexRoot, Offset freeSpaceIndexRoot); +protected: + virtual void WriteInternal(unique_ptr<iostream> const &stream); +public: + static const char FileFormatVersion = 1; + static const char FileDataVersion = 1; + + static FileHeader Read(unique_ptr<iostream> const &stream); + + Offset FileEnd; + Offset PageIdIndexRoot; + Offset FreeSpaceIndexRoot; + + FileHeader(); 
+}; \ No newline at end of file diff --git a/Incremental dumps.vcxproj b/Incremental dumps.vcxproj index c22e91a..63131ae 100644 --- a/Incremental dumps.vcxproj +++ b/Incremental dumps.vcxproj @@ -78,7 +78,11 @@ </Link> </ItemDefinitionGroup> <ItemGroup> + <ClCompile Include="Dump.cpp" /> + <ClCompile Include="DumpObject.cpp" /> + <ClCompile Include="FileHeader.cpp" /> <ClCompile Include="main.cpp" /> + <ClCompile Include="Offset.cpp" /> <ClCompile Include="TestDumpWriter.cpp" /> <ClCompile Include="XmlPageProcessor.cpp" /> <ClCompile Include="XmlRevisionProcessor.cpp" /> @@ -90,7 +94,11 @@ <ClCompile Include="XML\xmloutput.cpp" /> </ItemGroup> <ItemGroup> + <ClInclude Include="Dump.h" /> + <ClInclude Include="DumpObject.h" /> <ClInclude Include="DumpWriter.h" /> + <ClInclude Include="FileHeader.h" /> + <ClInclude Include="Offset.h" /> <ClInclude Include="Page.h" /> <ClInclude Include="Revision.h" /> <ClInclude Include="TestDumpWriter.h" /> diff --git a/Offset.cpp b/Offset.cpp new file mode 100644 index 0000000..d6097e0 --- /dev/null +++ b/Offset.cpp @@ -0,0 +1,43 @@ +#include "Offset.h" +#include "Dump.h" + +Offset::Offset(int64_t value) + : value(value) +{ + if (value < 0 || value > 0xFFFFFFFFFFFF) // 6 bytes + throw DumpException(); +} + +void Offset::Write(unique_ptr<iostream> const &stream) const +{ + char bytes[6]; + + bytes[0] = value & 0xFF; + bytes[1] = (value >> 8) & 0xFF; + bytes[2] = (value >> 16) & 0xFF; + bytes[3] = (value >> 24) & 0xFF; + bytes[4] = (value >> 32) & 0xFF; + bytes[5] = (value >> 40) & 0xFF; + + stream->write(bytes, 6); +} + +Offset Offset::Read(unique_ptr<iostream> const &stream) +{ + char bytes[6]; + + stream->read(bytes, 6); + + if (stream->fail()) + throw new DumpException(); + + int64_t offset = 0; + offset |= (int64_t)bytes[0]; + offset |= (int64_t)bytes[1] << 8; + offset |= (int64_t)bytes[2] << 16; + offset |= (int64_t)bytes[3] << 24; + offset |= (int64_t)bytes[4] << 32; + offset |= (int64_t)bytes[5] << 40; + + return 
Offset(offset); +} \ No newline at end of file diff --git a/Offset.h b/Offset.h new file mode 100644 index 0000000..f0f451a --- /dev/null +++ b/Offset.h @@ -0,0 +1,19 @@ +#pragma once + +#include <cstdint> +#include <memory> +#include <iostream> + +using std::int64_t; +using std::unique_ptr; +using std::iostream; + +class Offset +{ +public: + int64_t value; + + Offset(int64_t value); + void Write(unique_ptr<iostream> const &stream) const; + static Offset Read(unique_ptr<iostream> const &stream); +}; \ No newline at end of file diff --git a/main.cpp b/main.cpp index 0bb615a..229b7b2 100644 --- a/main.cpp +++ b/main.cpp @@ -4,6 +4,7 @@ #include "XML/xmlfile.h" #include "TestDumpWriter.h" #include "XmlPageProcessor.h" +#include "Dump.h" using std::string; using std::cin; @@ -31,7 +32,7 @@ int main(int argc, const char* argv[]) { //StandardInputStream stream; - XML::FileInputStream stream = XML::FileInputStream("C:\\Users\\Svick\\Downloads\\tenwiki-20130622-pages-meta-history.xml"); + /*XML::FileInputStream stream = XML::FileInputStream("C:\\Users\\Svick\\Downloads\\tenwiki-20130622-pages-meta-history.xml"); XML::Input input(stream); @@ -42,5 +43,7 @@ TestDumpWriter writer; - input.Process(handlers, &writer); + input.Process(handlers, &writer);*/ + + WritableDump dump("test.id"); } \ No newline at end of file -- To view, visit https://gerrit.wikimedia.org/r/71995 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I29350dbe9de280efa1248a6c87d174fa6494d32e Gerrit-PatchSet: 1 Gerrit-Project: operations/dumps/incremental Gerrit-Branch: gsoc Gerrit-Owner: Petr Onderka <gsv...@gmail.com> Gerrit-Reviewer: Petr Onderka <gsv...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits