// Bare-bones C++ code to parse the output of `git cat-file --batch --batch-all-objects`.
// It doesn't _do_ anything with the parsed data; it simply parrots tree and commit
// object fields to stdout.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace std;

// One record of the batch stream: a "<hash> <type> <size>" header line
// followed by exactly <size> bytes of raw object content.
struct RawObject
{
    // Reads the next record from `is` into hash/type/data.
    //
    // Returns true when a complete record was read. Returns false when the
    // header could not be parsed (including a clean end of input) or when the
    // stream ended before <size> content bytes were available; in the latter
    // case `data` holds only the bytes that were actually read.
    bool read(istream & is)
    {
        uint64_t size = 0;
        if (!(is >> hash >> type >> size)) {
            data.clear();
            return false;
        }
        is.ignore(1); // newline after size

        data.resize(size);
        // istream::read only comes back short on end of input or error, so a
        // single call is enough; a short read is reported to the caller
        // instead of being retried forever.
        if (!is.read(data.data(), static_cast<streamsize>(data.size()))) {
            data.resize(static_cast<size_t>(is.gcount()));
            return false;
        }
        return true;
    }

    string hash, type;
    vector<char> data;
};

// Parsed view of a commit object: header fields plus the free-form message.
struct Commit
{
    Commit() {}

    // Splits the raw commit into `header`, `fields` and `message`.
    //
    // Each header line is "<key> <value>"; a line starting with a space is
    // treated as a continuation of the previous value (joined with '\n').
    // The header ends at the first empty line; everything after it is the
    // message. The object's own hash is stored as a pseudo field "commit".
    void parse(RawObject const & obj)
    {
        istringstream ss{string(obj.data.begin(), obj.data.end())};
        header.clear();
        fields.clear();
        message.clear();
        header.emplace_back("commit", obj.hash);

        string continuation;
        for (;;) {
            int const next = ss.peek();
            if (next == istringstream::traits_type::eof()) {
                break; // malformed commit without a blank line: stop instead of spinning
            }
            if (next == '\n') {
                ss.ignore(1); // blank line separating header from message
                break;
            }
            if (next == ' ') {
                // Continuation line: append to the most recent header value.
                ss.ignore(1);
                getline(ss, continuation);
                header.back().second += '\n';
                header.back().second += continuation;
                continue;
            }
            header.emplace_back();
            ss >> header.back().first;
            ss.ignore(1); // the space between key and value
            getline(ss, header.back().second);
        }

        // `header` is not resized past this point, so pointers to its
        // elements remain valid for as long as this object is not re-parsed.
        for (auto & item : header) {
            fields.emplace(item.first, &item.second);
        }
        message.assign(istreambuf_iterator<char>(ss), istreambuf_iterator<char>());
    }

    // Returns one value of header field `field`.
    // Throws std::out_of_range if the commit has no such field.
    string const & operator[](string const & field) const
    {
        auto const range = fields.equal_range(field);
        if (range.first == range.second) {
            throw out_of_range("commit has no header field '" + field + "'");
        }
        return *range.first->second;
    }

    // Returns copies of every value stored under `field` (empty if none).
    vector<string> all(string const & field) const
    {
        vector<string> result;
        auto const range = fields.equal_range(field);
        for (auto it = range.first; it != range.second; ++it) {
            result.emplace_back(*it->second);
        }
        return result;
    }

    // Header lines in file order, as (key, value) pairs.
    vector<pair<string, string>> header;
    // Key -> pointer to the matching value inside `header`.
    // NOTE: these pointers refer to this object's `header`; a copied Commit
    // must be re-parsed before its `fields` are used.
    unordered_multimap<string, string *> fields;
    string message;
};

// Parsed view of a tree object: a flat list of directory entries.
struct Tree
{
    Tree() {}

    // One tree entry: "<mode> <name>\0" followed by a 20-byte binary hash.
    struct Entry
    {
        Entry() {}

        // Reads one entry from `data`. On truncated input the stream is left
        // in a failed state and the entry contents are unspecified.
        void parse(istream & data)
        {
            getline(data, mode, ' ');
            getline(data, name, '\0');
            data.read(_hash, sizeof _hash);
        }

        string mode;
        string name;

        // Returns the hash as lowercase hex.
        // The reference points at a per-thread buffer that is overwritten by
        // the next call to hash() on any Entry; copy it if it must outlive that.
        string const & hash() const
        {
            static char const digits[] = "0123456789abcdef";
            static thread_local string hex(2 * sizeof _hash, '0');
            for (size_t i = 0; i < sizeof _hash; ++i) {
                auto const byte = static_cast<unsigned char>(_hash[i]);
                hex[2 * i] = digits[byte >> 4];
                hex[2 * i + 1] = digits[byte & 0x0f];
            }
            return hex;
        }

        char _hash[20] = {};
    };

    // Replaces `entries` with the entries found in `obj`.
    // A truncated trailing entry is dropped rather than stored half-filled.
    void parse(RawObject const & obj)
    {
        entries.clear();
        istringstream ss{string(obj.data.begin(), obj.data.end())};
        while (ss.peek() != istringstream::traits_type::eof()) {
            Entry entry;
            entry.parse(ss);
            if (!ss) {
                break;
            }
            entries.push_back(move(entry));
        }
    }

    vector<Entry> entries;
};

/* =========================================================== */

// Pulls objects off a batch stream one at a time and echoes them to stdout.
class Reader
{
public:
    Reader(istream & is)
    : is(is)
    { }

    // Reads one object and prints "<type> <size>", followed by the header
    // fields and message for commits, or "<name>: <hash>" per entry for trees.
    // Returns false once no further complete object can be read.
    bool read()
    {
        // Reused across calls so their buffers keep their capacity.
        static thread_local RawObject obj;
        static thread_local Commit commit;
        static thread_local Tree tree;

        if (!obj.read(is)) {
            return false;
        }

        cout << obj.type << " " << obj.data.size() << '\n';
        if (obj.type == "commit") {
            commit.parse(obj);
            for (auto const & item : commit.fields) {
                cout << item.first << ": " << *item.second << '\n';
            }
            cout << commit.message << '\n';
        } else if (obj.type == "tree") {
            tree.parse(obj);
            for (auto const & entry : tree.entries) {
                cout << entry.name << ": " << entry.hash() << '\n';
            }
        }
        return true;
    }

private:
    istream & is;
};

int main()
{
    Reader reader(cin);
    while (reader.read()) {
        // keep going until the input is exhausted
    }

    // Running out of input sets eofbit; stopping without it means an object
    // header could not be parsed.
    if (!cin.eof()) {
        cerr << "error: malformed object header in input\n";
        return 1;
    }
    return 0;
}