This is bare-bones C++ code to parse the output of `git cat-file
--batch --batch-all-objects`.

It doesn't _do_ anything with its parsed data; it only parrots tree and
commit object fields to stdout.
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

/// One record of the form "<hash> <type> <size>\n<size bytes of data>".
struct RawObject
{
    /// Reads the next record from `is` into `hash`, `type` and `data`.
    ///
    /// Leading whitespace (such as a newline separating two records) is
    /// skipped by the formatted extraction of `hash`.
    ///
    /// If the header line cannot be read (end of input, non-numeric size)
    /// all three members are left empty and the stream is left in its
    /// failed state. If the stream ends before `size` bytes of payload are
    /// available, `data` holds only the bytes actually read and the stream
    /// is left in its failed state. Callers can therefore test `is` after
    /// the call to find out whether a complete record was read.
    void read(std::istream & is)
    {
        data.clear();

        // Initialised so that a failed extraction can never leave an
        // indeterminate value behind.
        std::uint64_t size = 0;
        if (!(is >> hash >> type >> size)) {
            hash.clear();
            type.clear();
            return;
        }
        is.ignore(1); // newline after size

        data.resize(size);
        // A single read() either delivers every requested byte or hits end
        // of input; gcount() reports how many bytes really arrived, so a
        // short read shrinks the buffer instead of looping forever.
        is.read(data.data(), static_cast<std::streamsize>(data.size()));
        data.resize(static_cast<std::size_t>(is.gcount()));
    }

    std::string hash, type;
    std::vector<char> data;
};

struct Commit
{
    Commit() {}
    void parse(RawObject const & obj)
    {
        istringstream ss(string(obj.data.begin(), obj.data.end()));

        header.clear();
        fields.clear();
        header.emplace_back("commit", obj.hash);

        static thread_local string temp;
        while ("reading commit header") {
            switch (ss.peek()) {
            default:
                header.resize(header.size() + 1);
                ss >> header.back().first; ss.ignore(1);
                getline(ss, header.back().second);
                continue;
            case ' ':
                ss.ignore(1); header.back().second += '\n';
                getline(ss, temp);
                header.back().second += temp;
                continue;
            case '\n':
                ss.ignore(1);
                goto end_of_header;
            }
        }

end_of_header:
        for (auto & item : header) {
            fields.emplace(item.first, &item.second);
        }
        message = string(istreambuf_iterator<char>(ss), {});
    }

    string const & operator[](string const & field)
    {
        return *fields.equal_range(field).first->second;
    }

    vector<string> all(string const & field)
    {
        vector<string> result;
        auto range = fields.equal_range(field);
        for (auto it = range.first; it != range.second; ++ it) {
            result.emplace_back(*it->second);
        }
        return result;
    }

    vector<pair<string, string>> header;
    unordered_multimap<string, string *> fields;
    string message;
};

/// A parsed tree object: a sequence of entries, each laid out as
/// "<mode> <name>\0" followed by 20 raw hash bytes.
struct Tree
{
    Tree() {}

    /// Replaces `entries` with the entries stored in `obj`.
    ///
    /// Parsing stops at end of data. An entry that is cut short (the data
    /// ends before its 20 hash bytes) is dropped rather than kept with
    /// partially filled fields.
    void parse(RawObject const & obj)
    {
        entries.clear();
        std::string const bytes(obj.data.begin(), obj.data.end());
        std::istringstream ss(bytes);

        while (ss.peek() != std::istringstream::traits_type::eof()) {
            entries.emplace_back();
            entries.back().parse(ss);
            if (!ss) {
                entries.pop_back(); // truncated trailing entry
                break;
            }
        }
    }

    struct Entry
    {
        Entry() {}

        /// Reads one entry from `data`: mode up to the first space, name up
        /// to the first NUL, then 20 raw bytes. On short input the stream
        /// is left in a failed state, which the caller should check.
        void parse(std::istream & data)
        {
            std::getline(data, mode, ' ');
            std::getline(data, name, '\0');
            data.read(_hash, sizeof _hash);
        }

        std::string mode;
        std::string name;

        /// Returns the hash as 40 lowercase hexadecimal characters.
        ///
        /// The result refers to a buffer shared by every Entry on the
        /// calling thread and is overwritten by the next call to hash().
        /// Copy it before calling hash() again (for example when comparing
        /// the hashes of two entries).
        std::string const & hash() const
        {
            static char const digits[] = "0123456789abcdef";
            static thread_local std::string hex(2 * sizeof _hash, '\0');
            for (std::size_t i = 0; i < sizeof _hash; ++i) {
                // go through unsigned char so bytes >= 0x80 do not sign-extend
                unsigned char const byte = static_cast<unsigned char>(_hash[i]);
                hex[2 * i] = digits[byte >> 4];
                hex[2 * i + 1] = digits[byte & 0x0f];
            }
            return hex;
        }

        char _hash[20] = {}; // raw hash bytes; zero until parse() fills them
    };
    std::vector<Entry> entries;
};

/* =========================================================== */

/// Reads object records from a stream and echoes commit and tree contents
/// to standard output.
class Reader
{
public:
    /// Binds the reader to `is`; the stream must outlive the reader.
    Reader(std::istream & is)
    : is(is)
    { }

    /// Reads one object and prints "<type> <size>", followed by the header
    /// fields and message of a commit, or "<name>: <hex hash>" per entry of
    /// a tree. Other object types print only the first line.
    ///
    /// Prints nothing if the stream is in a failed state after the read
    /// (for instance at end of input).
    void read()
    {
        // Reused across calls so their buffers are not reallocated for
        // every object.
        static thread_local RawObject obj;
        static thread_local Commit commit;
        static thread_local Tree tree;

        obj.read(is);
        if (!is) {
            return; // no complete object available
        }

        std::cout << obj.type << " " << obj.data.size() << '\n';

        if (obj.type == "commit")
        {
            commit.parse(obj);
            for (auto const & item : commit.fields) {
                std::cout << item.first << ": " << *item.second << '\n';
            }
            std::cout << commit.message << '\n';
        }
        else if (obj.type == "tree")
        {
            tree.parse(obj);
            for (auto & entry : tree.entries) {
                std::cout << entry.name << ": " << entry.hash() << '\n';
            }
        }

        // Flush once per object rather than once per line, so finished
        // objects still reach the output if the process is interrupted.
        std::cout.flush();
    }

private:
    std::istream & is;
};

/// Echoes every object found on standard input until the input is
/// exhausted or can no longer be read.
int main()
{
    // Only iostreams are used for I/O here, so the C stdio synchronisation
    // can be switched off to speed up bulk reads and writes.
    std::ios_base::sync_with_stdio(false);

    Reader reader(std::cin);
    // Skip whitespace between records, then look ahead: peek() yields EOF
    // at end of input and also whenever the stream has already failed, so
    // the loop stops in both cases instead of spinning forever.
    while ((std::cin >> std::ws).peek() != std::istream::traits_type::eof()) {
        reader.read();
    }
    return 0;
}

Reply via email to