Changeset: 35ea2cd7656d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/35ea2cd7656d
Added Files:
        documentation/source/binary-resultset.rst
Modified Files:
        sql/backends/monet5/sql_result.c
Branch: binresultset
Log Message:

Implement and document the revised binary result set format


diffs (136 lines):

diff --git a/documentation/source/binary-resultset.rst 
b/documentation/source/binary-resultset.rst
new file mode 100644
--- /dev/null
+++ b/documentation/source/binary-resultset.rst
@@ -0,0 +1,64 @@
+.. This Source Code Form is subject to the terms of the Mozilla Public
+.. License, v. 2.0.  If a copy of the MPL was not distributed with this
+.. file, You can obtain one at http://mozilla.org/MPL/2.0/.
+..
+.. Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
+
+************************
+Binary Result set format
+************************
+
+Note: this explanation will eventually be folded into a more comprehensive
+description of the MAPI protocol on the MonetDB website.
+In the meantime, it lives in this directory.
+
+
+Overview
+========
+
+When MonetDB executes a query it immediately sends the first `N` rows of the
+result set to the client, where `N` is determined by the ``reply_size`` setting.
+If the client needs more it can use the ``Xexport <startrow> <count>`` command
+to request more rows.
+
+Recently we have added the ``Xexportbin <startrow> <count>`` command which
+behaves the same but uses a binary format that may be more efficient to parse.
+
+The server advertises its support for ``Xexportbin`` in the eighth field of its
+connect challenge. For example,
+
+    bL1sNfkaa:mserver:9:RIPEMD160,SHA512,SHA384,SHA256,SHA224,SHA1,COMPRESSION_SNAPPY,COMPRESSION_LZ4:LIT:SHA512:sql=6:**BINARY=1**:
+
+Currently it sends ``BINARY=1``. In the future it may send a higher number if
+variants are added.
+
+
+Binary result set layout
+========================
+
+In response to ``Xexportbin <startrow> <count>`` the server returns a big blob
+of bytes delimited by a flush. In other words, the end is marked by the final
+8KiB MAPI block having its end-flag enabled, just like a regular response. (We
+can make this explanation clearer when the text is embedded in an overall
+description of the MAPI protocol.)
+
+To interpret the bytes, first look at the first byte. If it is an exclamation
+mark, the rest of the response is an error message.
+Otherwise, look at the final 8 bytes. These form a signed 64-bit
+server-endian integer. In the current version of the protocol the number will
+always be either positive or negative and never zero.
+
+If the number is negative, an error has occurred, and the negated number is the
+byte offset of the error message, counting from the start of the response. The
+end of the error message is marked by a nul byte. Note: the error message starts
+with an exclamation mark, just as in the textual protocol.
+
+If the number is positive, it is the byte offset of the table of contents of
+the response.  This is a sequence of 16-byte entries, one for each column
+of the result set.  Each entry consists of the starting offset and the length
+in bytes of the data for that column, again expressed as 64-bit server-endian
+integers.
+
+The byte layout of each individual column is identical to what would have been
+produced by ``COPY select_query INTO BINARY 'file1', 'file2', ...``.
+
diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c
--- a/sql/backends/monet5/sql_result.c
+++ b/sql/backends/monet5/sql_result.c
@@ -1913,7 +1913,8 @@ mvc_export_bin_chunk(backend *b, stream 
        struct bindump_record *colinfo;
        stream *countstream = NULL;
        uint64_t byte_count = 0;
-       BUN end = offset + nr;
+       uint64_t toc_pos = 0;
+       BUN end_row = offset + nr;
 
        res_table *res = res_tables_find(b->results, res_id);
        if (res == NULL)
@@ -1935,8 +1936,8 @@ mvc_export_bin_chunk(backend *b, stream 
                }
                colinfo[i].bat = b;
 
-               if (BATcount(b) < end)
-                       end = BATcount(b);
+               if (BATcount(b) < end_row)
+                       end_row = BATcount(b);
 
                int tpe = BATttype(b);
                const char *gdk_name = ATOMname(tpe);
@@ -1953,16 +1954,20 @@ mvc_export_bin_chunk(backend *b, stream 
                colinfo[i].type_rec = rec;
        }
 
-       // Probably have to deal with t->order somehow..
+       // TODO: Probably have to deal with t->order somehow..
+       // Right now we just write the contents of the bats.
+
+       // The byte_counting_stream keeps track of the byte offsets
        countstream = byte_counting_stream(s, &byte_count);
 
-       mnstr_printf(countstream, "&6 %d %d " BUNFMT " " BUNFMT "\n", res_id, 
res->nr_cols, end - offset, offset);
+       // Make sure the message starts with a & and not with a !
+       mnstr_printf(countstream, "&6 %d %d " BUNFMT " " BUNFMT "\n", res_id, 
res->nr_cols, end_row - offset, offset);
 
        for (int i = 0; i < res->nr_cols; i++) {
                align_dump(countstream, &byte_count, 32); // 32 looks nice in 
tcpflow
                struct bindump_record *info = &colinfo[i];
                info->start = byte_count;
-               str msg = dump_binary_column(info->type_rec, info->bat, offset, 
end - offset, false, countstream);
+               str msg = dump_binary_column(info->type_rec, info->bat, offset, 
end_row - offset, false, countstream);
                if (msg != MAL_SUCCEED) {
                        GDKerror("%s", msg);
                        GDKfree(msg);
@@ -1972,7 +1977,10 @@ mvc_export_bin_chunk(backend *b, stream 
                info->length = byte_count - info->start;
        }
 
+       assert(byte_count > 0);
+
        align_dump(countstream, &byte_count, 32);
+       toc_pos = byte_count;
        for (int i = 0; i < res->nr_cols; i++) {
                struct bindump_record *info = &colinfo[i];
                lng start = info->start;
@@ -1981,6 +1989,7 @@ mvc_export_bin_chunk(backend *b, stream 
                mnstr_writeLng(countstream, length);
        }
 
+       mnstr_writeLng(countstream, toc_pos);
        ret = 0;
 
 end:
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to