[
https://issues.apache.org/jira/browse/ORC-8?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15107163#comment-15107163
]
ASF GitHub Bot commented on ORC-8:
----------------------------------
Github user asandryh commented on a diff in the pull request:
https://github.com/apache/orc/pull/15#discussion_r50154368
--- Diff: tools/src/FileMetadata.cc ---
@@ -16,168 +16,162 @@
* limitations under the License.
*/
+#include <getopt.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <sstream>
-#include <iomanip>
-#include "wrap/orc-proto-wrapper.hh"
#include "orc/OrcFile.hh"
-using namespace orc::proto;
-
-uint64_t getTotalPaddingSize(const Footer& footer) {
- uint64_t paddedBytes = 0;
- StripeInformation stripe;
- for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
- stripe = footer.stripes(stripeIx-1);
- uint64_t prevStripeOffset = stripe.offset();
- uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
- stripe.footerlength();
- paddedBytes += footer.stripes(stripeIx).offset() -
- (prevStripeOffset + prevStripeLen);
- };
- return paddedBytes;
+void printStripeInformation(std::ostream& out,
+ uint64_t index,
+ uint64_t columns,
+ std::unique_ptr<orc::StripeInformation> stripe,
+ bool verbose) {
+ out << " { \"stripe\": " << index
+ << ", \"rows\": " << stripe->getNumberOfRows() << ",\n";
+ out << " \"offset\": " << stripe->getOffset()
+ << ", \"length\": " << stripe->getLength() << ",\n";
+ out << " \"index\": " << stripe->getIndexLength()
+ << ", \"data\": " << stripe->getDataLength()
+ << ", \"footer\": " << stripe->getFooterLength();
+ if (verbose) {
+ out << ",\n \"encodings\": [\n";
+ for(uint64_t col=0; col < columns; ++col) {
+ if (col != 0) {
+ out << ",\n";
+ }
+ orc::ColumnEncodingKind encoding = stripe->getColumnEncoding(col);
+ out << " { \"column\": " << col
+ << ", \"encoding\": \""
+ << columnEncodingKindToString(encoding) << "\"";
+ if (encoding == orc::ColumnEncodingKind_DICTIONARY ||
+ encoding == orc::ColumnEncodingKind_DICTIONARY_V2) {
+ out << ", \"count\": " << stripe->getDictionarySize(col);
+ }
+ out << " }";
+ }
+ out << "\n ],\n";
+ out << " \"streams\": [\n";
+ for(uint64_t str = 0; str < stripe->getNumberOfStreams(); ++str) {
+ if (str != 0) {
+ out << ",\n";
+ }
+ ORC_UNIQUE_PTR<orc::StreamInformation> stream =
+ stripe->getStreamInformation(str);
+ out << " { \"id\": " << str
+ << ", \"column\": " << stream->getColumnId()
+ << ", \"kind\": \"" << streamKindToString(stream->getKind())
+ << "\",\n";
+ out << " \"offset\": " << stream->getOffset()
--- End diff --
This may be nit-picking, but I find the output more readable if lines
67-68 are replaced with:
<< ", \"offset\": " << stream->getOffset()
> Replace tool implementation to use Reader interface
> ---------------------------------------------------
>
> Key: ORC-8
> URL: https://issues.apache.org/jira/browse/ORC-8
> Project: Orc
> Issue Type: Bug
> Reporter: Owen O'Malley
> Assignee: Owen O'Malley
>
> Currently FileMetadata is using custom parsing of the ORC footer and
> postscript. It should use the standard interface from Reader.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)