[ 
https://issues.apache.org/jira/browse/ORC-8?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15107163#comment-15107163
 ] 

ASF GitHub Bot commented on ORC-8:
----------------------------------

Github user asandryh commented on a diff in the pull request:

    https://github.com/apache/orc/pull/15#discussion_r50154368
  
    --- Diff: tools/src/FileMetadata.cc ---
    @@ -16,168 +16,162 @@
      * limitations under the License.
      */
     
    +#include <getopt.h>
     #include <iostream>
     #include <fstream>
     #include <vector>
     #include <string>
     #include <sstream>
    -#include <iomanip>
     
    -#include "wrap/orc-proto-wrapper.hh"
     #include "orc/OrcFile.hh"
     
    -using namespace orc::proto;
    -
    -uint64_t getTotalPaddingSize(const Footer& footer) {
    -  uint64_t paddedBytes = 0;
    -  StripeInformation stripe;
    -  for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
    -      stripe = footer.stripes(stripeIx-1);
    -      uint64_t prevStripeOffset = stripe.offset();
    -      uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
    -        stripe.footerlength();
    -      paddedBytes += footer.stripes(stripeIx).offset() -
    -        (prevStripeOffset + prevStripeLen);
    -  };
    -  return paddedBytes;
    +void printStripeInformation(std::ostream& out,
    +                            uint64_t index,
    +                            uint64_t columns,
    +                            std::unique_ptr<orc::StripeInformation> stripe,
    +                            bool verbose) {
    +  out << "    { \"stripe\": " << index
    +      << ", \"rows\": " << stripe->getNumberOfRows() << ",\n";
    +  out << "      \"offset\": " << stripe->getOffset()
    +      << ", \"length\": " << stripe->getLength() << ",\n";
    +  out << "      \"index\": " << stripe->getIndexLength()
    +      << ", \"data\": " << stripe->getDataLength()
    +      << ", \"footer\": " << stripe->getFooterLength();
    +  if (verbose) {
    +    out << ",\n      \"encodings\": [\n";
    +    for(uint64_t col=0; col < columns; ++col) {
    +      if (col != 0) {
    +        out << ",\n";
    +      }
    +      orc::ColumnEncodingKind encoding = stripe->getColumnEncoding(col);
    +      out << "         { \"column\": " << col
    +          << ", \"encoding\": \""
    +          << columnEncodingKindToString(encoding) << "\"";
    +      if (encoding == orc::ColumnEncodingKind_DICTIONARY ||
    +          encoding == orc::ColumnEncodingKind_DICTIONARY_V2) {
    +        out << ", \"count\": " << stripe->getDictionarySize(col);
    +      }
    +      out << " }";
    +    }
    +    out << "\n      ],\n";
    +    out << "      \"streams\": [\n";
    +    for(uint64_t str = 0; str < stripe->getNumberOfStreams(); ++str) {
    +      if (str != 0) {
    +        out << ",\n";
    +      }
    +      ORC_UNIQUE_PTR<orc::StreamInformation> stream =
    +        stripe->getStreamInformation(str);
    +      out << "        { \"id\": " << str
    +          << ", \"column\": " << stream->getColumnId()
    +          << ", \"kind\": \"" << streamKindToString(stream->getKind())
    +          << "\",\n";
    +      out << "          \"offset\": " << stream->getOffset()
    --- End diff --
    
    This may be nit-picking, but I find the output more readable if lines 
67-68 are replaced with:
    << ", \"offset\": " << stream->getOffset()


> Replace tool implementation to use Reader interface
> ---------------------------------------------------
>
>                 Key: ORC-8
>                 URL: https://issues.apache.org/jira/browse/ORC-8
>             Project: Orc
>          Issue Type: Bug
>            Reporter: Owen O'Malley
>            Assignee: Owen O'Malley
>
> Currently FileMetadata is using custom parsing of the ORC footer and 
> postscript. It should use the standard interface from Reader.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to