[ 
https://issues.apache.org/jira/browse/ORC-21?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14996647#comment-14996647
 ] 

ASF GitHub Bot commented on ORC-21:
-----------------------------------

Github user asandryh commented on a diff in the pull request:

    https://github.com/apache/orc/pull/12#discussion_r44282290
  
    --- Diff: c++/src/Reader.cc ---
    @@ -1364,6 +1364,111 @@ namespace orc {
         int64_t getEpochOffset() const override;
       };
     
    +  uint64_t maxStreamsForType(const proto::Type& type) {
    +    switch (type.kind()) {
    +      case proto::Type_Kind_STRUCT:
    +        return 1;
    +      case proto::Type_Kind_INT:
    +      case proto::Type_Kind_LONG:
    +      case proto::Type_Kind_SHORT:
    +      case proto::Type_Kind_FLOAT:
    +      case proto::Type_Kind_DOUBLE:
    +      case proto::Type_Kind_BOOLEAN:
    +      case proto::Type_Kind_BYTE:
    +      case proto::Type_Kind_DATE:
    +      case proto::Type_Kind_LIST:
    +      case proto::Type_Kind_MAP:
    +      case proto::Type_Kind_UNION:
    +        return 2;
    +      case proto::Type_Kind_BINARY:
    +      case proto::Type_Kind_DECIMAL:
    +      case proto::Type_Kind_TIMESTAMP:
    +        return 3;
    +      case proto::Type_Kind_CHAR:
    +      case proto::Type_Kind_STRING:
    +      case proto::Type_Kind_VARCHAR:
    +        return 4;
    +      default:
    +          return 0;
    +      }
    +  }
    +
    +  uint64_t ReaderImpl::memoryUse(int stripeIx) {
    +    uint64_t maxDataLength = 0;
    +
    +    if (stripeIx >= 0 && stripeIx < footer->stripes_size()) {
    +      uint64_t stripe = footer->stripes(stripeIx).datalength();
    +      if (maxDataLength < stripe) {
    +        maxDataLength = stripe;
    +      }
    +    } else {
    +      for (int i=0; i < footer->stripes_size(); i++) {
    +        uint64_t stripe = footer->stripes(i).datalength();
    +        if (maxDataLength < stripe) {
    +          maxDataLength = stripe;
    +        }
    +      }
    +    }
    +
    +    bool hasStringColumn = false;
    +    uint64_t nSelectedStreams = 0;
    +    for (int i=0; !hasStringColumn && i < footer->types_size(); i++) {
    +      if (selectedColumns[i]) {
    +        const proto::Type& type = footer->types(i);
    +        nSelectedStreams += maxStreamsForType(type) ;
    +        switch (type.kind()) {
    --- End diff --
    
    Yes, I will add a cast here.


> Add functionality to estimate memory footprint
> ----------------------------------------------
>
>                 Key: ORC-21
>                 URL: https://issues.apache.org/jira/browse/ORC-21
>             Project: Orc
>          Issue Type: Task
>            Reporter: Aliaksei Sandryhaila
>            Assignee: Aliaksei Sandryhaila
>
> The ORC library allocates multiple large buffers to read and materialize ORC 
> files. For the stability of applications that use the library, it may be 
> desirable to have an estimate (preferably a tight upper bound) of its memory 
> footprint.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to