[
https://issues.apache.org/jira/browse/ORC-21?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14996647#comment-14996647
]
ASF GitHub Bot commented on ORC-21:
-----------------------------------
Github user asandryh commented on a diff in the pull request:
https://github.com/apache/orc/pull/12#discussion_r44282290
--- Diff: c++/src/Reader.cc ---
@@ -1364,6 +1364,111 @@ namespace orc {
int64_t getEpochOffset() const override;
};
+ uint64_t maxStreamsForType(const proto::Type& type) { // Maps type.kind() to a fixed per-kind stream count (0 to 4).
+ switch (type.kind()) {
+ case proto::Type_Kind_STRUCT:
+ return 1; // STRUCT is the only kind counted as 1
+ case proto::Type_Kind_INT:
+ case proto::Type_Kind_LONG:
+ case proto::Type_Kind_SHORT:
+ case proto::Type_Kind_FLOAT:
+ case proto::Type_Kind_DOUBLE:
+ case proto::Type_Kind_BOOLEAN:
+ case proto::Type_Kind_BYTE:
+ case proto::Type_Kind_DATE:
+ case proto::Type_Kind_LIST:
+ case proto::Type_Kind_MAP:
+ case proto::Type_Kind_UNION:
+ return 2; // INT through UNION above all share this count
+ case proto::Type_Kind_BINARY:
+ case proto::Type_Kind_DECIMAL:
+ case proto::Type_Kind_TIMESTAMP:
+ return 3; // BINARY, DECIMAL, TIMESTAMP
+ case proto::Type_Kind_CHAR:
+ case proto::Type_Kind_STRING:
+ case proto::Type_Kind_VARCHAR:
+ return 4; // CHAR, STRING, VARCHAR
+ default:
+ return 0; // any kind not listed above is counted as 0
+ }
+ }
+
+ uint64_t ReaderImpl::memoryUse(int stripeIx) {
+ uint64_t maxDataLength = 0;
+
+ if (stripeIx >= 0 && stripeIx < footer->stripes_size()) {
+ uint64_t stripe = footer->stripes(stripeIx).datalength();
+ if (maxDataLength < stripe) {
+ maxDataLength = stripe;
+ }
+ } else {
+ for (int i=0; i < footer->stripes_size(); i++) {
+ uint64_t stripe = footer->stripes(i).datalength();
+ if (maxDataLength < stripe) {
+ maxDataLength = stripe;
+ }
+ }
+ }
+
+ bool hasStringColumn = false;
+ uint64_t nSelectedStreams = 0;
+ for (int i=0; !hasStringColumn && i < footer->types_size(); i++) {
+ if (selectedColumns[i]) {
+ const proto::Type& type = footer->types(i);
+ nSelectedStreams += maxStreamsForType(type) ;
+ switch (type.kind()) {
--- End diff --
Yes, will add casting here.
> Add functionality to estimate memory footprint
> ----------------------------------------------
>
> Key: ORC-21
> URL: https://issues.apache.org/jira/browse/ORC-21
> Project: Orc
> Issue Type: Task
> Reporter: Aliaksei Sandryhaila
> Assignee: Aliaksei Sandryhaila
>
> The ORC library allocates multiple large buffers to read and materialize ORC
> files. For the stability of applications that use the library, it may be
> desirable to have an estimate (preferably a tight upper bound) of its memory
> footprint.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)