[
https://issues.apache.org/jira/browse/ORC-21?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14997018#comment-14997018
]
ASF GitHub Bot commented on ORC-21:
-----------------------------------
Github user omalley commented on a diff in the pull request:
https://github.com/apache/orc/pull/12#discussion_r44307026
--- Diff: c++/src/Reader.cc ---
@@ -1364,6 +1365,111 @@ namespace orc {
int64_t getEpochOffset() const override;
};
+ uint64_t maxStreamsForType(const proto::Type& type) {
+ switch (static_cast<int64_t>(type.kind())) {
+ case proto::Type_Kind_STRUCT:
+ return 1;
+ case proto::Type_Kind_INT:
+ case proto::Type_Kind_LONG:
+ case proto::Type_Kind_SHORT:
+ case proto::Type_Kind_FLOAT:
+ case proto::Type_Kind_DOUBLE:
+ case proto::Type_Kind_BOOLEAN:
+ case proto::Type_Kind_BYTE:
+ case proto::Type_Kind_DATE:
+ case proto::Type_Kind_LIST:
+ case proto::Type_Kind_MAP:
+ case proto::Type_Kind_UNION:
+ return 2;
+ case proto::Type_Kind_BINARY:
+ case proto::Type_Kind_DECIMAL:
+ case proto::Type_Kind_TIMESTAMP:
+ return 3;
+ case proto::Type_Kind_CHAR:
+ case proto::Type_Kind_STRING:
+ case proto::Type_Kind_VARCHAR:
+ return 4;
+ default:
+ return 0;
+ }
+ }
+
+ uint64_t ReaderImpl::getMemoryUse(int stripeIx) {
+ uint64_t maxDataLength = 0;
+
+ if (stripeIx >= 0 && stripeIx < footer->stripes_size()) {
+ uint64_t stripe = footer->stripes(stripeIx).datalength();
+ if (maxDataLength < stripe) {
+ maxDataLength = stripe;
+ }
+ } else {
+ for (int i=0; i < footer->stripes_size(); i++) {
+ uint64_t stripe = footer->stripes(i).datalength();
+ if (maxDataLength < stripe) {
+ maxDataLength = stripe;
+ }
+ }
+ }
+
+ bool hasStringColumn = false;
+ uint64_t nSelectedStreams = 0;
+ for (int i=0; !hasStringColumn && i < footer->types_size(); i++) {
+ if (selectedColumns[i]) {
--- End diff --
clang needs:
if (selectedColumns[static_cast<size_t>(i)]) {
to avoid a diagnostic about using a signed int where an unsigned int is needed.
> Add functionality to estimate memory footprint
> ----------------------------------------------
>
> Key: ORC-21
> URL: https://issues.apache.org/jira/browse/ORC-21
> Project: Orc
> Issue Type: Task
> Reporter: Aliaksei Sandryhaila
> Assignee: Aliaksei Sandryhaila
>
> The ORC library allocates multiple large buffers to read and materialize ORC
> files. For the stability of applications that use the library, it may be
> desirable to have an estimate (preferably a tight upper bound) of its memory
> footprint.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)