[ https://issues.apache.org/jira/browse/RANGER-4404?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Bhavik Patel updated RANGER-4404: --------------------------------- Description: Currently, if we have a 50GB audit log file in the spool directory, the conversion and writing to HDFS takes 4-5 hours. Also, we are observing the error logs below: {code:java} ERROR [AuditFileQueueSpool_hdfs_destWriter] provider.BaseAuditHandler: Error writing to log file. java.lang.RuntimeException: Overflow of newLength. smallBuffer.length=1073741824, nextElemLength=38 at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.increaseBufferSpace(BytesColumnVector.java:311) at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setVal(BytesColumnVector.java:182) at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setVal(BytesColumnVector.java:207) at org.apache.ranger.audit.utils.ORCFileUtil.log(ORCFileUtil.java:143) at org.apache.ranger.audit.utils.RangerORCAuditWriter$1.run(RangerORCAuditWriter.java:77) at org.apache.ranger.audit.utils.RangerORCAuditWriter$1.run(RangerORCAuditWriter.java:73) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762) at org.apache.ranger.audit.provider.MiscUtil.executePrivilegedAction(MiscUtil.java:541) at org.apache.ranger.audit.utils.RangerORCAuditWriter.logAuditAsORC(RangerORCAuditWriter.java:73) at org.apache.ranger.audit.utils.RangerORCAuditWriter.logAsORC(RangerORCAuditWriter.java:159) at org.apache.ranger.audit.utils.RangerORCAuditWriter.log(RangerORCAuditWriter.java:112) at org.apache.ranger.audit.destination.HDFSAuditDestination.logJSON(HDFSAuditDestination.java:78) at org.apache.ranger.audit.destination.HDFSAuditDestination.log(HDFSAuditDestination.java:163) at org.apache.ranger.audit.queue.AuditFileQueueSpool.sendEvent(AuditFileQueueSpool.java:926) at org.apache.ranger.audit.queue.AuditFileQueueSpool.logEvent(AuditFileQueueSpool.java:913) at 
org.apache.ranger.audit.queue.AuditFileQueueSpool.runLogAudit(AuditFileQueueSpool.java:847) at org.apache.ranger.audit.queue.AuditFileQueueSpool.run(AuditFileQueueSpool.java:790) {code} hive-storage-api version upgrade(>=2.7.3) required to resolve the above error. Current version is 2.7.2 cc: [~rmani] [~fateh288] was: Currently if we have 50GB audit log file in spool directory then it is taking 4-5hr for the conversion and writing to HDFS. Also, we are observing below error logs {code:java} ERROR [AuditFileQueueSpool_hdfs_destWriter] provider.BaseAuditHandler: Error writing to log file. java.lang.RuntimeException: Overflow of newLength. smallBuffer.length=1073741824, nextElemLength=38 at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.increaseBufferSpace(BytesColumnVector.java:311) at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setVal(BytesColumnVector.java:182) at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setVal(BytesColumnVector.java:207) at org.apache.ranger.audit.utils.ORCFileUtil.log(ORCFileUtil.java:143) at org.apache.ranger.audit.utils.RangerORCAuditWriter$1.run(RangerORCAuditWriter.java:77) at org.apache.ranger.audit.utils.RangerORCAuditWriter$1.run(RangerORCAuditWriter.java:73) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762) at org.apache.ranger.audit.provider.MiscUtil.executePrivilegedAction(MiscUtil.java:541) at org.apache.ranger.audit.utils.RangerORCAuditWriter.logAuditAsORC(RangerORCAuditWriter.java:73) at org.apache.ranger.audit.utils.RangerORCAuditWriter.logAsORC(RangerORCAuditWriter.java:159) at org.apache.ranger.audit.utils.RangerORCAuditWriter.log(RangerORCAuditWriter.java:112) at org.apache.ranger.audit.destination.HDFSAuditDestination.logJSON(HDFSAuditDestination.java:78) at 
org.apache.ranger.audit.destination.HDFSAuditDestination.log(HDFSAuditDestination.java:163) at org.apache.ranger.audit.queue.AuditFileQueueSpool.sendEvent(AuditFileQueueSpool.java:926) at org.apache.ranger.audit.queue.AuditFileQueueSpool.logEvent(AuditFileQueueSpool.java:913) at org.apache.ranger.audit.queue.AuditFileQueueSpool.runLogAudit(AuditFileQueueSpool.java:847) at org.apache.ranger.audit.queue.AuditFileQueueSpool.run(AuditFileQueueSpool.java:790) {code} hive-storage-api version upgrade(>=2.7.3) required to resolve the above error. Current version is 2.7.2 > Audit to hdfs for orc format feature stabilisation > -------------------------------------------------- > > Key: RANGER-4404 > URL: https://issues.apache.org/jira/browse/RANGER-4404 > Project: Ranger > Issue Type: Improvement > Components: audit > Affects Versions: 3.0.0, 2.4.0 > Reporter: Bhavik Patel > Assignee: Bhavik Patel > Priority: Major > > Currently if we have 50GB audit log file in spool directory then it is taking > 4-5hr for the conversion and writing to HDFS. > Also, we are observing below error logs > {code:java} > ERROR [AuditFileQueueSpool_hdfs_destWriter] provider.BaseAuditHandler: Error > writing to log file. > java.lang.RuntimeException: Overflow of newLength. 
> smallBuffer.length=1073741824, nextElemLength=38 > at > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.increaseBufferSpace(BytesColumnVector.java:311) > at > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setVal(BytesColumnVector.java:182) > at > org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setVal(BytesColumnVector.java:207) > at org.apache.ranger.audit.utils.ORCFileUtil.log(ORCFileUtil.java:143) > at > org.apache.ranger.audit.utils.RangerORCAuditWriter$1.run(RangerORCAuditWriter.java:77) > at > org.apache.ranger.audit.utils.RangerORCAuditWriter$1.run(RangerORCAuditWriter.java:73) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762) > at > org.apache.ranger.audit.provider.MiscUtil.executePrivilegedAction(MiscUtil.java:541) > at > org.apache.ranger.audit.utils.RangerORCAuditWriter.logAuditAsORC(RangerORCAuditWriter.java:73) > at > org.apache.ranger.audit.utils.RangerORCAuditWriter.logAsORC(RangerORCAuditWriter.java:159) > at > org.apache.ranger.audit.utils.RangerORCAuditWriter.log(RangerORCAuditWriter.java:112) > at > org.apache.ranger.audit.destination.HDFSAuditDestination.logJSON(HDFSAuditDestination.java:78) > at > org.apache.ranger.audit.destination.HDFSAuditDestination.log(HDFSAuditDestination.java:163) > at > org.apache.ranger.audit.queue.AuditFileQueueSpool.sendEvent(AuditFileQueueSpool.java:926) > at > org.apache.ranger.audit.queue.AuditFileQueueSpool.logEvent(AuditFileQueueSpool.java:913) > at > org.apache.ranger.audit.queue.AuditFileQueueSpool.runLogAudit(AuditFileQueueSpool.java:847) > at > org.apache.ranger.audit.queue.AuditFileQueueSpool.run(AuditFileQueueSpool.java:790) > {code} > hive-storage-api version upgrade(>=2.7.3) required to resolve the above error. 
> The current version is 2.7.2. > cc: [~rmani] [~fateh288] -- This message was sent by Atlassian Jira (v8.20.10#820010)