hive git commit: HIVE-17823 : Fix subquery Qtest of Hive on Spark (Dapeng Sun, reviewed by Vineet Garg)
Repository: hive Updated Branches: refs/heads/master 0ea30c35e -> 42581930e HIVE-17823 : Fix subquery Qtest of Hive on Spark (Dapeng Sun, reviewed by Vineet Garg) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/42581930 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/42581930 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/42581930 Branch: refs/heads/master Commit: 42581930ebfd8a5f037e6f488fa585ff047bac0d Parents: 0ea30c3 Author: Dapeng SunAuthored: Wed Oct 18 19:43:56 2017 -0700 Committer: Vineet Garg Committed: Wed Oct 18 19:44:48 2017 -0700 -- .../clientpositive/spark/subquery_multi.q.out | 40 +++--- .../clientpositive/spark/subquery_notin.q.out | 136 +-- .../clientpositive/spark/subquery_scalar.q.out | 46 +++ .../clientpositive/spark/subquery_select.q.out | 26 ++-- .../clientpositive/spark/subquery_views.q.out | 36 ++--- 5 files changed, 142 insertions(+), 142 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/42581930/ql/src/test/results/clientpositive/spark/subquery_multi.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out index f9b2c1b..ff9b921 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -2149,18 +2149,18 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (p_container is not null and p_type is not null) (type: boolean) -Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE +predicate: ((p_container = p_container) and (p_type = p_type)) (type: boolean) +Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_type (type: string), p_name (type: string), p_container (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) -Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -2232,12 +2232,12 @@ STAGE PLANS: 0 _col4 (type: string), _col1 (type: string), _col6 (type: string) 1 _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 -Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Reduce Operator Tree: @@ -2285,16 +2285,16 @@ 
STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE Select Operator
hive git commit: HIVE-17805 : SchemaTool validate locations should not return exit 1 (Vihang Karajgaonkar)
Repository: hive
Updated Branches:
  refs/heads/branch-2 cda7e01c5 -> dd39330f6

HIVE-17805 : SchemaTool validate locations should not return exit 1 (Vihang Karajgaonkar)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dd39330f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dd39330f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dd39330f

Branch: refs/heads/branch-2
Commit: dd39330f6f0de775063d46580809bab4135fa7f0
Parents: cda7e01
Author: Vihang Karajgaonkar
Authored: Fri Oct 13 15:22:43 2017 -0700
Committer: Vihang Karajgaonkar
Committed: Wed Oct 18 17:52:50 2017 -0700

--
 beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/dd39330f/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
--
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index 4255ebd..2af5b2c 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -618,14 +618,12 @@ public class HiveSchemaTool {
       if (validateLocations(conn, this.validationServers)) {
         System.out.println("[SUCCESS]\n");
       } else {
-        success = false;
-        System.out.println("[FAIL]\n");
+        System.out.println("[WARN]\n");
       }
       if (validateColumnNullValues(conn)) {
         System.out.println("[SUCCESS]\n");
       } else {
-        success = false;
-        System.out.println("[FAIL]\n");
+        System.out.println("[WARN]\n");
       }
     } finally {
       if (conn != null) {
hive git commit: HIVE-17805 : SchemaTool validate locations should not return exit 1 (Vihang Karajgaonkar)
Repository: hive
Updated Branches:
  refs/heads/master 7decd4218 -> 0ea30c35e

HIVE-17805 : SchemaTool validate locations should not return exit 1 (Vihang Karajgaonkar)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0ea30c35
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0ea30c35
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0ea30c35

Branch: refs/heads/master
Commit: 0ea30c35e5d748c178f4f9a58f41f34e0029e44f
Parents: 7decd42
Author: Vihang Karajgaonkar
Authored: Fri Oct 13 15:22:43 2017 -0700
Committer: Vihang Karajgaonkar
Committed: Wed Oct 18 17:39:09 2017 -0700

--
 beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/0ea30c35/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
--
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index 7c65fe1..5350311 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -614,14 +614,12 @@ public class HiveSchemaTool {
       if (validateLocations(conn, this.validationServers)) {
         System.out.println("[SUCCESS]\n");
       } else {
-        success = false;
-        System.out.println("[FAIL]\n");
+        System.out.println("[WARN]\n");
      }
       if (validateColumnNullValues(conn)) {
         System.out.println("[SUCCESS]\n");
       } else {
-        success = false;
-        System.out.println("[FAIL]\n");
+        System.out.println("[WARN]\n");
       }
     } finally {
       if (conn != null) {
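The patch (applied identically to branch-2 above and master here) changes only the reporting of the two "soft" validators: a failed location or null-column-value check now prints [WARN] and no longer flips the overall success flag, so schemaTool -validate stops returning exit 1 for them. A minimal, hypothetical Java sketch of the resulting pattern follows; the class and the stubbed boolean checks are invented for illustration, and only the method names mirror HiveSchemaTool.

// Hedged sketch of the post-patch behavior; illustrative class, not Hive code.
public class ValidationReportSketch {
  private boolean success = true; // still flipped by the remaining hard validators

  void runSoftValidators() {
    if (validateLocations()) {
      System.out.println("[SUCCESS]\n");
    } else {
      // Before the patch: success = false; System.out.println("[FAIL]\n");
      System.out.println("[WARN]\n");
    }
    if (validateColumnNullValues()) {
      System.out.println("[SUCCESS]\n");
    } else {
      System.out.println("[WARN]\n");
    }
  }

  int exitCode() {
    return success ? 0 : 1; // soft validator results never reach this flag now
  }

  // Stand-ins for the real JDBC-backed checks.
  private boolean validateLocations() { return false; }
  private boolean validateColumnNullValues() { return true; }
}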
[1/2] hive git commit: HIVE-12631 : LLAP IO: support ORC ACID tables (Teddy Choi, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master c6c374eb0 -> 7decd4218 http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out b/ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out new file mode 100644 index 000..f00a690 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out @@ -0,0 +1,361 @@ +PREHOOK: query: DROP TABLE orc_llap_acid_fast +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap_acid_fast +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_llap_acid_fast ( +cint INT, +cbigint BIGINT, +cfloat FLOAT, +cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap_acid_fast +POSTHOOK: query: CREATE TABLE orc_llap_acid_fast ( +cint INT, +cbigint BIGINT, +cfloat FLOAT, +cdouble DOUBLE) +partitioned by (csmallint smallint) +clustered by (cint) into 2 buckets stored as orc +TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap_acid_fast +PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_acid_fast@csmallint=1 +POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 1) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=1 +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=1).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_acid_fast@csmallint=2 +POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 2) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble asc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=2 +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cfloat SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=2).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_acid_fast@csmallint=3 +POSTHOOK: query: insert into table orc_llap_acid_fast partition (csmallint = 3) +select cint, cbigint, cfloat, cdouble from alltypesorc order by cdouble desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_acid_fast@csmallint=3 +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast PARTITION(csmallint=3).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_llap_acid_fast
[2/2] hive git commit: HIVE-12631 : LLAP IO: support ORC ACID tables (Teddy Choi, reviewed by Sergey Shelukhin)
HIVE-12631 : LLAP IO: support ORC ACID tables (Teddy Choi, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7decd421 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7decd421 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7decd421 Branch: refs/heads/master Commit: 7decd4218135154180fd21f4b73be5e954814d40 Parents: c6c374e Author: sergeyAuthored: Wed Oct 18 15:19:18 2017 -0700 Committer: sergey Committed: Wed Oct 18 15:19:30 2017 -0700 -- .../test/resources/testconfiguration.properties | 2 + .../hive/llap/io/api/impl/LlapInputFormat.java | 18 +- .../hive/llap/io/api/impl/LlapRecordReader.java | 83 - .../llap/io/decode/OrcEncodedDataConsumer.java | 3 +- .../llap/io/encoded/OrcEncodedDataReader.java | 26 +- .../exec/vector/VectorExpressionDescriptor.java | 3 + .../hive/ql/exec/vector/VectorExtractRow.java | 2 +- .../hive/ql/exec/vector/VectorMapOperator.java | 11 +- .../ql/exec/vector/VectorizationContext.java| 5 + .../ql/exec/vector/VectorizedRowBatchCtx.java | 48 ++- .../VectorExpressionWriterFactory.java | 52 ++- .../hadoop/hive/ql/io/LlapAwareSplit.java | 4 +- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 19 +- .../apache/hadoop/hive/ql/io/orc/OrcSplit.java | 35 +- .../io/orc/VectorizedOrcAcidRowBatchReader.java | 156 +--- .../ql/io/orc/VectorizedOrcAcidRowReader.java | 36 +- .../orc/encoded/EncodedTreeReaderFactory.java | 10 +- .../ql/parse/UpdateDeleteSemanticAnalyzer.java | 4 + .../queries/clientpositive/llap_acid_fast.q | 49 +++ .../clientpositive/llap/acid_no_buckets.q.out | 8 +- .../results/clientpositive/llap/llap_acid.q.out | 321 + .../clientpositive/llap/llap_acid_fast.q.out| 361 +++ .../results/clientpositive/llap_acid_fast.q.out | 315 23 files changed, 1437 insertions(+), 134 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8d92da3..06ebc98 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -190,6 +190,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ kill_query.q,\ leftsemijoin.q,\ limit_pushdown.q,\ + llap_acid.q,\ + llap_acid_fast.q,\ load_dyn_part1.q,\ load_dyn_part2.q,\ load_dyn_part3.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/7decd421/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 79ec4ed..1cf5f49 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -138,7 +138,7 @@ public class LlapInputFormat implements InputFormat
hive git commit: HIVE-17789: Flaky test: TestSessionManagerMetrics.testAbandonedSessionMetrics has timing related problems (Andrew Sherman, reviewed by Sahil Takiar, Aihua Xu)
Repository: hive
Updated Branches:
  refs/heads/master c129bb95d -> c6c374eb0

HIVE-17789: Flaky test: TestSessionManagerMetrics.testAbandonedSessionMetrics has timing related problems (Andrew Sherman, reviewed by Sahil Takiar, Aihua Xu)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c6c374eb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c6c374eb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c6c374eb

Branch: refs/heads/master
Commit: c6c374eb09c28baa4d8fd9e5de30aa3554ef804e
Parents: c129bb9
Author: Andrew Sherman
Authored: Wed Oct 18 14:16:07 2017 -0700
Committer: Sahil Takiar
Committed: Wed Oct 18 14:16:59 2017 -0700

--
 .../cli/session/TestSessionManagerMetrics.java | 22 +++-
 1 file changed, 17 insertions(+), 5 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/c6c374eb/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
--
diff --git a/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java b/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
index 5f418c7..646159f 100644
--- a/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
+++ b/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
@@ -25,6 +25,8 @@ import java.util.concurrent.BrokenBarrierException;
 import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
+
+import com.fasterxml.jackson.databind.JsonNode;
 import org.apache.hadoop.hive.common.metrics.MetricsTestUtils;
 import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
 import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
@@ -32,7 +34,6 @@ import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics;
 import org.apache.hadoop.hive.common.metrics.metrics2.MetricsReporting;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.util.Time;
 import org.apache.hive.service.cli.FetchOrientation;
 import org.apache.hive.service.cli.HiveSQLException;
 import org.apache.hive.service.cli.OperationHandle;
@@ -373,9 +374,20 @@ public class TestSessionManagerMetrics {
     sm.openSession(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V9, "user", "passw", "127.0.0.1",
         new HashMap ());

-    Thread.sleep(3200);
-
-    json = metrics.dumpJson();
-    MetricsTestUtils.verifyMetricsJson(json, MetricsTestUtils.COUNTER, MetricsConstant.HS2_ABANDONED_SESSIONS, 1);
+    // We're going to wait for the session to be abandoned.
+    String currentValue;
+    int count = 5; // how many times we'll sleep before giving up
+    String expectedValue = "1";
+    do {
+      // HIVE_SERVER2_SESSION_CHECK_INTERVAL is set to 3 seconds, so we have to wait for at least
+      // that long to see an abandoned session
+      Thread.sleep(3200);
+      json = metrics.dumpJson();
+      currentValue = MetricsTestUtils
+          .getJsonNode(json, MetricsTestUtils.COUNTER, MetricsConstant.HS2_ABANDONED_SESSIONS)
+          .asText();
+      // loop until the value is correct or we run out of tries
+    } while (!expectedValue.equals(currentValue) && --count > 0);
+    Assert.assertEquals(expectedValue, currentValue);
   }
 }
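The fix replaces a single fixed Thread.sleep with a bounded poll: the test re-reads the metrics JSON until the abandoned-session counter reaches the expected value or the retries run out, which removes the timing sensitivity. Below is a self-contained sketch of that polling pattern, with the metrics lookup abstracted behind a java.util.function.Supplier (the real test reads CodahaleMetrics.dumpJson() through MetricsTestUtils.getJsonNode); the interval, attempt count, and toy supplier are illustrative values only.

import java.util.function.Supplier;

// Bounded-poll helper in the spirit of the patched test: sleep, re-read the
// observed value, and stop as soon as it matches or the attempts are used up.
public final class PollUntil {

  public static String poll(Supplier<String> read, String expected,
                            long intervalMillis, int attempts) throws InterruptedException {
    String current;
    do {
      Thread.sleep(intervalMillis); // e.g. just over the 3s session-check interval
      current = read.get();
    } while (!expected.equals(current) && --attempts > 0);
    return current;                 // caller asserts equality once, after the loop
  }

  public static void main(String[] args) throws InterruptedException {
    long start = System.currentTimeMillis();
    // Toy supplier: reports "1" once roughly 2.5 seconds have elapsed.
    String observed = poll(
        () -> System.currentTimeMillis() - start > 2500 ? "1" : "0",
        "1", 1000, 5);
    System.out.println("observed: " + observed);
  }
}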
[3/4] hive git commit: HIVE-17164: Vectorization: Support PTF (Part 2: Unbounded Support-- Turn ON by default) (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/c129bb95/ql/src/test/queries/clientpositive/vector_ptf_part_simple.q -- diff --git a/ql/src/test/queries/clientpositive/vector_ptf_part_simple.q b/ql/src/test/queries/clientpositive/vector_ptf_part_simple.q index bd3b3e4..fc9f9eb 100644 --- a/ql/src/test/queries/clientpositive/vector_ptf_part_simple.q +++ b/ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -502,3 +502,50 @@ from vector_ptf_part_simple_orc; select p_mfgr, p_name, p_retailprice, rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r from vector_ptf_part_simple_orc; + + +-- +-- Run some tests with these parameters that force spilling to disk. +-- +set hive.vectorized.ptf.max.memory.buffering.batch.count=1; +set hive.vectorized.testing.reducer.batch.size=2; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc; + +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc; + + +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc; + http://git-wip-us.apache.org/repos/asf/hive/blob/c129bb95/ql/src/test/queries/clientpositive/vector_windowing_expressions.q -- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_expressions.q b/ql/src/test/queries/clientpositive/vector_windowing_expressions.q index 7d8c5d5..6a37c4e 100644 --- a/ql/src/test/queries/clientpositive/vector_windowing_expressions.q +++ b/ql/src/test/queries/clientpositive/vector_windowing_expressions.q @@ -92,3 +92,38 @@ round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50. from part window w1 as (distribute by p_mfgr sort by p_retailprice) limit 11; + + +-- +-- Run some tests with these parameters that force spilling to disk. 
+-- +set hive.vectorized.ptf.max.memory.buffering.batch.count=1; +set hive.vectorized.testing.reducer.batch.size=2; + +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; + +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; + +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; + +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; + +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +select * from t1 limit 3; +select * from t2 limit 3; + +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11;
[1/4] hive git commit: HIVE-17164: Vectorization: Support PTF (Part 2: Unbounded Support-- Turn ON by default) (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master acb3ba274 -> c129bb95d http://git-wip-us.apache.org/repos/asf/hive/blob/c129bb95/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out b/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out index beb01b4..26e2f9b 100644 --- a/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out +++ b/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out @@ -1820,3 +1820,226 @@ Manufacturer#2 1698.66 25 true Manufacturer#2 1701.6 18 true Manufacturer#2 1800.7 40 true Manufacturer#2 2031.98 2 true +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +PREHOOK: Input: default@part + A masked pattern was here +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part + A masked pattern was here +p_mfgr p_retailprice p_size _c3 _c4 +Manufacturer#1 1173.15 2 truetrue +Manufacturer#1 1173.15 2 truetrue +Manufacturer#1 1414.42 28 truetrue +Manufacturer#1 1602.59 6 truetrue +Manufacturer#1 1632.66 42 truetrue +Manufacturer#1 1753.76 34 truetrue +Manufacturer#2 1690.68 14 truetrue +Manufacturer#2 1698.66 25 truetrue +Manufacturer#2 1701.6 18 truetrue +Manufacturer#2 1800.7 40 truetrue +Manufacturer#2 2031.98 2 truetrue +Manufacturer#3 1190.27 14 truetrue +Manufacturer#3 1337.29 45 truetrue +Manufacturer#3 1410.39 19 truetrue +Manufacturer#3 1671.68 17 truetrue +Manufacturer#3 1922.98 1 truetrue +Manufacturer#4 1206.26 27 truetrue +Manufacturer#4 1290.35 12 truetrue +Manufacturer#4 1375.42 39 truetrue +Manufacturer#4 1620.67 10 truetrue +Manufacturer#4 1844.92 7 truetrue +Manufacturer#5 1018.1 46 truetrue +Manufacturer#5 1464.48 23 truetrue +Manufacturer#5 1611.66 6 truetrue +Manufacturer#5 1788.73 2 truetrue +Manufacturer#5 1789.69 31 truetrue +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part + A masked pattern was here +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part + A masked pattern was here +p_mfgr p_retailprice p_size r s2 s1 
+Manufacturer#1 1173.15 2 1 1173.15 1168.15 +Manufacturer#1 1173.15 2 1 2346.3 2341.3 +Manufacturer#1 1414.42 28 3 3760.72003 3755.72003 +Manufacturer#1 1602.59 6 4 5363.31 5358.31 +Manufacturer#1 1632.66 42 5 6995.97 6990.97 +Manufacturer#1 1753.76 34 6 8749.73 8744.73 +Manufacturer#2 1690.68 14 1 1690.68 1685.68 +Manufacturer#2 1698.66 25 2 3389.34 3384.34 +Manufacturer#2 1701.6 18 3 5090.94005 5085.94005 +Manufacturer#2 1800.7 40 4 6891.64 6886.64 +Manufacturer#2 2031.98 2 5 8923.62 8918.62 +Manufacturer#3 1190.27 14 1 1190.27 1185.27 +Manufacturer#3 1337.29 45 2 2527.56 2522.56 +Manufacturer#3 1410.39 19 3 3937.95 3932.95 +Manufacturer#3 1671.68 17 4 5609.63 5604.63 +Manufacturer#3 1922.98 1 5 7532.6101 7527.6101 +Manufacturer#4 1206.26 27 1 1206.26 1201.26
[4/4] hive git commit: HIVE-17164: Vectorization: Support PTF (Part 2: Unbounded Support-- Turn ON by default) (Matt McCline, reviewed by Teddy Choi)
HIVE-17164: Vectorization: Support PTF (Part 2: Unbounded Support-- Turn ON by default) (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c129bb95 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c129bb95 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c129bb95 Branch: refs/heads/master Commit: c129bb95d1f864a004f21a051bbcddfb61464528 Parents: acb3ba2 Author: Matt McClineAuthored: Wed Oct 18 14:19:41 2017 -0500 Committer: Matt McCline Committed: Wed Oct 18 14:19:41 2017 -0500 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 12 +- .../persistence/HybridHashTableContainer.java |8 +- .../hive/ql/exec/tez/ReduceRecordProcessor.java |3 +- .../hive/ql/exec/tez/ReduceRecordSource.java| 16 +- .../hive/ql/exec/vector/VectorSerializeRow.java | 16 + .../ql/exec/vector/VectorizedBatchUtil.java | 232 +- .../VectorMapJoinGenerateResultOperator.java|5 +- .../mapjoin/VectorMapJoinRowBytesContainer.java | 321 --- .../exec/vector/ptf/VectorPTFGroupBatches.java | 273 ++- .../ql/exec/vector/ptf/VectorPTFOperator.java | 39 +- .../VectorRowBytesContainer.java| 331 +++ .../hive/ql/optimizer/physical/Vectorizer.java | 69 +- .../apache/hadoop/hive/ql/plan/BaseWork.java|9 + .../hadoop/hive/ql/plan/VectorPTFDesc.java | 33 +- .../vector/TestVectorRowBytesContainer.java | 76 + .../TestVectorMapJoinRowBytesContainer.java | 75 - .../clientpositive/vector_ptf_part_simple.q | 47 + .../vector_windowing_expressions.q | 35 + .../llap/vector_ptf_part_simple.q.out | 342 +++ .../llap/vector_windowing_expressions.q.out | 2245 ++ .../vector_windowing_expressions.q.out | 223 ++ .../hive/ql/exec/vector/BytesColumnVector.java |2 +- .../hive/ql/exec/vector/ColumnVector.java |5 +- .../ql/exec/vector/DecimalColumnVector.java |2 +- .../hive/ql/exec/vector/DoubleColumnVector.java |2 +- .../vector/IntervalDayTimeColumnVector.java |2 +- .../hive/ql/exec/vector/ListColumnVector.java |2 +- .../hive/ql/exec/vector/LongColumnVector.java |2 +- .../hive/ql/exec/vector/MapColumnVector.java|2 +- .../ql/exec/vector/MultiValuedColumnVector.java |4 +- .../hive/ql/exec/vector/StructColumnVector.java |2 +- .../ql/exec/vector/TimestampColumnVector.java |2 +- .../hive/ql/exec/vector/UnionColumnVector.java |2 +- 33 files changed, 3915 insertions(+), 524 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c129bb95/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1934126..a6ecb37 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2872,9 +2872,17 @@ public class HiveConf extends Configuration { "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were choosen for good performance\n" + "2. 
all: use VectorUDFAdaptor for all UDFs" ), -HIVE_VECTORIZATION_PTF_ENABLED("hive.vectorized.execution.ptf.enabled", false, +HIVE_VECTORIZATION_PTF_ENABLED("hive.vectorized.execution.ptf.enabled", true, "This flag should be set to true to enable vectorized mode of the PTF of query execution.\n" + -"The default value is false."), +"The default value is true."), + + HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count", 25, +"Maximum number of vectorized row batches to buffer in memory for PTF\n" + +"The default value is 25"), + HIVE_VECTORIZATION_TESTING_REDUCER_BATCH_SIZE("hive.vectorized.testing.reducer.batch.size", -1, +"internal use only, used for creating small group key vectorized row batches to exercise more logic\n" + +"The default value is -1 which means don't restrict for testing", +true), HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED("hive.vectorized.complex.types.enabled", true, "This flag should be set to true to enable vectorization\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/c129bb95/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
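Besides turning hive.vectorized.execution.ptf.enabled on by default, the HiveConf hunk above introduces two knobs that the new tests use to force spilling: hive.vectorized.ptf.max.memory.buffering.batch.count (default 25) and the test-only hive.vectorized.testing.reducer.batch.size (default -1, i.e. unrestricted). A hedged sketch of setting them programmatically through HiveConf follows, mirroring the set statements in the q-files; the values simply force buffered PTF batches to spill, and the wrapper class is invented for illustration.

import org.apache.hadoop.hive.conf.HiveConf;

// Illustrative only: mirrors the "set ..." statements used by the new q-tests.
public class PtfSpillSettings {
  public static HiveConf configure() {
    HiveConf conf = new HiveConf();
    // On by default after this commit; shown here for explicitness.
    conf.set("hive.vectorized.execution.ptf.enabled", "true");
    // Buffer at most one vectorized row batch in memory before spilling to disk.
    conf.set("hive.vectorized.ptf.max.memory.buffering.batch.count", "1");
    // Test-only knob: shrink reducer batches so group boundaries are exercised.
    conf.set("hive.vectorized.testing.reducer.batch.size", "2");
    return conf;
  }
}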
[2/4] hive git commit: HIVE-17164: Vectorization: Support PTF (Part 2: Unbounded Support-- Turn ON by default) (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/c129bb95/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out new file mode 100644 index 000..a734e22 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -0,0 +1,2245 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator +key expressions: p_mfgr (type: string), p_retailprice (type: double) +sort order: ++ +Map-reduce partition columns: p_mfgr (type: string) +Reduce Sink Vectorization: +className: VectorReduceSinkObjectHashOperator +keyColumns: [2, 7] +native: true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true +partitionColumns: [2] +valueColumns: [5] +Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE +value expressions: p_size (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +groupByVectorOutput: true +inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: true +usesVectorUDFAdaptor: false +
hive git commit: HIVE-17825: Socket not closed when trying to read files to copy over in replication from metadata (Anishek Agarwal reviewed by Thejas Nair)
Repository: hive Updated Branches: refs/heads/master f973243a5 -> acb3ba274 HIVE-17825: Socket not closed when trying to read files to copy over in replication from metadata (Anishek Agarwal reviewed by Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/acb3ba27 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/acb3ba27 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/acb3ba27 Branch: refs/heads/master Commit: acb3ba274befb343ce30c8ac3b0c4d73bb74c0ef Parents: f973243 Author: Anishek AgarwalAuthored: Wed Oct 18 11:54:56 2017 +0530 Committer: Anishek Agarwal Committed: Wed Oct 18 11:54:56 2017 +0530 -- .../hadoop/hive/ql/exec/ReplCopyTask.java | 55 +++- 1 file changed, 29 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/acb3ba27/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java index c69741b..80905d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java @@ -144,10 +144,13 @@ public class ReplCopyTask extends Task implements Serializable { if (dstFs.exists(destFile)) { String destFileWithSourceName = srcFile.getSourcePath().getName(); Path newDestFile = new Path(toPath, destFileWithSourceName); - dstFs.rename(destFile, newDestFile); + boolean result = dstFs.rename(destFile, newDestFile); + if (!result) { +throw new IllegalStateException( +"could not rename " + destFile.getName() + " to " + newDestFile.getName()); + } } } - return 0; } catch (Exception e) { console.printError("Failed with exception " + e.getMessage(), "\n" @@ -167,32 +170,32 @@ public class ReplCopyTask extends Task implements Serializable { } List filePaths = new ArrayList<>(); -BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fileListing))); -// TODO : verify if skipping charset here is okay - -String line = null; -while ((line = br.readLine()) != null) { - LOG.debug("ReplCopyTask :_filesReadLine:" + line); - - String[] fileWithChksum = ReplChangeManager.getFileWithChksumFromURI(line); - try { -ReplChangeManager.FileInfo f = ReplChangeManager -.getFileInfo(new Path(fileWithChksum[0]), fileWithChksum[1], conf); -filePaths.add(f); - } catch (MetaException e) { -// issue warning for missing file and throw exception -LOG.warn("Cannot find " + fileWithChksum[0] + " in source repo or cmroot"); -throw new IOException(e.getMessage()); +try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fileListing { + // TODO : verify if skipping charset here is okay + + String line = null; + while ((line = br.readLine()) != null) { +LOG.debug("ReplCopyTask :_filesReadLine:" + line); + +String[] fileWithChksum = ReplChangeManager.getFileWithChksumFromURI(line); +try { + ReplChangeManager.FileInfo f = ReplChangeManager + .getFileInfo(new Path(fileWithChksum[0]), fileWithChksum[1], conf); + filePaths.add(f); +} catch (MetaException e) { + // issue warning for missing file and throw exception + LOG.warn("Cannot find " + fileWithChksum[0] + " in source repo or cmroot"); + throw new IOException(e.getMessage()); +} +// Note - we need srcFs rather than fs, because it is possible that the _files lists files +// which are from a different filesystem than the fs where the _files file itself was loaded +// from. 
Currently, it is possible, for eg., to do REPL LOAD hdfs:///dir/ and for the _files +// in it to contain hdfs:/// entries, and/or vice-versa, and this causes errors. +// It might also be possible that there will be a mix of them in a given _files file. +// TODO: revisit close to the end of replv2 dev, to see if our assumption now still holds, +// and if not so, optimize. } - // Note - we need srcFs rather than fs, because it is possible that the _files lists files - // which are from a different filesystem than the fs where the _files file itself was loaded - // from. Currently, it is possible, for eg., to do REPL LOAD hdfs:///dir/ and for the _files - // in it to contain hdfs:/// entries, and/or vice-versa, and this causes errors. - // It
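The substance of HIVE-17825, visible in the (truncated) hunks above, is twofold: the _files listing is now read through try-with-resources so the stream and its underlying socket are always closed, and the return value of FileSystem.rename is checked instead of being ignored. A minimal sketch of both patterns against the Hadoop FileSystem API follows, with the change-manager and checksum handling omitted; the class name and helper methods are illustrative only, not ReplCopyTask itself.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketch of the two corrected patterns in ReplCopyTask:
// (1) try-with-resources guarantees the _files stream is closed even on error;
// (2) rename() failures surface as an exception rather than being swallowed.
public class ReplCopySketch {

  static List<String> readFileListing(FileSystem fs, Path fileListing) throws IOException {
    List<String> lines = new ArrayList<>();
    try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(fileListing)))) {
      String line;
      while ((line = br.readLine()) != null) {
        lines.add(line); // each line names a source file (plus checksum) to copy
      }
    } // stream and socket released here, on success or failure
    return lines;
  }

  static void renameOrFail(FileSystem dstFs, Path from, Path to) throws IOException {
    if (!dstFs.rename(from, to)) {
      throw new IllegalStateException("could not rename " + from.getName() + " to " + to.getName());
    }
  }
}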