Repository: hive Updated Branches: refs/heads/master d977a9d77 -> db4fe384f
HIVE-18560: qtests: QTestUtil refactor/split - QOutProcessor (Laszlo Bodor via Zoltan Haindrich) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/db4fe384 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/db4fe384 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/db4fe384 Branch: refs/heads/master Commit: db4fe384fdf57bb89d1f468b68ae6625e6f0ba77 Parents: d977a9d Author: Laszlo Bodor <bodorlaszlo0...@gmail.com> Authored: Wed Mar 14 15:32:34 2018 +0100 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Wed Mar 14 15:32:34 2018 +0100 ---------------------------------------------------------------------- .../control/AbstractCoreBlobstoreCliDriver.java | 6 +- .../apache/hadoop/hive/ql/QOutProcessor.java | 264 +++++++++++++++++++ .../org/apache/hadoop/hive/ql/QTestUtil.java | 201 +------------- .../hadoop/hive/ql/TestQOutProcessor.java | 73 +++++ .../apache/hadoop/hive/ql/TestQTestUtil.java | 88 ------- .../root_dir_external_table.q.out | 2 +- .../spark/root_dir_external_table.q.out | 2 +- 7 files changed, 352 insertions(+), 284 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCoreBlobstoreCliDriver.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCoreBlobstoreCliDriver.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCoreBlobstoreCliDriver.java index b1d66a5..dd80424 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCoreBlobstoreCliDriver.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCoreBlobstoreCliDriver.java @@ -27,7 +27,6 @@ import java.text.SimpleDateFormat; import java.util.Calendar; 
import java.util.Map; -import org.apache.hadoop.hive.cli.control.AbstractCliConfig.MetastoreType; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; @@ -40,6 +39,8 @@ import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import com.google.common.base.Strings; + public abstract class AbstractCoreBlobstoreCliDriver extends CliAdapter { protected static QTestUtil qt; @@ -180,6 +181,7 @@ public abstract class AbstractCoreBlobstoreCliDriver extends CliAdapter { + "-" + String.format("%03d", (int)(Math.random() * 999)); testBlobstorePathUnique = testBlobstorePath + uid; - qt.addPatternWithMaskComment(testBlobstorePathUnique, String.format("### %s ###", HCONF_TEST_BLOBSTORE_PATH)); + qt.getQOutProcessor().addPatternWithMaskComment(testBlobstorePathUnique, + String.format("### %s ###", HCONF_TEST_BLOBSTORE_PATH)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java new file mode 100644 index 0000000..024fa1b --- /dev/null +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hive.ql.QTestUtil.FsType; + +/** + * QOutProcessor: produces the final q.out from original q.out by postprocessing (e.g. 
masks) + * + */ +public class QOutProcessor { + public static final String PATH_HDFS_REGEX = "(hdfs://)([a-zA-Z0-9:/_\\-\\.=])+"; + public static final String PATH_HDFS_WITH_DATE_USER_GROUP_REGEX = + "([a-z]+) ([a-z]+)([ ]+)([0-9]+) ([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}) " + + PATH_HDFS_REGEX; + + public static final String HDFS_MASK = "### HDFS PATH ###"; + public static final String HDFS_DATE_MASK = "### HDFS DATE ###"; + public static final String HDFS_USER_MASK = "### USER ###"; + public static final String HDFS_GROUP_MASK = "### GROUP ###"; + + public static final String MASK_PATTERN = "#### A masked pattern was here ####"; + public static final String PARTIAL_MASK_PATTERN = "#### A PARTIAL masked pattern was here ####"; + + private FsType fsType = FsType.local; + + public static class LineProcessingResult { + private String line; + private boolean partialMaskWasMatched = false; + + public LineProcessingResult(String line) { + this.line = line; + } + + public String get() { + return line; + } + } + + private final Pattern[] planMask = toPattern(new String[] { + ".*file:.*", + ".*pfile:.*", + ".*/tmp/.*", + ".*invalidscheme:.*", + ".*lastUpdateTime.*", + ".*lastAccessTime.*", + ".*lastModifiedTime.*", + ".*[Oo]wner.*", + ".*CreateTime.*", + ".*LastAccessTime.*", + ".*Location.*", + ".*LOCATION '.*", + ".*transient_lastDdlTime.*", + ".*last_modified_.*", + ".*at org.*", + ".*at sun.*", + ".*at java.*", + ".*at junit.*", + ".*Caused by:.*", + ".*LOCK_QUERYID:.*", + ".*LOCK_TIME:.*", + ".*grantTime.*", + ".*[.][.][.] 
[0-9]* more.*", + ".*job_[0-9_]*.*", + ".*job_local[0-9_]*.*", + ".*USING 'java -cp.*", + "^Deleted.*", + ".*DagName:.*", + ".*DagId:.*", + ".*Input:.*/data/files/.*", + ".*Output:.*/data/files/.*", + ".*total number of created files now is.*", + ".*.hive-staging.*", + "pk_-?[0-9]*_[0-9]*_[0-9]*", + "fk_-?[0-9]*_[0-9]*_[0-9]*", + "uk_-?[0-9]*_[0-9]*_[0-9]*", + "nn_-?[0-9]*_[0-9]*_[0-9]*", // not null constraint name + "dc_-?[0-9]*_[0-9]*_[0-9]*", // default constraint name + ".*at com\\.sun\\.proxy.*", + ".*at com\\.jolbox.*", + ".*at com\\.zaxxer.*", + "org\\.apache\\.hadoop\\.hive\\.metastore\\.model\\.MConstraint@([0-9]|[a-z])*", + "^Repair: Added partition to metastore.*" + }); + + public QOutProcessor(FsType fsType) { + this.fsType = fsType; + } + + public QOutProcessor() { + this.fsType = FsType.hdfs; + } + + private Pattern[] toPattern(String[] patternStrs) { + Pattern[] patterns = new Pattern[patternStrs.length]; + for (int i = 0; i < patternStrs.length; i++) { + patterns[i] = Pattern.compile(patternStrs[i]); + } + return patterns; + } + + public void maskPatterns(String fname) throws Exception { + String line; + BufferedReader in; + BufferedWriter out; + + File file = new File(fname); + File fileOrig = new File(fname + ".orig"); + FileUtils.copyFile(file, fileOrig); + + in = new BufferedReader(new InputStreamReader(new FileInputStream(fileOrig), "UTF-8")); + out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); + + boolean lastWasMasked = false; + + while (null != (line = in.readLine())) { + LineProcessingResult result = processLine(line); + + if (result.line.equals(MASK_PATTERN)) { + // We're folding multiple masked lines into one. 
+ if (!lastWasMasked) { + out.write(result.line); + out.write("\n"); + lastWasMasked = true; + result.partialMaskWasMatched = false; + } + } else { + out.write(result.line); + out.write("\n"); + lastWasMasked = false; + result.partialMaskWasMatched = false; + } + } + + in.close(); + out.close(); + } + + public LineProcessingResult processLine(String line) { + LineProcessingResult result = new LineProcessingResult(line); + + Matcher matcher = null; + + if (fsType == FsType.encrypted_hdfs) { + for (Pattern pattern : partialReservedPlanMask) { + matcher = pattern.matcher(result.line); + if (matcher.find()) { + result.line = PARTIAL_MASK_PATTERN + " " + matcher.group(0); + result.partialMaskWasMatched = true; + break; + } + } + } + else { + for (PatternReplacementPair prp : partialPlanMask) { + matcher = prp.pattern.matcher(result.line); + if (matcher.find()) { + result.line = result.line.replaceAll(prp.pattern.pattern(), prp.replacement); + result.partialMaskWasMatched = true; + } + } + } + + if (!result.partialMaskWasMatched) { + for (Pair<Pattern, String> pair : patternsWithMaskComments) { + Pattern pattern = pair.getLeft(); + String maskComment = pair.getRight(); + + matcher = pattern.matcher(result.line); + if (matcher.find()) { + result.line = matcher.replaceAll(maskComment); + result.partialMaskWasMatched = true; + break; + } + } + + for (Pattern pattern : planMask) { + result.line = pattern.matcher(result.line).replaceAll(MASK_PATTERN); + } + } + + return result; + } + + private final Pattern[] partialReservedPlanMask = toPattern(new String[] { + "data/warehouse/(.*?/)+\\.hive-staging" // the directory might be db/table/partition + //TODO: add more expected test result here + }); + /** + * Pattern to match and (partial) replacement text. + * For example, {"writeid":76,"bucketid":8249877}. We just want to mask 76 but a regex that + * matches just 76 will match a lot of other things. 
+ */ + private final static class PatternReplacementPair { + private final Pattern pattern; + private final String replacement; + PatternReplacementPair(Pattern p, String r) { + pattern = p; + replacement = r; + } + } + private final PatternReplacementPair[] partialPlanMask; + { + ArrayList<PatternReplacementPair> ppm = new ArrayList<>(); + ppm.add(new PatternReplacementPair(Pattern.compile("\\{\"writeid\":[1-9][0-9]*,\"bucketid\":"), + "{\"writeid\":### Masked writeid ###,\"bucketid\":")); + + ppm.add(new PatternReplacementPair(Pattern.compile("attempt_[0-9_]+"), "attempt_#ID#")); + ppm.add(new PatternReplacementPair(Pattern.compile("vertex_[0-9_]+"), "vertex_#ID#")); + ppm.add(new PatternReplacementPair(Pattern.compile("task_[0-9_]+"), "task_#ID#")); + partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]); + } + /* This list may be modified by specific cli drivers to mask strings that change on every test */ + @SuppressWarnings("serial") + private final List<Pair<Pattern, String>> patternsWithMaskComments = + new ArrayList<Pair<Pattern, String>>() { + { + add(toPatternPair("(pblob|s3.?|swift|wasb.?).*hive-staging.*", + "### BLOBSTORE_STAGING_PATH ###")); + add(toPatternPair(PATH_HDFS_WITH_DATE_USER_GROUP_REGEX, String.format("%s %s$3$4 %s $6%s", + HDFS_USER_MASK, HDFS_GROUP_MASK, HDFS_DATE_MASK, HDFS_MASK))); + add(toPatternPair(PATH_HDFS_REGEX, String.format("$1%s", HDFS_MASK))); + } + }; + + private Pair<Pattern, String> toPatternPair(String patternStr, String maskComment) { + return ImmutablePair.of(Pattern.compile(patternStr), maskComment); + } + + public void addPatternWithMaskComment(String patternStr, String maskComment) { + patternsWithMaskComments.add(toPatternPair(patternStr, maskComment)); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java ---------------------------------------------------------------------- diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 82590ce..050f9d5 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -23,7 +23,6 @@ import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; -import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -34,7 +33,6 @@ import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; -import java.io.OutputStreamWriter; import java.io.PrintStream; import java.io.Serializable; import java.io.StringWriter; @@ -71,12 +69,9 @@ import java.util.regex.Pattern; import java.util.stream.Stream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; -import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileStatus; @@ -164,9 +159,6 @@ public class QTestUtil { private static final String TEST_TMP_DIR_PROPERTY = "test.tmp.dir"; // typically target/tmp private static final String BUILD_DIR_PROPERTY = "build.dir"; // typically target - public static final String PATH_HDFS_REGEX = "(hdfs://)([a-zA-Z0-9:/_\\-\\.=])+"; - public static final String PATH_HDFS_WITH_DATE_USER_GROUP_REGEX = "([a-z]+) ([a-z]+)([ ]+)([0-9]+) ([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}) " + PATH_HDFS_REGEX; - public static final String 
TEST_SRC_TABLES_PROPERTY = "test.src.tables"; private String testWarehouse; @@ -206,7 +198,7 @@ public class QTestUtil { private SparkSession sparkSession = null; private boolean isSessionStateStarted = false; private static final String javaVersion = getJavaVersion(); - + private QOutProcessor qOutProcessor; private final String initScript; private final String cleanupScript; @@ -587,6 +579,7 @@ public class QTestUtil { this.outDir = outDir; this.logDir = logDir; this.srcUDFs = getSrcUDFs(); + this.qOutProcessor = new QOutProcessor(fsType); // HIVE-14443 move this fall-back logic to CliConfigs if (confDir != null && !confDir.isEmpty()) { @@ -1668,186 +1661,6 @@ public class QTestUtil { return ret; } - private Pattern[] toPattern(String[] patternStrs) { - Pattern[] patterns = new Pattern[patternStrs.length]; - for (int i = 0; i < patternStrs.length; i++) { - patterns[i] = Pattern.compile(patternStrs[i]); - } - return patterns; - } - - private void maskPatterns(Pattern[] patterns, String fname) throws Exception { - String maskPattern = "#### A masked pattern was here ####"; - String partialMaskPattern = "#### A PARTIAL masked pattern was here ####"; - - String line; - BufferedReader in; - BufferedWriter out; - - File file = new File(fname); - File fileOrig = new File(fname + ".orig"); - FileUtils.copyFile(file, fileOrig); - - in = new BufferedReader(new InputStreamReader(new FileInputStream(fileOrig), "UTF-8")); - out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); - - boolean lastWasMasked = false; - boolean partialMaskWasMatched = false; - Matcher matcher; - while (null != (line = in.readLine())) { - if (fsType == FsType.encrypted_hdfs) { - for (Pattern pattern : partialReservedPlanMask) { - matcher = pattern.matcher(line); - if (matcher.find()) { - line = partialMaskPattern + " " + matcher.group(0); - partialMaskWasMatched = true; - break; - } - } - } - else { - for (PatternReplacementPair prp : partialPlanMask) { - matcher = 
prp.pattern.matcher(line); - if (matcher.find()) { - line = line.replaceAll(prp.pattern.pattern(), prp.replacement); - partialMaskWasMatched = true; - } - } - } - - if (!partialMaskWasMatched) { - for (Pair<Pattern, String> pair : patternsWithMaskComments) { - Pattern pattern = pair.getLeft(); - String maskComment = pair.getRight(); - - matcher = pattern.matcher(line); - if (matcher.find()) { - line = matcher.replaceAll(maskComment); - partialMaskWasMatched = true; - break; - } - } - - for (Pattern pattern : patterns) { - line = pattern.matcher(line).replaceAll(maskPattern); - } - } - - if (line.equals(maskPattern)) { - // We're folding multiple masked lines into one. - if (!lastWasMasked) { - out.write(line); - out.write("\n"); - lastWasMasked = true; - partialMaskWasMatched = false; - } - } else { - out.write(line); - out.write("\n"); - lastWasMasked = false; - partialMaskWasMatched = false; - } - } - - in.close(); - out.close(); - } - - private final Pattern[] planMask = toPattern(new String[] { - ".*file:.*", - ".*pfile:.*", - ".*/tmp/.*", - ".*invalidscheme:.*", - ".*lastUpdateTime.*", - ".*lastAccessTime.*", - ".*lastModifiedTime.*", - ".*[Oo]wner.*", - ".*CreateTime.*", - ".*LastAccessTime.*", - ".*Location.*", - ".*LOCATION '.*", - ".*transient_lastDdlTime.*", - ".*last_modified_.*", - ".*at org.*", - ".*at sun.*", - ".*at java.*", - ".*at junit.*", - ".*Caused by:.*", - ".*LOCK_QUERYID:.*", - ".*LOCK_TIME:.*", - ".*grantTime.*", - ".*[.][.][.] 
[0-9]* more.*", - ".*job_[0-9_]*.*", - ".*job_local[0-9_]*.*", - ".*USING 'java -cp.*", - "^Deleted.*", - ".*DagName:.*", - ".*DagId:.*", - ".*Input:.*/data/files/.*", - ".*Output:.*/data/files/.*", - ".*total number of created files now is.*", - ".*.hive-staging.*", - "pk_-?[0-9]*_[0-9]*_[0-9]*", - "fk_-?[0-9]*_[0-9]*_[0-9]*", - "uk_-?[0-9]*_[0-9]*_[0-9]*", - "nn_-?[0-9]*_[0-9]*_[0-9]*", // not null constraint name - "dc_-?[0-9]*_[0-9]*_[0-9]*", // default constraint name - ".*at com\\.sun\\.proxy.*", - ".*at com\\.jolbox.*", - ".*at com\\.zaxxer.*", - "org\\.apache\\.hadoop\\.hive\\.metastore\\.model\\.MConstraint@([0-9]|[a-z])*", - "^Repair: Added partition to metastore.*" - }); - - private final Pattern[] partialReservedPlanMask = toPattern(new String[] { - "data/warehouse/(.*?/)+\\.hive-staging" // the directory might be db/table/partition - //TODO: add more expected test result here - }); - /** - * Pattern to match and (partial) replacement text. - * For example, {"writeid":76,"bucketid":8249877}. We just want to mask 76 but a regex that - * matches just 76 will match a lot of other things. 
- */ - private final static class PatternReplacementPair { - private final Pattern pattern; - private final String replacement; - PatternReplacementPair(Pattern p, String r) { - pattern = p; - replacement = r; - } - } - private final PatternReplacementPair[] partialPlanMask; - { - ArrayList<PatternReplacementPair> ppm = new ArrayList<>(); - ppm.add(new PatternReplacementPair(Pattern.compile("\\{\"writeid\":[1-9][0-9]*,\"bucketid\":"), - "{\"writeid\":### Masked writeid ###,\"bucketid\":")); - - ppm.add(new PatternReplacementPair(Pattern.compile("attempt_[0-9_]+"), "attempt_#ID#")); - ppm.add(new PatternReplacementPair(Pattern.compile("vertex_[0-9_]+"), "vertex_#ID#")); - ppm.add(new PatternReplacementPair(Pattern.compile("task_[0-9_]+"), "task_#ID#")); - partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]); - } - /* This list may be modified by specific cli drivers to mask strings that change on every test */ - @SuppressWarnings("serial") - private final List<Pair<Pattern, String>> patternsWithMaskComments = - new ArrayList<Pair<Pattern, String>>() { - { - add(toPatternPair("(pblob|s3.?|swift|wasb.?).*hive-staging.*", - "### BLOBSTORE_STAGING_PATH ###")); - add(toPatternPair(PATH_HDFS_WITH_DATE_USER_GROUP_REGEX, - "### USER ### ### GROUP ###$3$4 ### HDFS DATE ### $6### HDFS PATH ###")); - add(toPatternPair(PATH_HDFS_REGEX, "$1### HDFS PATH ###")); - } - }; - - private Pair<Pattern, String> toPatternPair(String patternStr, String maskComment) { - return ImmutablePair.of(Pattern.compile(patternStr), maskComment); - } - - public void addPatternWithMaskComment(String patternStr, String maskComment) { - patternsWithMaskComments.add(toPatternPair(patternStr, maskComment)); - } - public QTestProcessExecResult checkCliDriverResults(String tname) throws Exception { assert(qMap.containsKey(tname)); @@ -1856,7 +1669,7 @@ public class QTestUtil { File f = new File(logDir, tname + outFileExtension); - maskPatterns(planMask, f.getPath()); + 
qOutProcessor.maskPatterns(f.getPath()); QTestProcessExecResult exitVal = executeDiffCommand(f.getPath(), outFileName, false, qSortSet.contains(tname)); @@ -1873,9 +1686,9 @@ public class QTestUtil { public QTestProcessExecResult checkCompareCliDriverResults(String tname, List<String> outputs) throws Exception { assert outputs.size() > 1; - maskPatterns(planMask, outputs.get(0)); + qOutProcessor.maskPatterns(outputs.get(0)); for (int i = 1; i < outputs.size(); ++i) { - maskPatterns(planMask, outputs.get(i)); + qOutProcessor.maskPatterns(outputs.get(i)); QTestProcessExecResult result = executeDiffCommand( outputs.get(i - 1), outputs.get(i), false, qSortSet.contains(tname)); if (result.getReturnCode() != 0) { @@ -2597,4 +2410,8 @@ public class QTestUtil { } } } + + public QOutProcessor getQOutProcessor() { + return qOutProcessor; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java ---------------------------------------------------------------------- diff --git a/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java b/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java new file mode 100644 index 0000000..aa363ef --- /dev/null +++ b/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql; + +import org.apache.hadoop.hive.ql.QTestUtil.FsType; +import org.junit.Assert; +import org.junit.Test; + +/** + * This class contains unit tests for QOutProcessor + */ +public class TestQOutProcessor { + QOutProcessor qOutProcessor = new QOutProcessor(FsType.local); + + @Test + public void testSelectiveHdfsPatternMaskOnlyHdfsPath() { + Assert.assertEquals("nothing to be masked", processLine("nothing to be masked")); + Assert.assertEquals("hdfs://", processLine("hdfs://")); + Assert.assertEquals(String.format("hdfs://%s", QOutProcessor.HDFS_MASK), + processLine("hdfs:///")); + Assert.assertEquals(String.format("hdfs://%s", QOutProcessor.HDFS_MASK), + processLine("hdfs://a")); + Assert.assertEquals(String.format("hdfs://%s other text", QOutProcessor.HDFS_MASK), + processLine("hdfs://tmp.dfs.com:50029/tmp other text")); + Assert.assertEquals(String.format("hdfs://%s", QOutProcessor.HDFS_MASK), processLine( + "hdfs://localhost:51594/build/ql/test/data/warehouse/default/encrypted_table_dp/p=2014-09-23")); + + Assert.assertEquals(String.format("hdfs://%s", QOutProcessor.HDFS_MASK), + processLine("hdfs://localhost:11111/tmp/ct_noperm_loc_foo1")); + + Assert.assertEquals( + String.format("hdfs://%s hdfs://%s", QOutProcessor.HDFS_MASK, QOutProcessor.HDFS_MASK), + processLine("hdfs://one hdfs://two")); + + Assert.assertEquals( + String.format( + "some text before [name=hdfs://%s]] some text between hdfs://%s some text after", + QOutProcessor.HDFS_MASK, QOutProcessor.HDFS_MASK), + processLine( + "some text before 
[name=hdfs://localhost:11111/tmp/ct_noperm_loc_foo1]] some text between hdfs://localhost:22222/tmp/ct_noperm_loc_foo2 some text after")); + + Assert.assertEquals( + String.format("-rw-r--r-- 3 %s %s 2557 %s hdfs://%s", QOutProcessor.HDFS_USER_MASK, + QOutProcessor.HDFS_GROUP_MASK, QOutProcessor.HDFS_DATE_MASK, QOutProcessor.HDFS_MASK), + processLine( + "-rw-r--r-- 3 hiveptest supergroup 2557 2018-01-11 17:09 hdfs://hello_hdfs_path")); + + Assert.assertEquals( + String.format("-rw-r--r-- 3 %s %s 2557 %s hdfs://%s", QOutProcessor.HDFS_USER_MASK, + QOutProcessor.HDFS_GROUP_MASK, QOutProcessor.HDFS_DATE_MASK, QOutProcessor.HDFS_MASK), + processLine( + "-rw-r--r-- 3 hiveptest supergroup 2557 2018-01-11 17:09 hdfs://hello_hdfs_path")); + } + + private String processLine(String line) { + return qOutProcessor.processLine(line).get(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQTestUtil.java ---------------------------------------------------------------------- diff --git a/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQTestUtil.java b/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQTestUtil.java deleted file mode 100644 index 1a8eb33..0000000 --- a/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQTestUtil.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql; - -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.junit.Assert; -import org.junit.Test; - -/** - * This class contains unit tests for QTestUtil - */ -public class TestQTestUtil { - private static final String TEST_HDFS_MASK = "###HDFS###"; - private static final String TEST_HDFS_DATE_MASK = "###HDFS_DATE###"; - private static final String TEST_HDFS_USER_MASK = "###USER###"; - private static final String TEST_HDFS_GROUP_MASK = "###GROUP###"; - - @Test - public void testSelectiveHdfsPatternMaskOnlyHdfsPath() { - Assert.assertEquals("nothing to be masked", maskHdfs("nothing to be masked")); - Assert.assertEquals("hdfs://", maskHdfs("hdfs://")); - Assert.assertEquals(String.format("hdfs://%s", TEST_HDFS_MASK), maskHdfs("hdfs://a")); - Assert.assertEquals(String.format("hdfs://%s other text", TEST_HDFS_MASK), - maskHdfs("hdfs://tmp.dfs.com:50029/tmp other text")); - Assert.assertEquals(String.format("hdfs://%s", TEST_HDFS_MASK), maskHdfs( - "hdfs://localhost:51594/build/ql/test/data/warehouse/default/encrypted_table_dp/p=2014-09-23")); - - String line = maskHdfs("hdfs://localhost:11111/tmp/ct_noperm_loc_foo1"); - Assert.assertEquals(String.format("hdfs://%s", TEST_HDFS_MASK), line); - - line = maskHdfs("hdfs://one hdfs://two"); - Assert.assertEquals(String.format("hdfs://%s hdfs://%s", TEST_HDFS_MASK, TEST_HDFS_MASK), line); - - line = maskHdfs( - "some text before [name=hdfs://localhost:11111/tmp/ct_noperm_loc_foo1]] some text between hdfs://localhost:22222/tmp/ct_noperm_loc_foo2 some 
text after"); - Assert.assertEquals(String.format( - "some text before [name=hdfs://%s]] some text between hdfs://%s some text after", - TEST_HDFS_MASK, TEST_HDFS_MASK), line); - - line = maskHdfsWithDateUserGroup( - "-rw-r--r-- 3 hiveptest supergroup 2557 2018-01-11 17:09 hdfs://hello_hdfs_path"); - Assert.assertEquals(String.format("-rw-r--r-- 3 %s %s 2557 %s hdfs://%s", - TEST_HDFS_USER_MASK, TEST_HDFS_GROUP_MASK, TEST_HDFS_DATE_MASK, TEST_HDFS_MASK), line); - - line = maskHdfs(maskHdfsWithDateUserGroup( - "-rw-r--r-- 3 hiveptest supergroup 2557 2018-01-11 17:09 hdfs://hello_hdfs_path")); - Assert.assertEquals(String.format("-rw-r--r-- 3 %s %s 2557 %s hdfs://%s", - TEST_HDFS_USER_MASK, TEST_HDFS_GROUP_MASK, TEST_HDFS_DATE_MASK, TEST_HDFS_MASK), line); - } - - private String maskHdfs(String line) { - Matcher matcher = Pattern.compile(QTestUtil.PATH_HDFS_REGEX).matcher(line); - - if (matcher.find()) { - line = matcher.replaceAll(String.format("$1%s", TEST_HDFS_MASK)); - } - - return line; - } - - private String maskHdfsWithDateUserGroup(String line) { - Matcher matcher = Pattern.compile(QTestUtil.PATH_HDFS_WITH_DATE_USER_GROUP_REGEX).matcher(line); - - if (matcher.find()) { - line = matcher.replaceAll(String.format("%s %s$3$4 %s $6%s", TEST_HDFS_USER_MASK, - TEST_HDFS_GROUP_MASK, TEST_HDFS_DATE_MASK, TEST_HDFS_MASK)); - } - - return line; - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/ql/src/test/results/clientpositive/root_dir_external_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/root_dir_external_table.q.out b/ql/src/test/results/clientpositive/root_dir_external_table.q.out index 7ba2b5b..4953e43 100644 --- a/ql/src/test/results/clientpositive/root_dir_external_table.q.out +++ b/ql/src/test/results/clientpositive/root_dir_external_table.q.out @@ -6,7 +6,7 @@ POSTHOOK: query: insert overwrite directory "hdfs://### HDFS PATH ###" 
select ke POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -PREHOOK: query: create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs:///' +PREHOOK: query: create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs://### HDFS PATH ###' PREHOOK: type: CREATETABLE PREHOOK: Input: hdfs://### HDFS PATH ### PREHOOK: Output: database:default http://git-wip-us.apache.org/repos/asf/hive/blob/db4fe384/ql/src/test/results/clientpositive/spark/root_dir_external_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/root_dir_external_table.q.out b/ql/src/test/results/clientpositive/spark/root_dir_external_table.q.out index 7ba2b5b..4953e43 100644 --- a/ql/src/test/results/clientpositive/spark/root_dir_external_table.q.out +++ b/ql/src/test/results/clientpositive/spark/root_dir_external_table.q.out @@ -6,7 +6,7 @@ POSTHOOK: query: insert overwrite directory "hdfs://### HDFS PATH ###" select ke POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -PREHOOK: query: create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs:///' +PREHOOK: query: create external table roottable (key string) row format delimited fields terminated by '\\t' stored as textfile location 'hdfs://### HDFS PATH ###' PREHOOK: type: CREATETABLE PREHOOK: Input: hdfs://### HDFS PATH ### PREHOOK: Output: database:default