incubator-eagle git commit: [EAGLE-615] Jsoup parse hive sql return String without line break "\n"
Repository: incubator-eagle Updated Branches: refs/heads/master a6bc0a524 -> a710082d4 [EAGLE-615] Jsoup parse hive sql return String without line break "\n" EAGLE-615 Jsoup parse hive sql return String without line break "\n" - ADD "doc.outputSettings().prettyPrint(false);", get element value via getWholeText() not text() Author: chitin Closes #499 from chitin/EAGLE615. Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/a710082d Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/a710082d Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/a710082d Branch: refs/heads/master Commit: a710082d486e10b4732c00a06dd367dc556df60a Parents: a6bc0a5 Author: chitin Authored: Mon Oct 17 11:42:27 2016 +0800 Committer: Hao Chen Committed: Mon Oct 17 11:42:27 2016 +0800 -- .../hive/jobrunning/HiveJobFetchSpout.java | 35 +++- 1 file changed, 26 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/a710082d/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java -- diff --git a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java index c0673b3..af4599b 100644 --- a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java +++ b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java @@ -22,6 +22,7 @@ import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichSpout; import backtype.storm.tuple.Fields; +import org.apache.commons.lang.StringUtils; import org.apache.eagle.dataproc.impl.storm.ValuesArray; import org.apache.eagle.jpm.util.*; import org.apache.eagle.jpm.util.jobrecover.RunningJobManager; @@ -35,12 +36,14 @@ import org.apache.eagle.security.hive.config.RunningJobCrawlConfig; import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.map.ObjectMapper; import org.jsoup.Jsoup; +import org.jsoup.nodes.TextNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; + import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import java.io.InputStream; @@ -62,6 +65,7 @@ public class HiveJobFetchSpout extends BaseRichSpout { private Long lastFinishAppTime; private RunningJobManager runningJobManager; private int partitionId; + static { OBJ_MAPPER.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true); } @@ -91,22 +95,22 @@ public class HiveJobFetchSpout extends BaseRichSpout { // sanity verify 0<=partitionId<=numTotalPartitions-1 if (partitionId < 0 || partitionId > crawlConfig.controlConfig.numTotalPartitions) { throw new IllegalStateException("partitionId should be less than numTotalPartitions with partitionId " + -partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions); +partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions); } Class partitionerCls = crawlConfig.controlConfig.partitionerCls; try { this.jobFilter = new JobIdFilterByPartition(partitionerCls.newInstance(), -crawlConfig.controlConfig.numTotalPartitions, partitionId); +crawlConfig.controlConfig.numTotalPartitions, partitionId); } catch (Exception e) { LOG.error("failing instantiating job partitioner class " + partitionerCls.getCanonicalName()); throw new IllegalStateException(e); } this.collector = collector; this.runningJobManager = new RunningJobManager(crawlConfig.zkStateConfig.zkQuorum, -crawlConfig.zkStateConfig.zkSessionTimeoutMs, -crawlConfig.zkStateConfig.zkRetryTimes, -crawlConfig.zkStateConfig.zkRetryInterval, -crawlConfig.zkStateConfig.zkRoot); +crawlConfig.zkStateConfig.zkSessionTimeoutMs, +crawlConfig.zkStateConfig.zkRetryTimes, +crawlConfig.zkStateConfig.zkRetryInterval, +crawlConfig.zkStateConfig.zkRoot); this.lastFinishAppTime = this.runningJobManager.recoverLastFinishedTime(partitionId); if (this.lastFini
[39/50] incubator-eagle git commit: [EAGLE-615] Jsoup parse hive sql return String without line break "\n"
[EAGLE-615] Jsoup parse hive sql return String without line break "\n" EAGLE-615 Jsoup parse hive sql return String without line break "\n" - ADD "doc.outputSettings().prettyPrint(false);", get element value via getWholeText() not text() Author: chitin Closes #499 from chitin/EAGLE615. Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/a710082d Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/a710082d Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/a710082d Branch: refs/heads/master Commit: a710082d486e10b4732c00a06dd367dc556df60a Parents: a6bc0a5 Author: chitin Authored: Mon Oct 17 11:42:27 2016 +0800 Committer: Hao Chen Committed: Mon Oct 17 11:42:27 2016 +0800 -- .../hive/jobrunning/HiveJobFetchSpout.java | 35 +++- 1 file changed, 26 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/a710082d/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java -- diff --git a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java index c0673b3..af4599b 100644 --- a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java +++ b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java @@ -22,6 +22,7 @@ import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichSpout; import backtype.storm.tuple.Fields; +import org.apache.commons.lang.StringUtils; import org.apache.eagle.dataproc.impl.storm.ValuesArray; import org.apache.eagle.jpm.util.*; import org.apache.eagle.jpm.util.jobrecover.RunningJobManager; @@ -35,12 +36,14 @@ import org.apache.eagle.security.hive.config.RunningJobCrawlConfig; import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.map.ObjectMapper; import org.jsoup.Jsoup; +import org.jsoup.nodes.TextNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; + import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import java.io.InputStream; @@ -62,6 +65,7 @@ public class HiveJobFetchSpout extends BaseRichSpout { private Long lastFinishAppTime; private RunningJobManager runningJobManager; private int partitionId; + static { OBJ_MAPPER.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true); } @@ -91,22 +95,22 @@ public class HiveJobFetchSpout extends BaseRichSpout { // sanity verify 0<=partitionId<=numTotalPartitions-1 if (partitionId < 0 || partitionId > crawlConfig.controlConfig.numTotalPartitions) { throw new IllegalStateException("partitionId should be less than numTotalPartitions with partitionId " + -partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions); +partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions); } Class partitionerCls = crawlConfig.controlConfig.partitionerCls; try { this.jobFilter = new JobIdFilterByPartition(partitionerCls.newInstance(), -crawlConfig.controlConfig.numTotalPartitions, partitionId); +crawlConfig.controlConfig.numTotalPartitions, partitionId); } catch (Exception e) { LOG.error("failing instantiating job partitioner class " + partitionerCls.getCanonicalName()); throw new IllegalStateException(e); } this.collector = collector; this.runningJobManager = new RunningJobManager(crawlConfig.zkStateConfig.zkQuorum, -crawlConfig.zkStateConfig.zkSessionTimeoutMs, -crawlConfig.zkStateConfig.zkRetryTimes, -crawlConfig.zkStateConfig.zkRetryInterval, -crawlConfig.zkStateConfig.zkRoot); +crawlConfig.zkStateConfig.zkSessionTimeoutMs, +crawlConfig.zkStateConfig.zkRetryTimes, +crawlConfig.zkStateConfig.zkRetryInterval, +crawlConfig.zkStateConfig.zkRoot); this.lastFinishAppTime = this.runningJobManager.recoverLastFinishedTime(partitionId); if (this.lastFinishAppTime == 0l) { this.lastFinishAppTime = Calendar.getInstance().getTimeInMi