incubator-eagle git commit: [EAGLE-615] Jsoup parse hive sql return String without line break "\n"

2016-10-16 Thread hao
Repository: incubator-eagle
Updated Branches:
  refs/heads/master a6bc0a524 -> a710082d4


[EAGLE-615] Jsoup parse hive sql return String without line break "\n"

EAGLE-615: Jsoup parsing of Hive SQL returns a String without line breaks ("\n")
- Add "doc.outputSettings().prettyPrint(false);" and read the element value via
  getWholeText() instead of text(), so that line breaks are preserved.

Author: chitin 

Closes #499 from chitin/EAGLE615.


Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/a710082d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/a710082d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/a710082d

Branch: refs/heads/master
Commit: a710082d486e10b4732c00a06dd367dc556df60a
Parents: a6bc0a5
Author: chitin 
Authored: Mon Oct 17 11:42:27 2016 +0800
Committer: Hao Chen 
Committed: Mon Oct 17 11:42:27 2016 +0800

--
 .../hive/jobrunning/HiveJobFetchSpout.java  | 35 +++-
 1 file changed, 26 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/a710082d/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
--
diff --git 
a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
 
b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
index c0673b3..af4599b 100644
--- 
a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
+++ 
b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
@@ -22,6 +22,7 @@ import backtype.storm.task.TopologyContext;
 import backtype.storm.topology.OutputFieldsDeclarer;
 import backtype.storm.topology.base.BaseRichSpout;
 import backtype.storm.tuple.Fields;
+import org.apache.commons.lang.StringUtils;
 import org.apache.eagle.dataproc.impl.storm.ValuesArray;
 import org.apache.eagle.jpm.util.*;
 import org.apache.eagle.jpm.util.jobrecover.RunningJobManager;
@@ -35,12 +36,14 @@ import 
org.apache.eagle.security.hive.config.RunningJobCrawlConfig;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.map.ObjectMapper;
 import org.jsoup.Jsoup;
+import org.jsoup.nodes.TextNode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
+
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.InputStream;
@@ -62,6 +65,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
 private Long lastFinishAppTime;
 private RunningJobManager runningJobManager;
 private int partitionId;
+
 static {
 OBJ_MAPPER.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, 
true);
 }
@@ -91,22 +95,22 @@ public class HiveJobFetchSpout extends BaseRichSpout {
 // sanity verify 0<=partitionId<=numTotalPartitions-1
 if (partitionId < 0 || partitionId > 
crawlConfig.controlConfig.numTotalPartitions) {
 throw new IllegalStateException("partitionId should be less than 
numTotalPartitions with partitionId " +
-partitionId + " and numTotalPartitions " + 
crawlConfig.controlConfig.numTotalPartitions);
+partitionId + " and numTotalPartitions " + 
crawlConfig.controlConfig.numTotalPartitions);
 }
 Class partitionerCls = 
crawlConfig.controlConfig.partitionerCls;
 try {
 this.jobFilter = new 
JobIdFilterByPartition(partitionerCls.newInstance(),
-crawlConfig.controlConfig.numTotalPartitions, partitionId);
+crawlConfig.controlConfig.numTotalPartitions, partitionId);
 } catch (Exception e) {
 LOG.error("failing instantiating job partitioner class " + 
partitionerCls.getCanonicalName());
 throw new IllegalStateException(e);
 }
 this.collector = collector;
 this.runningJobManager = new 
RunningJobManager(crawlConfig.zkStateConfig.zkQuorum,
-crawlConfig.zkStateConfig.zkSessionTimeoutMs,
-crawlConfig.zkStateConfig.zkRetryTimes,
-crawlConfig.zkStateConfig.zkRetryInterval,
-crawlConfig.zkStateConfig.zkRoot);
+crawlConfig.zkStateConfig.zkSessionTimeoutMs,
+crawlConfig.zkStateConfig.zkRetryTimes,
+crawlConfig.zkStateConfig.zkRetryInterval,
+crawlConfig.zkStateConfig.zkRoot);
 this.lastFinishAppTime = 
this.runningJobManager.recoverLastFinishedTime(partitionId);
 if (this.lastFini

[39/50] incubator-eagle git commit: [EAGLE-615] Jsoup parse hive sql return String without line break "\n"

2016-10-18 Thread hao
[EAGLE-615] Jsoup parse hive sql return String without line break "\n"

EAGLE-615: Jsoup parsing of Hive SQL returns a String without line breaks ("\n")
- Add "doc.outputSettings().prettyPrint(false);" and read the element value via
  getWholeText() instead of text(), so that line breaks are preserved.

Author: chitin 

Closes #499 from chitin/EAGLE615.


Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/a710082d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/a710082d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/a710082d

Branch: refs/heads/master
Commit: a710082d486e10b4732c00a06dd367dc556df60a
Parents: a6bc0a5
Author: chitin 
Authored: Mon Oct 17 11:42:27 2016 +0800
Committer: Hao Chen 
Committed: Mon Oct 17 11:42:27 2016 +0800

--
 .../hive/jobrunning/HiveJobFetchSpout.java  | 35 +++-
 1 file changed, 26 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/a710082d/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
--
diff --git 
a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
 
b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
index c0673b3..af4599b 100644
--- 
a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
+++ 
b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
@@ -22,6 +22,7 @@ import backtype.storm.task.TopologyContext;
 import backtype.storm.topology.OutputFieldsDeclarer;
 import backtype.storm.topology.base.BaseRichSpout;
 import backtype.storm.tuple.Fields;
+import org.apache.commons.lang.StringUtils;
 import org.apache.eagle.dataproc.impl.storm.ValuesArray;
 import org.apache.eagle.jpm.util.*;
 import org.apache.eagle.jpm.util.jobrecover.RunningJobManager;
@@ -35,12 +36,14 @@ import 
org.apache.eagle.security.hive.config.RunningJobCrawlConfig;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.map.ObjectMapper;
 import org.jsoup.Jsoup;
+import org.jsoup.nodes.TextNode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
+
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.InputStream;
@@ -62,6 +65,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
 private Long lastFinishAppTime;
 private RunningJobManager runningJobManager;
 private int partitionId;
+
 static {
 OBJ_MAPPER.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, 
true);
 }
@@ -91,22 +95,22 @@ public class HiveJobFetchSpout extends BaseRichSpout {
 // sanity verify 0<=partitionId<=numTotalPartitions-1
 if (partitionId < 0 || partitionId > 
crawlConfig.controlConfig.numTotalPartitions) {
 throw new IllegalStateException("partitionId should be less than 
numTotalPartitions with partitionId " +
-partitionId + " and numTotalPartitions " + 
crawlConfig.controlConfig.numTotalPartitions);
+partitionId + " and numTotalPartitions " + 
crawlConfig.controlConfig.numTotalPartitions);
 }
 Class partitionerCls = 
crawlConfig.controlConfig.partitionerCls;
 try {
 this.jobFilter = new 
JobIdFilterByPartition(partitionerCls.newInstance(),
-crawlConfig.controlConfig.numTotalPartitions, partitionId);
+crawlConfig.controlConfig.numTotalPartitions, partitionId);
 } catch (Exception e) {
 LOG.error("failing instantiating job partitioner class " + 
partitionerCls.getCanonicalName());
 throw new IllegalStateException(e);
 }
 this.collector = collector;
 this.runningJobManager = new 
RunningJobManager(crawlConfig.zkStateConfig.zkQuorum,
-crawlConfig.zkStateConfig.zkSessionTimeoutMs,
-crawlConfig.zkStateConfig.zkRetryTimes,
-crawlConfig.zkStateConfig.zkRetryInterval,
-crawlConfig.zkStateConfig.zkRoot);
+crawlConfig.zkStateConfig.zkSessionTimeoutMs,
+crawlConfig.zkStateConfig.zkRetryTimes,
+crawlConfig.zkStateConfig.zkRetryInterval,
+crawlConfig.zkStateConfig.zkRoot);
 this.lastFinishAppTime = 
this.runningJobManager.recoverLastFinishedTime(partitionId);
 if (this.lastFinishAppTime == 0l) {
 this.lastFinishAppTime = Calendar.getInstance().getTimeInMi