[
https://issues.apache.org/jira/browse/FALCON-437?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13993791#comment-13993791
]
Satish Mittal commented on FALCON-437:
--------------------------------------
workflow definition:
{noformat}
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workflow-app xmlns="uri:oozie:workflow:0.3"
name="FALCON_FEED_REPLICATION_hcat-in-repl3">
<start to="should-record"/>
<decision name="should-record">
<switch>
<case to="recordsize">
${shouldRecord=="true"}
</case>
<default to="replication-decision"/>
</switch>
</decision>
<action name="recordsize" retry-max="3" retry-interval="1">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<job-xml>${wf:appPath()}/conf/falcon-source-hive-site.xml</job-xml>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
<value>${jobPriority}</value>
</property>
<property>
<name>oozie.action.sharelib.for.java</name>
<value>hcatalog</value>
</property>
</configuration>
<main-class>org.apache.falcon.latedata.LateDataHandler</main-class>
<arg>-out</arg>
<arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg>
<arg>-paths</arg>
<arg>${falconInPaths}</arg>
<arg>-falconInputFeeds</arg>
<arg>${falconInputFeeds}</arg>
<arg>-falconInputFeedStorageTypes</arg>
<arg>${falconInputFeedStorageTypes}</arg>
<capture-output/>
</java>
<ok to="replication-decision"/>
<error to="failed-post-processing"/>
</action>
<decision name="replication-decision">
<switch>
<case to="table-export">
${falconFeedStorageType == "TABLE"}
</case>
<default to="replication"/>
</switch>
</decision>
<action name="table-export">
<hive:hive xmlns:hive="uri:oozie:hive-action:0.2"
xmlns="uri:oozie:hive-action:0.2">
<job-tracker>${falconSourceJobTracker}</job-tracker>
<name-node>${falconSourceNameNode}</name-node>
<prepare>
<delete path="${distcpSourcePaths}"/>
</prepare>
<job-xml>${wf:appPath()}/conf/falcon-source-hive-site.xml</job-xml>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
<value>${jobPriority}</value>
</property>
</configuration>
<script>${wf:appPath()}/scripts/falcon-table-export.hql</script>
<param>falconSourceDatabase=${falconSourceDatabase}</param>
<param>falconSourceTable=${falconSourceTable}</param>
<param>falconSourcePartition=${falconSourcePartition}</param>
<param>falconSourceStagingDir=${distcpSourcePaths}</param>
</hive:hive>
<ok to="replication"/>
<error to="failed-post-processing"/>
</action>
<action name="replication">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
<value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.falcon.replication.FeedReplicator</main-class>
<arg>-Dfalcon.include.path=${sourceRelativePaths}</arg>
<arg>-Dmapred.job.queue.name=${queueName}</arg>
<arg>-Dmapred.job.priority=${jobPriority}</arg>
<arg>-maxMaps</arg>
<arg>${maxMaps}</arg>
<arg>-mapBandwidthKB</arg>
<arg>${mapBandwidthKB}</arg>
<arg>-sourcePaths</arg>
<arg>${distcpSourcePaths}</arg>
<arg>-targetPath</arg>
<arg>${distcpTargetPaths}</arg>
<arg>-falconFeedStorageType</arg>
<arg>${falconFeedStorageType}</arg>
<file>${wf:conf("falcon.libpath")}/hadoop-distcp.jar</file>
</java>
<ok to="post-replication-decision"/>
<error to="failed-post-processing"/>
</action>
<decision name="post-replication-decision">
<switch>
<case to="table-import">
${falconFeedStorageType == "TABLE"}
</case>
<default to="succeeded-post-processing"/>
</switch>
</decision>
<action name="table-import">
<hive:hive xmlns:hive="uri:oozie:hive-action:0.2"
xmlns="uri:oozie:hive-action:0.2">
<job-tracker>${falconTargetJobTracker}</job-tracker>
<name-node>${falconTargetNameNode}</name-node>
<job-xml>${wf:appPath()}/conf/falcon-target-hive-site.xml</job-xml>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
<value>${jobPriority}</value>
</property>
</configuration>
<script>${wf:appPath()}/scripts/falcon-table-import.hql</script>
<param>falconTargetDatabase=${falconTargetDatabase}</param>
<param>falconTargetTable=${falconTargetTable}</param>
<param>falconTargetPartition=${falconTargetPartition}</param>
<param>falconTargetStagingDir=${distcpTargetPaths}</param>
</hive:hive>
<ok to="succeeded-post-processing"/>
<error to="failed-post-processing"/>
</action>
<action name="succeeded-post-processing" retry-max="3" retry-interval="1">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
<value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
<arg>-cluster</arg>
<arg>${cluster}</arg>
<arg>-entityType</arg>
<arg>${entityType}</arg>
<arg>-entityName</arg>
<arg>${entityName}</arg>
<arg>-nominalTime</arg>
<arg>${nominalTime}</arg>
<arg>-operation</arg>
<arg>REPLICATE</arg>
<arg>-workflowId</arg>
<arg>${wf:id()}</arg>
<arg>-runId</arg>
<arg>${wf:run()}</arg>
<arg>-status</arg>
<arg>SUCCEEDED</arg>
<arg>-timeStamp</arg>
<arg>${timeStamp}</arg>
<arg>-brokerImplClass</arg>
<arg>${wf:conf("broker.impl.class")}</arg>
<arg>-brokerUrl</arg>
<arg>${wf:conf("broker.url")}</arg>
<arg>-userBrokerImplClass</arg>
<arg>${userBrokerImplClass}</arg>
<arg>-userBrokerUrl</arg>
<arg>${userBrokerUrl}</arg>
<arg>-brokerTTL</arg>
<arg>${wf:conf("broker.ttlInMins")}</arg>
<arg>-feedNames</arg>
<arg>${feedNames}</arg>
<arg>-feedInstancePaths</arg>
<arg>${feedInstancePaths}</arg>
<arg>-logFile</arg>
<arg>${logDir}/instancePaths-${nominalTime}-${srcClusterName}.csv</arg>
<arg>-workflowEngineUrl</arg>
<arg>${workflowEngineUrl}</arg>
<arg>-userWorkflowName</arg>
<arg>${userWorkflowName}</arg>
<arg>-userWorkflowVersion</arg>
<arg>${userWorkflowVersion}</arg>
<arg>-userWorkflowEngine</arg>
<arg>${userWorkflowEngine}</arg>
<arg>-subflowId</arg>
<arg>${wf:id()}</arg>
<arg>-logDir</arg>
<arg>${logDir}/job-${nominalTime}/${srcClusterName}/</arg>
<arg>-workflowUser</arg>
<arg>${wf:user()}</arg>
<arg>-falconInputFeeds</arg>
<arg>${falconInputFeeds}</arg>
<arg>-falconInputPaths</arg>
<arg>${falconInPaths}</arg>
<file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
<file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
<file>${wf:conf("falcon.libpath")}/jms.jar</file>
<file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
<file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
<file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
</java>
<ok to="end"/>
<error to="fail"/>
</action>
<action name="failed-post-processing" retry-max="3" retry-interval="1">
<java>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
<value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.falcon.workflow.FalconPostProcessing</main-class>
<arg>-cluster</arg>
<arg>${cluster}</arg>
<arg>-entityType</arg>
<arg>${entityType}</arg>
<arg>-entityName</arg>
<arg>${entityName}</arg>
<arg>-nominalTime</arg>
<arg>${nominalTime}</arg>
<arg>-operation</arg>
<arg>REPLICATE</arg>
<arg>-workflowId</arg>
<arg>${wf:id()}</arg>
<arg>-runId</arg>
<arg>${wf:run()}</arg>
<arg>-status</arg>
<arg>FAILED</arg>
<arg>-timeStamp</arg>
<arg>${timeStamp}</arg>
<arg>-brokerImplClass</arg>
<arg>${wf:conf("broker.impl.class")}</arg>
<arg>-brokerUrl</arg>
<arg>${wf:conf("broker.url")}</arg>
<arg>-userBrokerImplClass</arg>
<arg>${userBrokerImplClass}</arg>
<arg>-userBrokerUrl</arg>
<arg>${userBrokerUrl}</arg>
<arg>-brokerTTL</arg>
<arg>${wf:conf("broker.ttlInMins")}</arg>
<arg>-feedNames</arg>
<arg>${feedNames}</arg>
<arg>-feedInstancePaths</arg>
<arg>${feedInstancePaths}</arg>
<arg>-logFile</arg>
<arg>${logDir}/instancePaths-${nominalTime}-${srcClusterName}.csv</arg>
<arg>-workflowEngineUrl</arg>
<arg>${workflowEngineUrl}</arg>
<arg>-subflowId</arg>
<arg>${wf:id()}</arg>
<arg>-logDir</arg>
<arg>${logDir}/job-${nominalTime}/${srcClusterName}/</arg>
<arg>-workflowUser</arg>
<arg>${wf:user()}</arg>
<file>${wf:conf("falcon.libpath")}/activemq-core.jar</file>
<file>${wf:conf("falcon.libpath")}/geronimo-j2ee-management.jar</file>
<file>${wf:conf("falcon.libpath")}/jms.jar</file>
<file>${wf:conf("falcon.libpath")}/json-simple.jar</file>
<file>${wf:conf("falcon.libpath")}/oozie-client.jar</file>
<file>${wf:conf("falcon.libpath")}/spring-jms.jar</file>
</java>
<ok to="fail"/>
<error to="fail"/>
</action>
<kill name="fail">
<message>
Workflow failed, error
message[${wf:errorMessage(wf:lastErrorNode())}]
</message>
</kill>
<end name="end"/>
</workflow-app>
{noformat}
workflow job configuration:
{noformat}
<configuration>
<property>
<name>falconTargetNameNode</name>
<value>hdfs://hostname:9000</value>
</property>
<property>
<name>falconInPaths</name>
<value>hcat://hostname:5055/default/table3/year=2014;month=05;day=09;hour=15;minute=30</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>feedNames</name>
<value>hcat-in-repl3</value>
</property>
<property>
<name>falcon.libpath</name>
<value>/projects/falcon/hcolo2/working/lib</value>
</property>
<property>
<name>falconSourceNameNode</name>
<value>hdfs://hostname:9000</value>
</property>
<property>
<name>entityType</name>
<value>feed</value>
</property>
<property>
<name>hcatNode</name>
<value>hcat://hostname:5055</value>
</property>
<property>
<name>falconTargetJobTracker</name>
<value>hostname:8021</value>
</property>
<property>
<name>feedInstancePaths</name>
<value>hcat://hostname:5055/default/table4/year=2014;month=05;day=09;hour=15;minute=30</value>
</property>
<property>
<name>oozie.bundle.application.path</name>
<value>hdfs://hostname:9000/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032</value>
</property>
<property>
<name>logDir</name>
<value>hdfs://hostname:9000/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/logs</value>
</property>
<property>
<name>falconTargetPartition</name>
<value>(minute='30' AND month='05' AND year='2014' AND hour='15' AND
day='09')</value>
</property>
<property>
<name>falconInputFeedStorageTypes</name>
<value>TABLE</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>falconSourceHcatNode</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>oozie.wf.external.id</name>
<value>hcat-in-repl3/REPLICATION/2014-05-09T15:30Z</value>
</property>
<property>
<name>falconSourcePartition</name>
<value>(minute='30' AND month='05' AND year='2014' AND hour='15' AND
day='09')</value>
</property>
<property>
<name>distcpSourcePaths</name>
<value>hdfs://hostname:9000//projects/falcon/hcolo1/staging/FALCON_FEED_REPLICATION_hcat-in-repl3_hcat-cluster1/default/table3/year=2014/2014-05-09-15-30/hcat-cluster2/data</value>
</property>
<property>
<name>falconSourceTable</name>
<value>table3</value>
</property>
<property>
<name>userWorkflowName</name>
<value>replication-policy</value>
</property>
<property>
<name>mapBandwidthKB</name>
<value>102400</value>
</property>
<property>
<name>srcClusterName</name>
<value>hcat-cluster1</value>
</property>
<property>
<name>userBrokerUrl</name>
<value>tcp://localhost:61616?daemon=true</value>
</property>
<property>
<name>user.name</name>
<value>falcon</value>
</property>
<property>
<name>falconTargetHcatNode</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>oozie.bundle.id</name>
<value>0000009-140508101950577-oozie-oozi-B</value>
</property>
<property>
<name>jobPriority</name>
<value>NORMAL</value>
</property>
<property>
<name>oozie.wf.application.path</name>
<value>hdfs://hostname:9000/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION/hcat-cluster1</value>
</property>
<property>
<name>maxMaps</name>
<value>5</value>
</property>
<property>
<name>oozie.coord.application.path</name>
<value>hdfs://hostname:9000/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION/hcat-cluster1.xml</value>
</property>
<property>
<name>shouldRecord</name>
<value>true</value>
</property>
<property>
<name>timeStamp</name>
<value>2014-05-09-14-55</value>
</property>
<property>
<name>falconTargetTable</name>
<value>table4</value>
</property>
<property>
<name>broker.url</name>
<value>tcp://localhost:61616</value>
</property>
<property>
<name>nominalTime</name>
<value>2014-05-09-15-30</value>
</property>
<property>
<name>userWorkflowEngine</name>
<value>falcon</value>
</property>
<property>
<name>hcat.metastore.uri</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>broker.ttlInMins</name>
<value>4320</value>
</property>
<property>
<name>queueName</name>
<value>default</value>
</property>
<property>
<name>distcpTargetPaths</name>
<value>hdfs://hostname//projects/falcon/hcolo2/staging/FALCON_FEED_REPLICATION_hcat-in-repl3_hcat-cluster2/default/table4/year=2014/2014-05-09-15-30/hcat-cluster2/data</value>
</property>
<property>
<name>falconFeedStorageType</name>
<value>TABLE</value>
</property>
<property>
<name>workflowEngineUrl</name>
<value>http://localhost:11000/oozie/</value>
</property>
<property>
<name>userBrokerImplClass</name>
<value>org.apache.activemq.ActiveMQConnectionFactory</value>
</property>
<property>
<name>falconSourceJobTracker</name>
<value>hostname:8021</value>
</property>
<property>
<name>broker.impl.class</name>
<value>org.apache.activemq.ActiveMQConnectionFactory</value>
</property>
<property>
<name>entityName</name>
<value>hcat-in-repl3</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>falconTargetDatabase</name>
<value>default</value>
</property>
<property>
<name>falconInputFeeds</name>
<value>hcat-in-repl3</value>
</property>
<property>
<name>userWorkflowVersion</name>
<value>0.6-incubating-SNAPSHOT-r623ee4a663d5b3ba5353f7357c08784a6f5222d9</value>
</property>
<property>
<name>cluster</name>
<value>hcat-cluster2</value>
</property>
<property>
<name>colo.name</name>
<value>hcolo2</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hostname:9000</value>
</property>
<property>
<name>srcClusterColo</name>
<value>hcolo1</value>
</property>
<property>
<name>sourceRelativePaths</name>
<value>IGNORE</value>
</property>
<property>
<name>falconSourceDatabase</name>
<value>default</value>
</property>
<property>
<name>jobTracker</name>
<value>hostname:8021</value>
</property>
</configuration>
{noformat}
> Feed Replication workflows are failing
> --------------------------------------
>
> Key: FALCON-437
> URL: https://issues.apache.org/jira/browse/FALCON-437
> Project: Falcon
> Issue Type: Bug
> Affects Versions: 0.5, 0.6
> Reporter: Satish Mittal
> Priority: Blocker
>
> With latest trunk, feed replication workflows are failing with
> ClassNotFoundException for falcon lib classes (LateDataHandler and
> FalconPostProcessing).
> Noticed that in the mapred.job.classpath.files conf property of replication jobs,
> none of the falcon workflow lib jars are being added. Hence the exception.
> Also noticed that when the feed is scheduled, the lib folder is not getting
> created on HDFS at the default appPath location.
> e.g. when replication appPath is:
> {noformat}
> ${nameNode}/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION/hcat-cluster1
> {noformat}
> the lib folder is getting created at:
> {noformat}
> /projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION
> {noformat}
> It looks to be a regression due to FALCON-390.
--
This message was sent by Atlassian JIRA
(v6.2#6252)