[ https://issues.apache.org/jira/browse/FALCON-437?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13993784#comment-13993784 ]
Satish Mittal commented on FALCON-437:
--------------------------------------
Coordinator definition for an HCatalog (hcat) feed replication:
{noformat}
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<coordinator-app xmlns="uri:oozie:coordinator:0.3"
name="FALCON_FEED_REPLICATION_hcat-in-repl3_hcat-cluster1"
frequency="${coord:minutes(30)}" start="2014-05-08T10:00Z"
end="2030-01-01T00:00Z" timezone="UTC">
<controls>
<timeout>180</timeout>
<concurrency>1</concurrency>
<execution>FIFO</execution>
<throttle>12</throttle>
</controls>
<datasets>
<dataset name="input-dataset" frequency="${coord:minutes(30)}"
initial-instance="2014-05-08T10:00Z" timezone="UTC">
<uri-template>hcat://hostname:5055/default/table3/year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR};minute=${MINUTE}</uri-template>
<done-flag></done-flag>
</dataset>
<dataset name="output-dataset" frequency="${coord:minutes(30)}"
initial-instance="2014-05-08T10:00Z" timezone="UTC">
<uri-template>hcat://hostname:5055/default/table4/year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR};minute=${MINUTE}</uri-template>
</dataset>
</datasets>
<input-events>
<data-in name="input" dataset="input-dataset">
<instance>${coord:current(0)}</instance>
</data-in>
</input-events>
<output-events>
<data-out name="output" dataset="output-dataset">
<instance>${coord:current(0)}</instance>
</data-out>
</output-events>
<action>
<workflow>
<app-path>${nameNode}/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION/hcat-cluster1</app-path>
<configuration>
<property>
<name>falconTargetNameNode</name>
<value>hdfs://hostname:9000</value>
</property>
<property>
<name>shouldRecord</name>
<value>true</value>
</property>
<property>
<name>timeStamp</name>
<value>${coord:formatTime(coord:actualTime(),
'yyyy-MM-dd-HH-mm')}</value>
</property>
<property>
<name>falconTargetTable</name>
<value>table4</value>
</property>
<property>
<name>falconInPaths</name>
<value>${coord:dataIn('input')}</value>
</property>
<property>
<name>broker.url</name>
<value>tcp://localhost:61616</value>
</property>
<property>
<name>feedNames</name>
<value>hcat-in-repl3</value>
</property>
<property>
<name>falconSourceNameNode</name>
<value>hdfs://hostname:9000</value>
</property>
<property>
<name>entityType</name>
<value>feed</value>
</property>
<property>
<name>nominalTime</name>
<value>${coord:formatTime(coord:nominalTime(),
'yyyy-MM-dd-HH-mm')}</value>
</property>
<property>
<name>falconTargetJobTracker</name>
<value>hostname:8021</value>
</property>
<property>
<name>feedInstancePaths</name>
<value>${coord:dataOut('output')}</value>
</property>
<property>
<name>falconTargetPartition</name>
<value>${coord:dataInPartitionFilter('input',
'hive')}</value>
</property>
<property>
<name>logDir</name>
<value>${nameNode}/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/logs</value>
</property>
<property>
<name>userWorkflowEngine</name>
<value>falcon</value>
</property>
<property>
<name>falconInputFeedStorageTypes</name>
<value>TABLE</value>
</property>
<property>
<name>broker.ttlInMins</name>
<value>4320</value>
</property>
<property>
<name>queueName</name>
<value>default</value>
</property>
<property>
<name>oozie.wf.external.id</name>
<value>hcat-in-repl3/REPLICATION/${coord:nominalTime()}</value>
</property>
<property>
<name>falconSourceHcatNode</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>distcpTargetPaths</name>
<value>hdfs://hostname:9000//projects/falcon/hcolo2/staging/FALCON_FEED_REPLICATION_hcat-in-repl3_hcat-cluster2/default/table4/year=${coord:dataOutPartitionValue('output','year')}/${coord:formatTime(coord:nominalTime(),
'yyyy-MM-dd-HH-mm')}/hcat-cluster2/data</value>
</property>
<property>
<name>falconSourcePartition</name>
<value>${coord:dataInPartitionFilter('input',
'hive')}</value>
</property>
<property>
<name>workflowEngineUrl</name>
<value>http://localhost:11000/oozie/</value>
</property>
<property>
<name>falconFeedStorageType</name>
<value>TABLE</value>
</property>
<property>
<name>userBrokerImplClass</name>
<value>org.apache.activemq.ActiveMQConnectionFactory</value>
</property>
<property>
<name>falconSourceJobTracker</name>
<value>hostname:8021</value>
</property>
<property>
<name>distcpSourcePaths</name>
<value>hdfs://hostname:9000//projects/falcon/hcolo1/staging/FALCON_FEED_REPLICATION_hcat-in-repl3_hcat-cluster1/default/table3/year=${coord:dataOutPartitionValue('output','year')}/${coord:formatTime(coord:nominalTime(),
'yyyy-MM-dd-HH-mm')}/hcat-cluster2/data</value>
</property>
<property>
<name>broker.impl.class</name>
<value>org.apache.activemq.ActiveMQConnectionFactory</value>
</property>
<property>
<name>falconSourceTable</name>
<value>table3</value>
</property>
<property>
<name>userWorkflowName</name>
<value>replication-policy</value>
</property>
<property>
<name>entityName</name>
<value>hcat-in-repl3</value>
</property>
<property>
<name>mapBandwidthKB</name>
<value>102400</value>
</property>
<property>
<name>srcClusterName</name>
<value>hcat-cluster1</value>
</property>
<property>
<name>userBrokerUrl</name>
<value>tcp://localhost:61616?daemon=true</value>
</property>
<property>
<name>falconInputFeeds</name>
<value>hcat-in-repl3</value>
</property>
<property>
<name>falconTargetDatabase</name>
<value>default</value>
</property>
<property>
<name>userWorkflowVersion</name>
<value>0.6-incubating-SNAPSHOT-r623ee4a663d5b3ba5353f7357c08784a6f5222d9</value>
</property>
<property>
<name>falconTargetHcatNode</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>cluster</name>
<value>hcat-cluster2</value>
</property>
<property>
<name>jobPriority</name>
<value>NORMAL</value>
</property>
<property>
<name>srcClusterColo</name>
<value>hcolo1</value>
</property>
<property>
<name>sourceRelativePaths</name>
<value>IGNORE</value>
</property>
<property>
<name>falconSourceDatabase</name>
<value>default</value>
</property>
<property>
<name>maxMaps</name>
<value>5</value>
</property>
</configuration>
</workflow>
</action>
</coordinator-app>
{noformat}
Coordinator job configuration:
{noformat}
<configuration>
<property>
<name>oozie.coord.application.path</name>
<value>hdfs://hostname:9000/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION/hcat-cluster1.xml</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>falcon.libpath</name>
<value>/projects/falcon/hcolo2/working/lib</value>
</property>
<property>
<name>hcatNode</name>
<value>hcat://hostname:5055</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>oozie.bundle.application.path</name>
<value>hdfs://hostname:9000/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032</value>
</property>
<property>
<name>hcat.metastore.uri</name>
<value>thrift://hostname:5055</value>
</property>
<property>
<name>user.name</name>
<value>falcon</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.bundle.id</name>
<value>0000009-140508101950577-oozie-oozi-B</value>
</property>
<property>
<name>colo.name</name>
<value>hcolo2</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://hostname:9000</value>
</property>
<property>
<name>jobTracker</name>
<value>hostname:8021</value>
</property>
</configuration>
{noformat}
> Feed Replication workflows are failing
> --------------------------------------
>
> Key: FALCON-437
> URL: https://issues.apache.org/jira/browse/FALCON-437
> Project: Falcon
> Issue Type: Bug
> Affects Versions: 0.5, 0.6
> Reporter: Satish Mittal
> Priority: Blocker
>
> With the latest trunk, feed replication workflows are failing with a
> ClassNotFoundException for Falcon lib classes (LateDataHandler and
> FalconPostProcessing).
> I noticed that in the mapred.job.classpath.files conf property of the
> replication jobs, none of the Falcon workflow lib jars are getting added,
> hence the exception.
> I also noticed that when a feed is scheduled, the lib folder is not getting
> created on HDFS at the default appPath location.
> For example, when the replication appPath is:
> {noformat}
> ${nameNode}/projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION/hcat-cluster1
> {noformat}
> the lib folder is getting created at:
> {noformat}
> /projects/falcon/hcolo2/staging/falcon/workflows/feed/hcat-in-repl3/1399545050032/REPLICATION
> {noformat}
> This looks like a regression caused by FALCON-390.
--
This message was sent by Atlassian JIRA
(v6.2#6252)