[ https://issues.apache.org/jira/browse/FLUME-2392?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14012197#comment-14012197 ]
wolfgang hoschek commented on FLUME-2392: ----------------------------------------- The dependencies have been made “optional” in flume-ng-sinks/flume-ng-morphline-solr-sink/pom.xml via <optional>true</optional>, thus the dependencies don’t ship automatically with the build. Here is a flume-centric way of getting hold of all the jars and plugging them into flume: https://groups.google.com/a/cloudera.org/d/msg/cdk-dev/7T4pTebdWN4/sBHGkoS70LkJ > Flume MorphlineSink can not work because of java.lang.NoClassDefFoundError: > org/kitesdk/morphline/api/MorphlineCompilationException > ----------------------------------------------------------------------------------------------------------------------------------- > > Key: FLUME-2392 > URL: https://issues.apache.org/jira/browse/FLUME-2392 > Project: Flume > Issue Type: Improvement > Components: Sinks+Sources > Affects Versions: v1.5.0 > Reporter: liyunzhang > Fix For: v1.6.0 > > > Test Flume+Solr, use flume 1.5 + solr 4.6. > You can reproduce it by following these steps > 1. Download > http://www.apache.org/dyn/closer.cgi/flume/1.5.0/apache-flume-1.5.0-bin.tar.gz > 2. install: extract to /usr/lib/apache-flume-1.5.0-bin > 3. create > /usr/lib/apache-flume-1.5.0-bin/conf/flume-conf-morphlineSolr.properties > #cat /usr/lib/apache-flume-1.5.0-bin/conf/flume-conf-morphlineSolr.properties > a1.channels = c1 > a1.sources = r1 > a1.sinks = k1 > a1.channels.c1.type = memory > a1.sources.r1.channels = c1 > a1.sources.r1.type = exec > a1.sources.r1.command = tail -F /var/log/a1.new.log > a1.sinks.k1.channel = c1 > a1.sinks.k1.type = org.apache.flume.sink.solr.morphline.MorphlineSolrSink > a1.sinks.k1.morphlineFile = > /usr/lib/apache-flume-1.5.0-bin/conf/morphline.conf > a1.channels.MemChannel.type = memory > a1.channels.MemChannel.capacity = 10000 > a1.channels.MemChannel.transactionCapacity = 100 > 4. 
create /usr/lib/apache-flume-1.5.0-bin/conf/morphline.conf > #cat /usr/lib/apache-flume-1.5.0-bin/conf/morphline.conf > morphlines : [ > { > # Name used to identify a morphline. E.g. used if there are multiple > # morphlines in a morphline config file > id : morphline1 > # Import all morphline commands in these java packages and their > # subpackages. Other commands that may be present on the classpath are > # not visible to this morphline. > importCommands : ["com.cloudera.**", "org.apache.solr.**"] > > commands : [ > { > # Parse input attachment and emit a record for each input line > readLine { > charset : UTF-8 > } > } > { > grok { > # Consume the output record of the previous command and pipe another > # record downstream. > # > # A grok-dictionary is a config file that contains prefabricated > # regular expressions that can be referred to by name. grok patterns > # specify such a regex name, plus an optional output field name. > # The syntax is %{REGEX_NAME:OUTPUT_FIELD_NAME} > # The input line is expected in the "message" input field. > #dictionaryFiles : [src/test/resources/grok-dictionaries] > dictionaryFiles : [/etc/flume/conf/grok-dictionaries] > expressions : { > message : """%{TIMESTAMP_LOG:timestamp} %{LOGLEVEL:loglevel} > %{DATA:classname}: %{GREEDYDATA:msg}""" > } > } > } > # Consume the output record of the previous command, convert > # the timestamp, and pipe another record downstream. > # > # convert timestamp field to native Solr timestamp format > # e.g. 2012-09-06T07:14:34Z to 2012-09-06T07:14:34.000Z > { > convertTimestamp { > field : timestamp > inputFormats : ["yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", "yyyy-MM-dd > HH:mm:ss,SSS"] > inputTimezone : America/Los_Angeles > outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'" > outputTimezone : UTC > } > } > { > generateUUID { > field : id > } > } > # Consume the output record of the previous command, transform it > # and pipe the record downstream. 
> # > # This command deletes record fields that are unknown to Solr > # schema.xml. Recall that Solr throws an exception on any attempt to > # load a document that contains a field that isn't specified in > # schema.xml. > { > sanitizeUnknownSolrFields { > # Location from which to fetch Solr schema > solrLocator : { > collection : collection1 # Name of solr collection > zkHost : "127.0.0.1:2181/" # ZooKeeper ensemble > } > } > } > # log the record at INFO level to SLF4J > { logInfo { format : "output record: {}", args : ["@{}"] } } > # load the record into a Solr server or MapReduce Reducer > { > loadSolr { > solrLocator : { > collection : collection1 # Name of solr collection > zkHost : "127.0.0.1:2181/" # ZooKeeper ensemble > } > } > } > ] > } > ] > 5. start flume: > # cd /usr/lib/apache-flume-1.5.0-bin > #./bin/flume-ng agent --conf conf --conf-file > conf/flume-conf-morphlineSolr.properties --name a1 > The following error message is found in the log: > 29 May 2014 16:09:18,652 ERROR [lifecycleSupervisor-1-1] > (org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run:253) - > Unable to start SinkRunner: { > policy:org.apache.flume.sink.DefaultSinkProcessor@c0880a8 counterGroup:{ > name:null counters:{} } } - Exception follows. 
> java.lang.NoClassDefFoundError: > org/kitesdk/morphline/api/MorphlineCompilationException > at java.lang.Class.forName0(Native Method) > at java.lang.Class.forName(Class.java:190) > at > org.apache.flume.sink.solr.morphline.MorphlineSink.start(MorphlineSink.java:93) > at > org.apache.flume.sink.DefaultSinkProcessor.start(DefaultSinkProcessor.java:46) > at org.apache.flume.SinkRunner.start(SinkRunner.java:79) > at > org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:251) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) > at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:304) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:178) > at > java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:744) > Caused by: java.lang.ClassNotFoundException: > org.kitesdk.morphline.api.MorphlineCompilationException > at java.net.URLClassLoader$1.run(URLClassLoader.java:366) > at java.net.URLClassLoader$1.run(URLClassLoader.java:355) > at java.security.AccessController.doPrivileged(Native Method) > at java.net.URLClassLoader.findClass(URLClassLoader.java:354) > at java.lang.ClassLoader.loadClass(ClassLoader.java:425) > at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) > at java.lang.ClassLoader.loadClass(ClassLoader.java:358) > ... 13 more -- This message was sent by Atlassian JIRA (v6.2#6252)