Updated Branches: refs/heads/trunk 296fc9f92 -> cf6298415
http://git-wip-us.apache.org/repos/asf/flume/blob/cf629841/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/resources/test-morphlines/solrCellDocumentTypes.conf ---------------------------------------------------------------------- diff --git a/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/resources/test-morphlines/solrCellDocumentTypes.conf b/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/resources/test-morphlines/solrCellDocumentTypes.conf new file mode 100644 index 0000000..2574144 --- /dev/null +++ b/flume-ng-sinks/flume-ng-morphline-solr-sink/src/test/resources/test-morphlines/solrCellDocumentTypes.conf @@ -0,0 +1,260 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Application configuration file in HOCON format (Human-Optimized Config Object Notation). +# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md +# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). 
+# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html + +# morphline.conf example file +# this is a comment +// this is yet another comment + +morphlines : [ + { + id : morphline1 + importCommands : ["com.cloudera.**", "org.apache.solr.**"] + + commands : [ + { separateAttachments {} } + + # java command that doesn't do anything except for test compilation + { + java { + imports : "import java.util.*;" + code: """ + List tags = record.get("javaWithImports"); + return child.process(record); + """ + } + } + + # java command that doesn't do anything except for test compilation + { + java { + code: """ + List tags = record.get("javaWithoutImports"); + return child.process(record); + """ + } + } + + { + # used for auto-detection if MIME type isn't explicitly supplied + detectMimeType { + includeDefaultMimeTypes : true + mimeTypesFiles : [target/test-classes/custom-mimetypes.xml] + } + } + + { + tryRules { + throwExceptionIfAllRulesFailed : true + rules : [ + # next top-level rule: + { + commands : [ + { logDebug { format : "hello unpack" } } + { unpack {} } + { generateUUID {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { logDebug { format : "hello decompress" } } + { decompress {} } + { callParentPipe {} } + ] + } + + { + commands : [ + { + readAvroContainer { + supportedMimeTypes : [avro/binary] + # readerSchemaString : "<json can go here>" # optional, avro json schema blurb for getSchema() + # readerSchemaFile : /path/to/syslog.avsc + } + } + + { extractAvroTree {} } + + { + setValues { + id : "@{/id}" + user_screen_name : "@{/user_screen_name}" + text : "@{/text}" + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } + + { + commands : [ + { + readJsonTestTweets { + supportedMimeTypes : ["mytwittertest/json+delimited+length"] + } + } + + { + sanitizeUnknownSolrFields { + solrLocator : ${SOLR_LOCATOR} + } + } + ] + } + + # next top-level rule: + { + commands : [ + { logDebug { format 
: "hello solrcell" } } + { + # wrap SolrCell around an HTML Tika parser + solrCell { + solrLocator : ${SOLR_LOCATOR} + # captureAttr : true # default is false + capture : [ + + # twitter feed schema + user_friends_count + user_location + user_description + user_statuses_count + user_followers_count + user_name + user_screen_name + created_at + text + retweet_count + retweeted + in_reply_to_user_id + source + in_reply_to_status_id + media_url_https + expanded_url + + # file metadata + file_download_url + file_upload_url + file_scheme + file_host + file_port + file_path + file_name + file_length + file_last_modified + file_owner + file_group + file_permissions_user + file_permissions_group + file_permissions_other + file_permissions_stickybit + ] + + fmap : { content : text, content-type : content_type } # rename the "content" field to "text" and "content-type" to "content_type" + dateFormats : [ "yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat patterns + # xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" + uprefix : "ignored_" + lowernames : true + # solrContentHandlerFactory : org.apache.solr.tika.TrimSolrContentHandlerFactory + + # Tika parsers to be registered. 
If multiple parsers support the same MIME type, + # the parser is chosen that is closest to the bottom in this list: + parsers : [ + { parser : org.apache.tika.parser.asm.ClassParser } + # { parser : org.gagravarr.tika.OggParser, additionalSupportedMimeTypes : [audio/ogg] } + { parser : org.gagravarr.tika.FlacParser } + { parser : org.apache.tika.parser.audio.AudioParser } + { parser : org.apache.tika.parser.audio.MidiParser } + { parser : org.apache.tika.parser.crypto.Pkcs7Parser } + { parser : org.apache.tika.parser.dwg.DWGParser } + { parser : org.apache.tika.parser.epub.EpubParser } + { parser : org.apache.tika.parser.executable.ExecutableParser } + { parser : org.apache.tika.parser.feed.FeedParser } + { parser : org.apache.tika.parser.font.AdobeFontMetricParser } + { parser : org.apache.tika.parser.font.TrueTypeParser } + { parser : org.apache.tika.parser.xml.XMLParser } + { parser : org.apache.tika.parser.html.HtmlParser } + { parser : org.apache.tika.parser.image.ImageParser } + { parser : org.apache.tika.parser.image.PSDParser } + { parser : org.apache.tika.parser.image.TiffParser } + { parser : org.apache.tika.parser.iptc.IptcAnpaParser } + { parser : org.apache.tika.parser.iwork.IWorkPackageParser } + { parser : org.apache.tika.parser.jpeg.JpegParser } + { parser : org.apache.tika.parser.mail.RFC822Parser } + { parser : org.apache.tika.parser.mbox.MboxParser, additionalSupportedMimeTypes : [message/x-emlx] } + { parser : org.apache.tika.parser.microsoft.OfficeParser } + { parser : org.apache.tika.parser.microsoft.TNEFParser } + { parser : org.apache.tika.parser.microsoft.ooxml.OOXMLParser } + { parser : org.apache.tika.parser.mp3.Mp3Parser } + { parser : org.apache.tika.parser.mp4.MP4Parser } + { parser : org.apache.tika.parser.hdf.HDFParser } + { parser : org.apache.tika.parser.netcdf.NetCDFParser } + { parser : org.apache.tika.parser.odf.OpenDocumentParser } + { parser : org.apache.tika.parser.pdf.PDFParser } + { parser : 
org.apache.tika.parser.pkg.CompressorParser } + { parser : org.apache.tika.parser.pkg.PackageParser } + { parser : org.apache.tika.parser.rtf.RTFParser } + { parser : org.apache.tika.parser.txt.TXTParser } + { parser : org.apache.tika.parser.video.FLVParser } + { parser : org.apache.tika.parser.xml.DcXMLParser } + { parser : org.apache.tika.parser.xml.FictionBookParser } + { parser : org.apache.tika.parser.chm.ChmParser } + ] + } + } + + { generateUUID { field : ignored_base_id } } + + { + generateSolrSequenceKey { + baseIdField: ignored_base_id + solrLocator : ${SOLR_LOCATOR} + } + } + + ] + } + ] + } + } + + { + loadSolr { + solrLocator : ${SOLR_LOCATOR} + } + } + + { + logDebug { + format : "My output record: {}" + args : ["@{}"] + } + } + + ] + } +] http://git-wip-us.apache.org/repos/asf/flume/blob/cf629841/flume-ng-sinks/pom.xml ---------------------------------------------------------------------- diff --git a/flume-ng-sinks/pom.xml b/flume-ng-sinks/pom.xml index 7170348..3ee75e5 100644 --- a/flume-ng-sinks/pom.xml +++ b/flume-ng-sinks/pom.xml @@ -45,5 +45,6 @@ limitations under the License. <module>flume-irc-sink</module> <module>flume-ng-hbase-sink</module> <module>flume-ng-elasticsearch-sink</module> + <module>flume-ng-morphline-solr-sink</module> </modules> </project> http://git-wip-us.apache.org/repos/asf/flume/blob/cf629841/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 8026936..251f345 100644 --- a/pom.xml +++ b/pom.xml @@ -549,7 +549,7 @@ limitations under the License. <exclude>**/*.avsc</exclude> <exclude>**/*.avro</exclude> <exclude>**/docs/**</exclude> - <exclude>**/test/resources/test_command.txt</exclude> + <exclude>**/test/resources/**</exclude> <exclude>**/.settings/*</exclude> <exclude>**/.classpath</exclude> <exclude>**/.project</exclude> @@ -942,6 +942,12 @@ limitations under the License. 
</dependency> <dependency> + <groupId>org.apache.flume.flume-ng-sinks</groupId> + <artifactId>flume-ng-morphline-solr-sink</artifactId> + <version>1.4.0-SNAPSHOT</version> + </dependency> + + <dependency> <groupId>org.apache.flume.flume-ng-sources</groupId> <artifactId>flume-scribe-source</artifactId> <version>1.4.0-SNAPSHOT</version>
