[ https://issues.apache.org/jira/browse/NIFI-615?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15344205#comment-15344205 ]
ASF GitHub Bot commented on NIFI-615: ------------------------------------- Github user jskora commented on a diff in the pull request: https://github.com/apache/nifi/pull/556#discussion_r68041847 --- Diff: nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/media/ExtractMediaMetadata.java --- @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.media; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; +import java.util.regex.Pattern; + +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; +import org.apache.nifi.annotation.behavior.SupportsBatching; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.components.Validator; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ProcessorLog; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.util.ObjectHolder; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.SAXException; + +@InputRequirement(Requirement.INPUT_REQUIRED) 
+@Tags({"media", "file", "format", "metadata", "audio", "video", "image", "document", "pdf"}) +@CapabilityDescription("Extract the content metadata from flowfiles containing audio, video, image, and other file " + + "types. This processor relies on the Apache Tika project for file format detection and parsing. It " + + "extracts a long list of metadata types for media files including audio, video, and print media " + + "formats." + + "For the more details and the list of supported file types, visit the library's website " + + "at http://tika.apache.org/.") +@WritesAttributes({@WritesAttribute(attribute = "<Metadata Key Prefix>.<attribute>", description = "The extracted content metadata " + + "will be inserted with the attribute name \"<Metadata Key Prefix>.<attribute>\", or \"<attribute>\" if " + + "\"Metadata Key Prefix\" is not provided.")}) +@SupportsBatching +public class ExtractMediaMetadata extends AbstractProcessor { + + static final PropertyDescriptor MAX_NUMBER_OF_ATTRIBUTES = new PropertyDescriptor.Builder() + .name("Max Number of Attributes") + .description("Specify the max number of attributes to add to the flowfile. There is no guarantee in what order" + + " the tags will be processed. By default it will process all of them.") + .required(false) + .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) + .build(); + + private static final PropertyDescriptor MAX_ATTRIBUTE_LENGTH = new PropertyDescriptor.Builder() + .name("Max Attribute Length") + .description("Specifies the maximum length of a single attribute value. When a metadata item has multiple" + + " values, they will be merged until this length is reached and then \", ...\" will be added as" + + " an indicator that additional values where dropped. 
If a single value is longer than this, it" + + " will be truncated and \"(truncated)\" appended to indicate that truncation occurred.") + .required(true) + .defaultValue("10240") + .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) + .build(); + + static final PropertyDescriptor MIME_TYPE_FILTER = new PropertyDescriptor.Builder() + .name("MIME Type Filter") + .description("A regular expression identifying MIME types for which metadata should extracted. Flowfiles" + + " selected for scanning by the File Name Filter are parsed to determine the MIME type and extract" + + " metadata. If the MIME type found matches this regular expression or this expression is" + + " blank, the metadata keys that match the Metadata Key Filter will be added to the flowfile" + + " attributes. There is no guarantee in what order attributes will be produced. If" + + " left blank, metadata will be extracted from all flow files selected for scanning.") + .required(false) + .addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR) + .build(); + + static final PropertyDescriptor METADATA_KEY_FILTER = new PropertyDescriptor.Builder() + .name("Metadata Key Filter") + .description("A regular expression identifying which metadata keys received from the parser should be" + + " added to the flowfile attributes. If left blank, all metadata keys parsed will be added to the" + + " flowfile attributes.") + .required(false) + .addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR) + .build(); + + static final PropertyDescriptor METADATA_KEY_PREFIX = new PropertyDescriptor.Builder() + .name("Metadata Key Prefix") + .description("Text to be prefixed to metadata keys as the are added to the flowfile attributes. It is" + + " recommended to end with with a separator character like '.' 
or '-', this is not automatically " + + " added by the processor.") + .required(false) + .addValidator(StandardValidators.ATTRIBUTE_KEY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + static final PropertyDescriptor CONTENT_BUFFER_SIZE = new PropertyDescriptor.Builder() + .name("Content Buffer Size") + .description("The size for media content buffer during processing, or -1 for unlimited. If not" + + " provided, the underlying parser default is used.") + .required(false) + .addValidator(StandardValidators.INTEGER_VALIDATOR) + .addValidator(new Validator() { + @Override + public ValidationResult validate(String subject, String input, ValidationContext context) { + Integer val = null; + try { + val = Integer.parseInt(input); + } catch (NumberFormatException ignore) { + } + return new ValidationResult.Builder() + .subject(subject) + .input(input) + .valid(val != null && val >= -1) + .explanation(subject + " must be a valid integer equal to or greater than -1.") + .build(); + } + }) + .expressionLanguageSupported(true) --- End diff -- I removed Expression Language (EL) support since the buffer size should be fixed, not dynamic. > Create a processor to extract WAV file characteristics > ------------------------------------------------------ > > Key: NIFI-615 > URL: https://issues.apache.org/jira/browse/NIFI-615 > Project: Apache NiFi > Issue Type: Improvement > Reporter: Brandon DeVries > Assignee: Joe Skora > Priority: Minor > Fix For: 1.0.0, 0.7.0 > > > Create a processor to extract information from a WAV file, including > encoding, bit rate, metadata, etc... -- This message was sent by Atlassian JIRA (v6.3.4#6332)