[ https://issues.apache.org/jira/browse/NIFI-1899?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15350465#comment-15350465 ]
ASF GitHub Bot commented on NIFI-1899: -------------------------------------- Github user ijokarumawak commented on a diff in the pull request: https://github.com/apache/nifi/pull/483#discussion_r68526891 --- Diff: nifi-nar-bundles/nifi-email-bundle/nifi-email-processors/src/main/java/org/apache/nifi/processors/email/ExtractEmailHeaders.java --- @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.email; + + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.mail.util.MimeMessageParser; +import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; +import org.apache.nifi.annotation.behavior.SideEffectFree; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.stream.io.BufferedInputStream; + +import javax.mail.Address; +import javax.mail.Header; +import javax.mail.Message; +import javax.mail.MessagingException; +import javax.mail.Session; +import javax.mail.internet.MimeMessage; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + + +@EventDriven +@SideEffectFree +@Tags({"split", "email"}) +@InputRequirement(Requirement.INPUT_REQUIRED) 
+@CapabilityDescription("Using the flowfile content as source of data, extract header from an RFC compliant email file adding the relevant attributes to the flowfile. " + + "This processor does not perform extensive RFC validation but still requires a bare minimum compliance with RFC 2822") +@WritesAttributes({ + @WritesAttribute(attribute = "email.headers.bcc.*", description = "Each individual BCC recipient (if available)"), + @WritesAttribute(attribute = "email.headers.cc.*", description = "Each individual CC recipient (if available)"), + @WritesAttribute(attribute = "email.headers.from.*", description = "Each individual mailbox contained in the From of the Email (array as per RFC-2822)"), + @WritesAttribute(attribute = "email.headers.message-id", description = "The value of the Message-ID header (if available)"), + @WritesAttribute(attribute = "email.headers.received_date", description = "The Received-Date of the message (if available)"), + @WritesAttribute(attribute = "email.headers.sent_date", description = "Date the message was sent"), + @WritesAttribute(attribute = "email.headers.subject", description = "Subject of the message (if available)"), + @WritesAttribute(attribute = "email.headers.to.*", description = "Each individual TO recipient (if available)"), + @WritesAttribute(attribute = "email.attachment_count", description = "Number of attachments of the message" )}) + +public class ExtractEmailHeaders extends AbstractProcessor { + public static final String EMAIL_HEADER_BCC = "email.headers.bcc"; + public static final String EMAIL_HEADER_CC = "email.headers.cc"; + public static final String EMAIL_HEADER_FROM = "email.headers.from"; + public static final String EMAIL_HEADER_MESSAGE_ID = "email.headers.message-id"; + public static final String EMAIL_HEADER_RECV_DATE = "email.headers.received_date"; + public static final String EMAIL_HEADER_SENT_DATE = "email.headers.sent_date"; + public static final String EMAIL_HEADER_SUBJECT = "email.headers.subject"; + 
public static final String EMAIL_HEADER_TO = "email.headers.to"; + public static final String EMAIL_ATTACHMENT_COUNT = "email.attachment_count"; + + public static final PropertyDescriptor CAPTURED_HEADERS = new PropertyDescriptor.Builder() + .name("Header List") + .description("COLON separated list of headers to be extracted from the flowfile content. " + + "NOTE the header key is case insensitive and will be matched as lower-case." + + " Values will respect email email contents.") + .required(false) + .expressionLanguageSupported(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .defaultValue("x-mailer") + .build(); + + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("Extraction was successful") + .build(); + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("Flowfiles that could not be parsed as a RFC-2822 compliant message") + .build(); + + private Set<Relationship> relationships; + private List<PropertyDescriptor> descriptors; + + @Override + protected void init(final ProcessorInitializationContext context) { + final Set<Relationship> relationships = new HashSet<>(); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + this.relationships = Collections.unmodifiableSet(relationships); + + final List<PropertyDescriptor> descriptors = new ArrayList<>(); + + descriptors.add(CAPTURED_HEADERS); + this.descriptors = Collections.unmodifiableList(descriptors); + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) { + final ComponentLog logger = getLogger(); + final Map<String, String> attributes = new HashMap<>(); + final FlowFile originalFlowFile = session.get(); + if (originalFlowFile == null) { + return; + } + + + final List<String> capturedHeadersList = Arrays.asList(context.getProperty(CAPTURED_HEADERS).getValue().toLowerCase().split(":")); + + + 
session.read(originalFlowFile, new InputStreamCallback() { + @Override + public void process(final InputStream rawIn) throws IOException { + try (final InputStream in = new BufferedInputStream(rawIn)) { + Properties props = new Properties(); + Session mailSession = Session.getDefaultInstance(props, null); + MimeMessage originalMessage = new MimeMessage(mailSession, in); + MimeMessageParser parser = new MimeMessageParser(originalMessage).parse(); + // RFC-2822 determines that a message must have a "From:" header + // if a message lacks the field, it is flagged as invalid + Address[] from = originalMessage.getFrom(); + Date sentDate = originalMessage.getSentDate(); + if (from == null || sentDate == null ) { + // See ya later my friend... + throw new MessagingException("Message failed RFC2822 validation"); --- End diff -- This can be sent to the FAILURE relationship so that the same message will not stay in the buffer. > Create ListenSMTP & ExtractEmailAttachment processors > ----------------------------------------------------- > > Key: NIFI-1899 > URL: https://issues.apache.org/jira/browse/NIFI-1899 > Project: Apache NiFi > Issue Type: New Feature > Reporter: Andre > -- This message was sent by Atlassian JIRA (v6.3.4#6332)