NIFI-505: Initial import of language translation nar
Project: http://git-wip-us.apache.org/repos/asf/incubator-nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-nifi/commit/178c5cd2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-nifi/tree/178c5cd2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-nifi/diff/178c5cd2 Branch: refs/heads/NIFI-271 Commit: 178c5cd287eed734bd3d93665df27682db941a8b Parents: ff0bd2c Author: Mark Payne <marka...@hotmail.com> Authored: Thu Apr 9 17:55:33 2015 -0400 Committer: Mark Payne <marka...@hotmail.com> Committed: Thu Apr 9 17:55:33 2015 -0400 ---------------------------------------------------------------------- .../nifi-language-translation-nar/pom.xml | 36 ++ .../nifi-yandex-processors/.gitignore | 1 + .../nifi-yandex-processors/pom.xml | 63 ++++ .../nifi/processors/yandex/YandexTranslate.java | 325 +++++++++++++++++++ .../processors/yandex/model/Translation.java | 52 +++ .../nifi/processors/yandex/util/Languages.java | 86 +++++ .../yandex/util/ObjectMapperResolver.java | 48 +++ .../org.apache.nifi.processor.Processor | 16 + .../processors/yandex/TestYandexTranslate.java | 141 ++++++++ .../nifi-language-translation-bundle/pom.xml | 48 +++ 10 files changed, 816 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-language-translation-nar/pom.xml ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-language-translation-nar/pom.xml b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-language-translation-nar/pom.xml new file mode 100644 index 0000000..4d8b790 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-language-translation-nar/pom.xml @@ -0,0 +1,36 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + 
contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-language-translation-bundle</artifactId> + <version>0.1.0-incubating-SNAPSHOT</version> + </parent> + + <artifactId>nifi-language-translation-nar</artifactId> + <packaging>nar</packaging> + + <dependencies> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-yandex-processors</artifactId> + <version>0.1.0-incubating-SNAPSHOT</version> + </dependency> + </dependencies> + +</project> http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/.gitignore ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/.gitignore b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/.gitignore new file mode 100644 index 0000000..b83d222 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/.gitignore @@ 
-0,0 +1 @@ +/target/ http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/pom.xml ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/pom.xml b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/pom.xml new file mode 100644 index 0000000..a5f9f0e --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/pom.xml @@ -0,0 +1,63 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-language-translation-bundle</artifactId> + <version>0.1.0-incubating-SNAPSHOT</version> + </parent> + + <artifactId>nifi-yandex-processors</artifactId> + <packaging>jar</packaging> + + <dependencies> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-processor-utils</artifactId> + </dependency> + + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-client</artifactId> + </dependency> + <dependency> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-json</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-mock</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-simple</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + </dependencies> +</project> http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/YandexTranslate.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/YandexTranslate.java b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/YandexTranslate.java new file mode 100644 index 0000000..a5eecc6 --- 
/dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/YandexTranslate.java @@ -0,0 +1,325 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.yandex; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; + +import org.apache.nifi.annotation.behavior.DynamicProperty; +import org.apache.nifi.annotation.behavior.SupportsBatching; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.annotation.lifecycle.OnStopped; +import org.apache.nifi.components.PropertyDescriptor; 
+import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.components.Validator; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.io.OutputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.processors.yandex.model.Translation; +import org.apache.nifi.processors.yandex.util.Languages; +import org.apache.nifi.processors.yandex.util.ObjectMapperResolver; +import org.apache.nifi.stream.io.StreamUtils; +import org.apache.nifi.util.StopWatch; + +import com.sun.jersey.api.client.Client; +import com.sun.jersey.api.client.ClientResponse; +import com.sun.jersey.api.client.ClientResponse.Status; +import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.api.client.config.ClientConfig; +import com.sun.jersey.api.client.config.DefaultClientConfig; +import com.sun.jersey.api.json.JSONConfiguration; +import com.sun.jersey.core.util.MultivaluedMapImpl; + +@SupportsBatching +@Tags({"yandex", "translate", "translation", "language"}) +@CapabilityDescription("Translates content and attributes from one language to another") +@WritesAttributes({ + @WritesAttribute(attribute="yandex.translate.failure.reason", description="If the text cannot be translated, this attribute will be set indicating the reason for the failure"), + @WritesAttribute(attribute="language", description="When the translation succeeds, if the content was translated, this attribute will be set indicating the new language of the content") +}) +@DynamicProperty(name="The name of an 
attribute to set that will contain the translated text of the value", + value="The value to translate", + supportsExpressionLanguage=true, + description="User-defined properties are used to translate arbitrary text based on attributes.") +public class YandexTranslate extends AbstractProcessor { + + public static final PropertyDescriptor KEY = new PropertyDescriptor.Builder() + .name("Yandex API Key") + .description("The API Key that is registered with Yandex") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(true) + .build(); + public static final PropertyDescriptor SOURCE_LANGUAGE = new PropertyDescriptor.Builder() + .name("Input Language") + .description("The language of incoming data") + .required(true) + .defaultValue("sp") + .expressionLanguageSupported(true) + .addValidator(new LanguageNameValidator()) + .build(); + public static final PropertyDescriptor TARGET_LANGUAGE = new PropertyDescriptor.Builder() + .name("Target Language") + .description("The language to translate the text into") + .required(true) + .defaultValue("en") + .expressionLanguageSupported(true) + .addValidator(new LanguageNameValidator()) + .build(); + public static final PropertyDescriptor TRANSLATE_CONTENT = new PropertyDescriptor.Builder() + .name("Translate Content") + .description("Specifies whether or not the content should be translated. 
If false, only the text specified by user-defined properties will be translated.") + .required(true) + .allowableValues("true", "false") + .defaultValue("false") + .build(); + public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder() + .name("Character Set") + .description("Specifies the character set of the data to be translated") + .required(true) + .defaultValue("UTF-8") + .expressionLanguageSupported(true) + .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) + .build(); + + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("This relationship is used when the translation is successful") + .build(); + public static final Relationship REL_COMMS_FAILURE = new Relationship.Builder() + .name("comms.failure") + .description("This relationship is used when the translation fails due to a problem such as a network failure, and for which the translation should be attempted again") + .build(); + public static final Relationship REL_TRANSLATION_FAILED = new Relationship.Builder() + .name("translation.failure") + .description("This relationship is used if the translation cannot be performed for some reason other than communications failure") + .build(); + + private List<PropertyDescriptor> descriptors; + private Set<Relationship> relationships; + + private volatile Client client; + + private static final String URL = "https://translate.yandex.net/api/v1.5/tr.json/translate"; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>(); + descriptors.add(KEY); + descriptors.add(SOURCE_LANGUAGE); + descriptors.add(TARGET_LANGUAGE); + descriptors.add(TRANSLATE_CONTENT); + descriptors.add(CHARACTER_SET); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<Relationship>(); + relationships.add(REL_SUCCESS); + 
relationships.add(REL_COMMS_FAILURE); + relationships.add(REL_TRANSLATION_FAILED); + this.relationships = Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @Override + protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { + return new PropertyDescriptor.Builder() + .name(propertyDescriptorName) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .dynamic(true) + .build(); + } + + @Override + protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) { + final List<ValidationResult> results = new ArrayList<>(); + if ( validationContext.getProperty(TRANSLATE_CONTENT).asBoolean().equals(Boolean.FALSE) ) { + boolean foundDynamic = false; + for ( final PropertyDescriptor descriptor : validationContext.getProperties().keySet() ) { + if ( descriptor.isDynamic() ) { + foundDynamic = true; + break; + } + } + + if ( !foundDynamic ) { + results.add(new ValidationResult.Builder().subject("Text to translate").input("<none>").valid(false).explanation("Must either set 'Translate Content' to true or add at least one user-defined property").build()); + } + } + + return results; + } + + @OnScheduled + public void onScheduled(final ProcessContext context) { + final ClientConfig config = new DefaultClientConfig(); + config.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING, Boolean.TRUE); + config.getClasses().add(ObjectMapperResolver.class); + + client = Client.create(config); + } + + @OnStopped + public void destroyClient() { + if ( client != null ) { + client.destroy(); + } + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + FlowFile flowFile = session.get(); 
+ if ( flowFile == null ) { + return; + } + + final StopWatch stopWatch = new StopWatch(true); + final String key = context.getProperty(KEY).getValue(); + final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue(); + final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue(); + final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue(); + + final List<String> attributeNames = new ArrayList<>(); + final List<String> textValues = new ArrayList<>(); + for ( final PropertyDescriptor descriptor : context.getProperties().keySet() ) { + if ( descriptor.isDynamic() ) { + attributeNames.add(descriptor.getName()); // add to list so that we know the order when the translations come back. + textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue()); + } + } + + if ( context.getProperty(TRANSLATE_CONTENT).asBoolean() ) { + final byte[] buff = new byte[(int) flowFile.getSize()]; + session.read(flowFile, new InputStreamCallback() { + @Override + public void process(final InputStream in) throws IOException { + StreamUtils.fillBuffer(in, buff); + } + }); + final String content = new String(buff, Charset.forName(encoding)); + textValues.add(content); + } + + WebResource webResource = client.resource(URL); + + final MultivaluedMap<String, String> paramMap = new MultivaluedMapImpl(); + paramMap.put("text", textValues); + paramMap.add("key", key); + paramMap.add("lang", sourceLanguage + "-" + targetLanguage); + + WebResource.Builder builder = webResource + .accept(MediaType.APPLICATION_JSON) + .type(MediaType.APPLICATION_FORM_URLENCODED); + builder = builder.entity(paramMap); + + final ClientResponse response; + try { + response = builder.post(ClientResponse.class); + } catch (final Exception e) { + getLogger().error("Failed to make request to Yandex to transate text for {} due to {}; 
routing to comms.failure", new Object[] {flowFile, e}); + session.transfer(flowFile, REL_COMMS_FAILURE); + return; + } + + if ( response.getStatus() != Status.OK.getStatusCode() ) { + getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[] { + flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName()}); + flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase()); + session.transfer(flowFile, REL_TRANSLATION_FAILED); + return; + } + + final Map<String, String> newAttributes = new HashMap<>(); + final Translation translation = response.getEntity(Translation.class); + final List<String> texts = translation.getText(); + for (int i=0; i < texts.size(); i++) { + final String text = texts.get(i); + if ( i < attributeNames.size() ) { + final String attributeName = attributeNames.get(i); + newAttributes.put(attributeName, text); + } else { + flowFile = session.write(flowFile, new OutputStreamCallback() { + @Override + public void process(final OutputStream out) throws IOException { + out.write(text.getBytes(encoding)); + } + }); + + newAttributes.put("language", targetLanguage); + } + } + + if ( !newAttributes.isEmpty() ) { + flowFile = session.putAllAttributes(flowFile, newAttributes); + } + + stopWatch.stop(); + session.transfer(flowFile, REL_SUCCESS); + getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success", new Object[] {texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration()}); + } + + + private static class LanguageNameValidator implements Validator { + + @Override + public ValidationResult validate(final String subject, final String input, final ValidationContext context) { + if ( context.isExpressionLanguagePresent(input) ) { + return new ValidationResult.Builder().subject(subject).input(input).valid(true).explanation("Expression 
Language Present").build(); + } + + if ( Languages.getLanguageMap().keySet().contains(input.toLowerCase()) ) { + return new ValidationResult.Builder().subject(subject).input(input).valid(true).build(); + } + + return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(input + " is not a language that is supported by Yandex").build(); + } + + } +} http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/model/Translation.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/model/Translation.java b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/model/Translation.java new file mode 100644 index 0000000..eeb7c3f --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/model/Translation.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.yandex.model; + +import java.util.List; + +import javax.xml.bind.annotation.XmlRootElement; + +@XmlRootElement(name = "translation") +public class Translation { + private int code; + private String lang; + private List<String> text; + + public int getCode() { + return code; + } + + public void setCode(final int code) { + this.code = code; + } + + public String getLang() { + return lang; + } + + public void setLang(final String lang) { + this.lang = lang; + } + + public List<String> getText() { + return text; + } + + public void setText(final List<String> text) { + this.text = text; + } +} http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/Languages.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/Languages.java b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/Languages.java new file mode 100644 index 0000000..791d6a3 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/Languages.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.yandex.util; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class Languages { + private static final Map<String, String> languageAbbreviationMap = new HashMap<>(); + + static { + languageAbbreviationMap.put("ar", "arabic"); + languageAbbreviationMap.put("az", "azerbaijani"); + languageAbbreviationMap.put("be", "belarusian"); + languageAbbreviationMap.put("bg", "bulgarian"); + languageAbbreviationMap.put("bs", "bosnian"); + languageAbbreviationMap.put("ca", "catalan"); + languageAbbreviationMap.put("cs", "czech"); + languageAbbreviationMap.put("da", "danish"); + languageAbbreviationMap.put("de", "german"); + languageAbbreviationMap.put("el", "greek"); + languageAbbreviationMap.put("en", "english"); + languageAbbreviationMap.put("es", "spanish"); + languageAbbreviationMap.put("et", "estonian"); + languageAbbreviationMap.put("fi", "finnish"); + languageAbbreviationMap.put("fr", "french"); + languageAbbreviationMap.put("he", "hebrew"); + languageAbbreviationMap.put("hr", "croatian"); + languageAbbreviationMap.put("hu", "hungarian"); + languageAbbreviationMap.put("hy", "armenian"); + languageAbbreviationMap.put("id", "indonesian"); + languageAbbreviationMap.put("is", "icelandic"); + languageAbbreviationMap.put("it", "italian"); + languageAbbreviationMap.put("ja", "japanese"); + languageAbbreviationMap.put("ka", "georgian"); + languageAbbreviationMap.put("ko", "korean"); + languageAbbreviationMap.put("lt", "lithuanian"); + languageAbbreviationMap.put("lv", "latvian"); + 
languageAbbreviationMap.put("mk", "macedonian"); + languageAbbreviationMap.put("ms", "malay"); + languageAbbreviationMap.put("mt", "maltese"); + languageAbbreviationMap.put("nl", "dutch"); + languageAbbreviationMap.put("no", "norwegian"); + languageAbbreviationMap.put("pl", "polish"); + languageAbbreviationMap.put("pt", "portuguese"); + languageAbbreviationMap.put("ro", "romanian"); + languageAbbreviationMap.put("ru", "russian"); + languageAbbreviationMap.put("sk", "slovak"); + languageAbbreviationMap.put("sl", "slovenian"); + languageAbbreviationMap.put("sq", "albanian"); + languageAbbreviationMap.put("sr", "serbian"); + languageAbbreviationMap.put("sv", "swedish"); + languageAbbreviationMap.put("th", "thai"); + languageAbbreviationMap.put("tr", "turkish"); + languageAbbreviationMap.put("uk", "ukrainian"); + languageAbbreviationMap.put("vi", "vietnamese"); + languageAbbreviationMap.put("zh", "chinese"); + + final Map<String, String> reverseMap = new HashMap<>(); + for ( final Map.Entry<String, String> entry : languageAbbreviationMap.entrySet() ) { + reverseMap.put(entry.getValue(), entry.getKey()); + } + + languageAbbreviationMap.putAll(reverseMap); + } + + + public static Map<String, String> getLanguageMap() { + return Collections.unmodifiableMap(languageAbbreviationMap); + } +} http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/ObjectMapperResolver.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/ObjectMapperResolver.java b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/ObjectMapperResolver.java new file mode 100644 index 0000000..085cf7f --- /dev/null +++ 
b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/java/org/apache/nifi/processors/yandex/util/ObjectMapperResolver.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.yandex.util; + +import javax.ws.rs.ext.ContextResolver; +import javax.ws.rs.ext.Provider; +import org.codehaus.jackson.map.AnnotationIntrospector; +import org.codehaus.jackson.map.DeserializationConfig; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.SerializationConfig; +import org.codehaus.jackson.map.annotate.JsonSerialize.Inclusion; +import org.codehaus.jackson.xc.JaxbAnnotationIntrospector; + +@Provider +public class ObjectMapperResolver implements ContextResolver<ObjectMapper> { + + private final ObjectMapper mapper; + + public ObjectMapperResolver() throws Exception { + mapper = new ObjectMapper(); + + final AnnotationIntrospector jaxbIntrospector = new JaxbAnnotationIntrospector(); + final SerializationConfig serializationConfig = mapper.getSerializationConfig(); + final DeserializationConfig deserializationConfig = mapper.getDeserializationConfig(); + + 
mapper.setSerializationConfig(serializationConfig.withSerializationInclusion(Inclusion.NON_NULL).withAnnotationIntrospector(jaxbIntrospector)); + mapper.setDeserializationConfig(deserializationConfig.withAnnotationIntrospector(jaxbIntrospector)); + } + + @Override + public ObjectMapper getContext(Class<?> objectType) { + return mapper; + } +} http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor new file mode 100644 index 0000000..dc81439 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.nifi.processors.yandex.YandexTranslate \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/test/java/org/apache/nifi/processors/yandex/TestYandexTranslate.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/test/java/org/apache/nifi/processors/yandex/TestYandexTranslate.java b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/test/java/org/apache/nifi/processors/yandex/TestYandexTranslate.java new file mode 100644 index 0000000..4f82049 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/src/test/java/org/apache/nifi/processors/yandex/TestYandexTranslate.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.yandex; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +import org.apache.nifi.processors.yandex.YandexTranslate; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +@Ignore("For local testing only; requires local file to be populated with Yandex API Key") +public class TestYandexTranslate { + + private TestRunner testRunner; + private String apiKey; + + @Before + public void init() throws IOException { + testRunner = TestRunners.newTestRunner(YandexTranslate.class); + + final Properties properties = new Properties(); + try (final InputStream in = new FileInputStream(new File("C:/dev/notes/yandex-info.txt"))) { + properties.load(in); + } + apiKey = properties.getProperty("api_key").trim(); + } + + + @Test + public void testTranslateContent() { + testRunner.setProperty(YandexTranslate.KEY, apiKey); + testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr"); + testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en"); + testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "true"); + testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8"); + + testRunner.enqueue("bonjour".getBytes()); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0); + + final String outText = new String(out.toByteArray()); + assertEquals("hello", outText); + } + + + @Test + public void testTranslateSingleAttribute() { + testRunner.setProperty(YandexTranslate.KEY, apiKey); + testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr"); + testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, 
"en"); + testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "false"); + testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8"); + testRunner.setProperty("translated", "bonjour"); + + testRunner.enqueue(new byte[0]); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0); + + assertEquals(0, out.toByteArray().length); + out.assertAttributeEquals("translated", "hello"); + } + + @Test + public void testTranslateMultipleAttributes() { + testRunner.setProperty(YandexTranslate.KEY, apiKey); + testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr"); + testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en"); + testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "false"); + testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8"); + testRunner.setProperty("hello", "bonjour"); + testRunner.setProperty("translate", "traduire"); + testRunner.setProperty("fun", "amusant"); + + testRunner.enqueue(new byte[0]); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0); + + assertEquals(0, out.toByteArray().length); + out.assertAttributeEquals("hello", "hello"); + out.assertAttributeEquals("translate", "translate"); + out.assertAttributeEquals("fun", "fun"); + } + + + @Test + public void testTranslateContentAndMultipleAttributes() { + testRunner.setProperty(YandexTranslate.KEY, apiKey); + testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr"); + testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en"); + testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "true"); + testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8"); + testRunner.setProperty("hello", "bonjour"); + testRunner.setProperty("translate", "traduire"); + 
testRunner.setProperty("fun", "amusant"); + testRunner.setProperty("nifi", "nifi"); + + testRunner.enqueue("ordinateur".getBytes()); + testRunner.run(); + + testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0); + + out.assertContentEquals("computer"); + + out.assertAttributeEquals("hello", "hello"); + out.assertAttributeEquals("translate", "translate"); + out.assertAttributeEquals("fun", "fun"); + out.assertAttributeEquals("nifi", "nifi"); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/178c5cd2/nifi/nifi-nar-bundles/nifi-language-translation-bundle/pom.xml ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-language-translation-bundle/pom.xml b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/pom.xml new file mode 100644 index 0000000..43573e7 --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-language-translation-bundle/pom.xml @@ -0,0 +1,48 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <!-- Inherits from nifi-nar-bundles; this file declares no groupId or version of its own. -->
+    <parent>
+        <groupId>org.apache.nifi</groupId>
+        <artifactId>nifi-nar-bundles</artifactId>
+        <version>0.1.0-incubating-SNAPSHOT</version>
+    </parent>
+    <!-- pom-packaged module that groups the two modules listed below. -->
+    <artifactId>nifi-language-translation-bundle</artifactId>
+    <packaging>pom</packaging>
+    <!-- Child modules, as named: the Yandex processors and the language-translation NAR. -->
+    <modules>
+        <module>nifi-yandex-processors</module>
+        <module>nifi-language-translation-nar</module>
+    </modules>
+    <!-- Both Jersey artifacts follow ${jersey.version}; that property is not defined in this file (presumably inherited, confirm). -->
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>com.sun.jersey</groupId>
+                <artifactId>jersey-client</artifactId>
+                <version>${jersey.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>com.sun.jersey</groupId>
+                <artifactId>jersey-json</artifactId>
+                <version>${jersey.version}</version>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
+
+</project>