[ https://issues.apache.org/jira/browse/NIFI-1575?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15189983#comment-15189983 ]
ASF GitHub Bot commented on NIFI-1575: -------------------------------------- Github user markap14 commented on a diff in the pull request: https://github.com/apache/nifi/pull/261#discussion_r55753842 --- Diff: nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/QueryDatabaseTable.java --- @@ -0,0 +1,607 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.standard; + +import org.apache.commons.lang3.StringUtils; +import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; +import org.apache.nifi.annotation.behavior.Stateful; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.state.Scope; +import org.apache.nifi.components.state.StateManager; +import org.apache.nifi.components.state.StateMap; +import org.apache.nifi.dbcp.DBCPService; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ProcessorLog; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.io.OutputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.processors.standard.util.JdbcCommon; +import org.apache.nifi.util.LongHolder; +import org.apache.nifi.util.StopWatch; + +import java.io.IOException; +import java.io.OutputStream; +import java.math.BigDecimal; +import java.sql.Connection; +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Timestamp; +import java.text.DecimalFormat; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; 
+import java.util.concurrent.TimeUnit; + +import static java.sql.Types.ARRAY; +import static java.sql.Types.BIGINT; +import static java.sql.Types.BINARY; +import static java.sql.Types.BIT; +import static java.sql.Types.BLOB; +import static java.sql.Types.BOOLEAN; +import static java.sql.Types.CHAR; +import static java.sql.Types.CLOB; +import static java.sql.Types.DATE; +import static java.sql.Types.DECIMAL; +import static java.sql.Types.DOUBLE; +import static java.sql.Types.FLOAT; +import static java.sql.Types.INTEGER; +import static java.sql.Types.LONGNVARCHAR; +import static java.sql.Types.LONGVARBINARY; +import static java.sql.Types.LONGVARCHAR; +import static java.sql.Types.NCHAR; +import static java.sql.Types.NUMERIC; +import static java.sql.Types.NVARCHAR; +import static java.sql.Types.REAL; +import static java.sql.Types.ROWID; +import static java.sql.Types.SMALLINT; +import static java.sql.Types.TIME; +import static java.sql.Types.TIMESTAMP; +import static java.sql.Types.TINYINT; +import static java.sql.Types.VARBINARY; +import static java.sql.Types.VARCHAR; + +@EventDriven +@InputRequirement(Requirement.INPUT_ALLOWED) +@Tags({"sql", "select", "jdbc", "query", "database"}) +@CapabilityDescription("Execute provided SQL select query. Query result will be converted to Avro format." + + " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on " + + "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. " + + "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the " + + "select query. FlowFile attribute 'querydbtable.row.count' indicates how many rows were selected.") +@Stateful(scopes = Scope.CLUSTER, description = "After performing a query on the specified table, the maximum values for " + + "the specified column(s) will be retained for use in future executions of the query. 
This allows the Processor " + + "to fetch only those records that have max values greater than the retained values. This can be used for " + + "incremental fetching, fetching of newly added rows, etc. To clear the maximum values, clear the state of the processor " + + "per the State Management documentation") +@WritesAttribute(attribute = "querydbtable.row.count") +public class QueryDatabaseTable extends AbstractProcessor { + + public static final String RESULT_ROW_COUNT = "querydbtable.row.count"; + + // Relationships + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("Successfully created FlowFile from SQL query result set.") + .build(); + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("SQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.") + .build(); + private final Set<Relationship> relationships; + + public static final PropertyDescriptor DBCP_SERVICE = new PropertyDescriptor.Builder() + .name("Database Connection Pooling Service") + .description("The Controller Service that is used to obtain a connection to the database.") + .required(true) + .identifiesControllerService(DBCPService.class) + .build(); + + public static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder() + .name("Table Name") + .description("The name of the database table to be queried.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final PropertyDescriptor COLUMN_NAMES = new PropertyDescriptor.Builder() + .name("Columns to return") --- End diff -- should capitalize "return"? 
> Add ability to query DB table using "last value" from column > ------------------------------------------------------------ > > Key: NIFI-1575 > URL: https://issues.apache.org/jira/browse/NIFI-1575 > Project: Apache NiFi > Issue Type: Improvement > Components: Extensions > Reporter: Matt Burgess > Assignee: Matt Burgess > Fix For: 0.6.0 > > > It would be a useful feature for a processor to be able to query from a > database table for only those records that have been added or become available since the > last time the query was executed. For example, if the processor could keep > the max value of a timestamp column for the last result set returned, the > next time the query was issued, it could use that timestamp value and column > to only return records whose timestamps were greater than "the last time". > This shouldn't be limited to timestamps of course; it would be useful for any > strictly-increasing value (like primary key ID) but would be up to the user > to select the table and column. The processor would simply keep the state for > the specified column's max value. > Proposed is a "QueryDBTable" processor which would have properties to > specify the table name and the column in the table for which to keep state > information about the last maximum value retrieved. Subsequent queries of the > table use this value to filter for rows whose value for the specified column > is greater than the "last maximum value". Upon each successful query, the > "last maximum value" is updated. -- This message was sent by Atlassian JIRA (v6.3.4#6332)