[ https://issues.apache.org/jira/browse/NIFI-3413?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15941182#comment-15941182 ]
ASF GitHub Bot commented on NIFI-3413: -------------------------------------- Github user phrocker commented on a diff in the pull request: https://github.com/apache/nifi/pull/1618#discussion_r107909853 --- Diff: nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/db/event/TableInfo.java --- @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.standard.db.event; + +import org.apache.nifi.distributed.cache.client.exception.DeserializationException; +import org.apache.nifi.distributed.cache.client.exception.SerializationException; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * A POJO for holding table information related to update events. 
+ */ +public class TableInfo { + + final static String DB_TABLE_NAME_DELIMITER = "@!@"; + + private String databaseName; + private String tableName; + private Long tableId; + private List<ColumnDefinition> columns; + + public TableInfo(String databaseName, String tableName, Long tableId, List<ColumnDefinition> columns) { + this.databaseName = databaseName; + this.tableName = tableName; + this.tableId = tableId; + this.columns = columns; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public Long getTableId() { + return tableId; + } + + public List<ColumnDefinition> getColumns() { + return columns; + } + + public void setColumns(List<ColumnDefinition> columns) { + this.columns = columns; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + TableInfo tableInfo = (TableInfo) o; + + if (!databaseName.equals(tableInfo.databaseName)) return false; + if (!tableName.equals(tableInfo.tableName)) return false; + if (!tableId.equals(tableInfo.tableId)) return false; + return columns != null ? columns.equals(tableInfo.columns) : tableInfo.columns == null; + } + + @Override + public int hashCode() { + int result = databaseName.hashCode(); + result = 31 * result + tableName.hashCode(); + result = 31 * result + tableId.hashCode(); + result = 31 * result + (columns != null ? 
columns.hashCode() : 0); + return result; + } + + public static class Serializer implements org.apache.nifi.distributed.cache.client.Serializer<TableInfo> { + + @Override + public void serialize(TableInfo value, OutputStream output) throws SerializationException, IOException { + StringBuilder sb = new StringBuilder(value.getDatabaseName()); + sb.append(DB_TABLE_NAME_DELIMITER); + sb.append(value.getTableName()); + sb.append(DB_TABLE_NAME_DELIMITER); + sb.append(value.getTableId()); + List<ColumnDefinition> columnDefinitions = value.getColumns(); + if (columnDefinitions != null && !columnDefinitions.isEmpty()) { + sb.append(DB_TABLE_NAME_DELIMITER); + sb.append(columnDefinitions.stream().map((col) -> col.getName() + DB_TABLE_NAME_DELIMITER + col.getType()).collect(Collectors.joining(DB_TABLE_NAME_DELIMITER))); + } + output.write(sb.toString().getBytes()); + } + } + + public static class Deserializer implements org.apache.nifi.distributed.cache.client.Deserializer<TableInfo> { + + @Override + public TableInfo deserialize(byte[] input) throws DeserializationException, IOException { + // Don't bother deserializing if empty, just return null. 
This usually happens when the key is not found in the cache + if (input == null || input.length == 0) { + return null; + } + String inputString = new String(input); + String[] tokens = inputString.split(DB_TABLE_NAME_DELIMITER); + int numTokens = tokens.length; + if (numTokens < 3) { + throw new IOException("Could not deserialize TableInfo from the following value: " + inputString); + } + String dbName = tokens[0]; + String tableName = tokens[1]; + Long tableId; + try { + tableId = Long.parseLong(tokens[2]); + } catch (NumberFormatException nfe) { + throw new IOException("Illegal table ID: " + tokens[2]); + } + // Parse column names and types + List<ColumnDefinition> columnDefinitions = new ArrayList<>(); + for (int i = 0; i < numTokens - 3; i += 2) { --- End diff -- Is there any way to be more defensive here in the unlikely event that something is written with incorrect number of tokens? Does that even make sense? I assume there is an uncaught exception handler in the path that might make this concern moot. > Implement a GetChangeDataCapture processor > ------------------------------------------ > > Key: NIFI-3413 > URL: https://issues.apache.org/jira/browse/NIFI-3413 > Project: Apache NiFi > Issue Type: New Feature > Components: Extensions > Reporter: Matt Burgess > Assignee: Matt Burgess > > Database systems such as MySQL, Oracle, and SQL Server allow access to their > transactional logs and such, in order for external clients to have a "change > data capture" (CDC) capability. I propose a GetChangeDataCapture processor to > enable this in NiFi. > The processor would be configured with a DBCPConnectionPool controller > service, as well as a Database Type property (similar to the one in > QueryDatabaseTable) for database-specific handling. Additional properties > might include the CDC table name, etc. Additional database-specific > properties could be handled using dynamic properties (and the documentation > should reflect this). 
> The processor would accept no incoming connections (it is a "Get" or source > processor), would be intended to run on the primary node only as a single > threaded processor, and would generate a flow file for each operation > (INSERT, UPDATE, DELETE, e.g.) in one or some number of formats (JSON, e.g.). > The flow files would be transferred in time order (to enable a replication > solution, for example), perhaps with some auto-incrementing attribute to also > indicate order if need be. -- This message was sent by Atlassian JIRA (v6.3.15#6346)