Github user bbende commented on a diff in the pull request: https://github.com/apache/nifi/pull/1645#discussion_r109752585 --- Diff: nifi-nar-bundles/nifi-standard-services/nifi-hbase_1_1_2-client-service-bundle/nifi-hbase_1_1_2-client-service/src/main/java/org/apache/nifi/hbase/HBase_1_1_2_ClientMapCacheService.java --- @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.hbase; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnEnabled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.controller.ConfigurationContext; + +import org.apache.nifi.distributed.cache.client.DistributedMapCacheClient; +import org.apache.nifi.distributed.cache.client.Serializer; +import org.apache.nifi.distributed.cache.client.Deserializer; +import java.io.ByteArrayOutputStream; +import org.apache.nifi.reporting.InitializationException; + +import java.nio.charset.StandardCharsets; +import org.apache.nifi.hbase.scan.ResultCell; +import org.apache.nifi.hbase.scan.ResultHandler; +import org.apache.nifi.hbase.scan.Column; +import org.apache.nifi.hbase.put.PutColumn; + + +import org.apache.nifi.processor.util.StandardValidators; + +@Tags({"distributed", "cache", "state", "map", "cluster","hbase"}) +@SeeAlso(classNames = {"org.apache.nifi.distributed.cache.server.map.DistributedMapCacheClient", "org.apache.nifi.hbase.HBase_1_1_2_ClientService"}) +@CapabilityDescription("Provides the ability to use an HBase table as a cache, in place of a DistributedMapCache." 
+ + " Uses a HBase_1_1_2_ClientService controller to communicate with HBase.") + +public class HBase_1_1_2_ClientMapCacheService extends AbstractControllerService implements DistributedMapCacheClient { + + static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder() + .name("HBase Client Service") + .description("Specifies the HBase Client Controller Service to use for accessing HBase.") + .required(true) + .identifiesControllerService(HBaseClientService.class) + .build(); + + public static final PropertyDescriptor HBASE_CACHE_TABLE_NAME = new PropertyDescriptor.Builder() + .name("HBase Cache Table Name") + .description("Name of the table on HBase to use for the cache.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final PropertyDescriptor HBASE_COLUMN_FAMILY = new PropertyDescriptor.Builder() + .name("HBase Column Family") + .description("Name of the column family on HBase to use for the cache.") + .required(true) + .defaultValue("f") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final PropertyDescriptor HBASE_COLUMN_QUALIFIER = new PropertyDescriptor.Builder() + .name("HBase Column Qualifier") + .description("Name of the column qualifier on HBase to use for the cache") + .defaultValue("q") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + @Override + protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { + final List<PropertyDescriptor> descriptors = new ArrayList<>(); + descriptors.add(HBASE_CACHE_TABLE_NAME); + descriptors.add(HBASE_CLIENT_SERVICE); + descriptors.add(HBASE_COLUMN_FAMILY); + descriptors.add(HBASE_COLUMN_QUALIFIER); + return descriptors; + } + + private String hBaseCacheTableName; + private HBaseClientService hBaseClientService; + + private String hBaseColumnFamily; + private byte[] hBaseColumnFamilyBytes; + + private String hBaseColumnQualifier; + private byte[] 
hBaseColumnQualifierBytes; + + @OnEnabled + public void onConfigured(final ConfigurationContext context) throws InitializationException{ + hBaseCacheTableName = context.getProperty(HBASE_CACHE_TABLE_NAME).getValue(); + hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class); + hBaseColumnFamily = context.getProperty(HBASE_COLUMN_FAMILY).getValue(); + hBaseColumnQualifier = context.getProperty(HBASE_COLUMN_QUALIFIER).getValue(); + + hBaseColumnFamilyBytes = hBaseColumnFamily.getBytes(StandardCharsets.UTF_8); + hBaseColumnQualifierBytes = hBaseColumnQualifier.getBytes(StandardCharsets.UTF_8); + } + + private <T> byte[] serialize(final T value, final Serializer<T> serializer) throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + serializer.serialize(value, baos); + return baos.toByteArray(); + } + private <T> T deserialize(final byte[] value, final Deserializer<T> deserializer) throws IOException { + return deserializer.deserialize(value); + } + + + @Override + public <K, V> boolean putIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) throws IOException { + if (containsKey(key, keySerializer)) { + put(key, value, keySerializer, valueSerializer); + return true; + } else return false; + } + + @Override + public <K, V> void put(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) throws IOException { + + List<PutColumn> putColumns = new ArrayList<PutColumn>(1); + final byte[] rowIdBytes = serialize(key, keySerializer); + final byte[] valueBytes = serialize(value, valueSerializer); + + final PutColumn putColumn = new PutColumn(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, valueBytes); + putColumns.add(putColumn); + + hBaseClientService.put(hBaseCacheTableName, rowIdBytes, putColumns); + } + + @Override + public <K> boolean containsKey(final K key, final Serializer<K> 
keySerializer) throws IOException { + final byte[] rowIdBytes = serialize(key, keySerializer); + final HBaseRowHandler handler = new HBaseRowHandler(); + + final List<Column> columnsList = new ArrayList<Column>(0); + + hBaseClientService.scan(hBaseCacheTableName, rowIdBytes, rowIdBytes, columnsList, handler); + return (handler.numRows() > 0); + } + + @Override + public <K, V> V getAndPutIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer, final Deserializer<V> valueDeserializer) throws IOException { --- End diff -- For putIfAbsent and getAndPutIfAbsent, they currently require two client calls (a get + a conditional write). This could be problematic because you can't ensure that, after doing the get, something else hasn't modified the value before you do the write; this could happen, for example, in a cluster where two nodes attempt a putIfAbsent at the same time with the same key. It looks like HBase has some atomic checkAndMutate operations: https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html I think we should see if we can use these in place of the two calls, although admittedly I have not used them myself.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---