[ 
https://issues.apache.org/jira/browse/METRON-1005?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16100096#comment-16100096
 ] 

ASF GitHub Bot commented on METRON-1005:
----------------------------------------

Github user nickwallen commented on a diff in the pull request:

    https://github.com/apache/metron/pull/622#discussion_r129319336
  
    --- Diff: 
metron-analytics/metron-profiler-common/src/main/java/org/apache/metron/profiler/hbase/DecodableRowKeyBuilder.java
 ---
    @@ -0,0 +1,402 @@
    +/*
    + *
    + *  Licensed to the Apache Software Foundation (ASF) under one
    + *  or more contributor license agreements.  See the NOTICE file
    + *  distributed with this work for additional information
    + *  regarding copyright ownership.  The ASF licenses this file
    + *  to you under the Apache License, Version 2.0 (the
    + *  "License"); you may not use this file except in compliance
    + *  with the License.  You may obtain a copy of the License at
    + *
    + *      http://www.apache.org/licenses/LICENSE-2.0
    + *
    + *  Unless required by applicable law or agreed to in writing, software
    + *  distributed under the License is distributed on an "AS IS" BASIS,
    + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied.
    + *  See the License for the specific language governing permissions and
    + *  limitations under the License.
    + *
    + */
    +
    +package org.apache.metron.profiler.hbase;
    +
    +import org.apache.hadoop.hbase.util.Bytes;
    +import org.apache.metron.profiler.ProfileMeasurement;
    +import org.apache.metron.profiler.ProfilePeriod;
    +
    +import java.nio.BufferUnderflowException;
    +import java.nio.ByteBuffer;
    +import java.nio.ByteOrder;
    +import java.security.MessageDigest;
    +import java.security.NoSuchAlgorithmException;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Optional;
    +import java.util.concurrent.TimeUnit;
    +
    +import static 
org.apache.metron.profiler.ProfilerClientConfig.PROFILER_PERIOD;
    +import static 
org.apache.metron.profiler.ProfilerClientConfig.PROFILER_PERIOD_UNITS;
    +import static 
org.apache.metron.profiler.ProfilerClientConfig.PROFILER_SALT_DIVISOR;
    +
    +/**
    + * Responsible for building the row keys used to store profile data in 
HBase.
    + *
    + * This builder generates decodable row keys.  A decodable row key is one 
that can be interrogated to extract
    + * the constituent components of that row key.  Given a previously 
generated row key this builder
    + * can extract the profile name, entity name, group name(s), period 
duration, and period.
    + *
    + * The row key is composed of the following fields.
    + * <ul>
    + * <li>magic number - Helps to validate the row key.</li>
    + * <li>version - The version number of the row key.</li>
    + * <li>salt - A salt that helps prevent hot-spotting.
    + * <li>profile - The name of the profile.
    + * <li>entity - The name of the entity being profiled.
    + * <li>group(s) - The group(s) used to sort the data in HBase. For 
example, a group may distinguish between weekends and weekdays.
    + * <li>period - The period in which the measurement was taken. The first 
period starts at the epoch and increases monotonically.
    + * </ul>
    + */
    +public class DecodableRowKeyBuilder implements RowKeyBuilder {
    +
    +  /**
    +   * Defines the byte order when encoding and decoding the row keys.
    +   *
    +   * Making this configurable is likely not necessary and is left as a 
practice exercise for the reader. :)
    +   */
    +  private static final ByteOrder byteOrder = ByteOrder.BIG_ENDIAN;
    +
    +  /**
    +   * Defines some level of sane max field length to avoid any shenanigans 
with oddly encoded row keys.
    +   */
    +  private static final int MAX_FIELD_LENGTH = 1000;
    +
    +  /**
    +   * A magic number embedded in each row key to help validate the row key 
and byte ordering when decoding.
    +   */
    +  protected static final short MAGIC_NUMBER = 77;
    +
    +  /**
    +   * The version number of the row keys supported by this builder.
    +   */
    +  protected static final byte VERSION = (byte) 1;
    +
    +  /**
    +   * A salt can be prepended to the row key to help prevent hot-spotting.  
The salt
    +   * divisor is used to generate the salt.  The salt divisor should be 
roughly equal
    +   * to the number of nodes in the Hbase cluster.
    +   */
    +  private int saltDivisor;
    +
    +  /**
    +   * The duration of each profile period in milliseconds.
    +   */
    +  private long periodDurationMillis;
    +
    +  public DecodableRowKeyBuilder() {
    +    this(PROFILER_SALT_DIVISOR.getDefault(Integer.class),
    +            PROFILER_PERIOD.getDefault(Long.class),
    +            
TimeUnit.valueOf(PROFILER_PERIOD_UNITS.getDefault(String.class)));
    +  }
    +
    +  public DecodableRowKeyBuilder(int saltDivisor, long duration, TimeUnit 
units) {
    +    this.saltDivisor = saltDivisor;
    +    this.periodDurationMillis = units.toMillis(duration);
    +  }
    +
    +  /**
    +   * Builds a list of row keys necessary to retrieve profile measurements 
over
    +   * a time horizon.
    +   *
    +   * @param profile The name of the profile.
    +   * @param entity The name of the entity.
    +   * @param groups The group(s) used to sort the profile data.
    +   * @param start When the time horizon starts in epoch milliseconds.
    +   * @param end When the time horizon ends in epoch milliseconds.
    +   * @return All of the row keys necessary to retrieve the profile 
measurements.
    +   */
    +  @Override
    +  public List<byte[]> encode(String profile, String entity, List<Object> 
groups, long start, long end) {
    +    // be forgiving of out-of-order start and end times; order is critical 
to this algorithm
    +    end = Math.max(start, end);
    +    start = Math.min(start, end);
    +
    +    // find the starting period and advance until the end time is reached
    +    return ProfilePeriod.visitPeriods( start
    +            , end
    +            , periodDurationMillis
    +            , TimeUnit.MILLISECONDS
    +            , Optional.empty()
    +            , period -> encode(profile, entity, groups, period)
    +    );
    +
    +  }
    +
    +  /**
    +   * Builds a list of row keys necessary to retrieve a profile's 
measurements over
    +   * a time horizon.
    +   * <p>
    +   * This method is useful when attempting to read ProfileMeasurements 
stored in HBase.
    +   *
    +   * @param profile    The name of the profile.
    +   * @param entity     The name of the entity.
    +   * @param groups     The group(s) used to sort the profile data.
    +   * @param periods    The profile measurement periods to compute the 
rowkeys for
    +   * @return All of the row keys necessary to retrieve the profile 
measurements.
    +   */
    +  @Override
    +  public List<byte[]> encode(String profile, String entity, List<Object> 
groups, Iterable<ProfilePeriod> periods) {
    +    List<byte[]> rowKeys = new ArrayList<>();
    +    for(ProfilePeriod period : periods) {
    +      rowKeys.add(encode(profile, entity, groups, period));
    +    }
    +    return rowKeys;
    +  }
    +
    +  /**
    +   * Builds the row key for a given profile measurement.
    +   * @param m The profile measurement.
    +   * @return The HBase row key.
    +   */
    +  @Override
    +  public byte[] encode(ProfileMeasurement m) {
    +    return encode(m.getProfileName(), m.getEntity(), m.getGroups(), 
m.getPeriod());
    +  }
    +
    +  /**
    +   * Build the row key.
    +   * @param profile The name of the profile.
    +   * @param entity The name of the entity.
    +   * @param period The period in which the measurement was taken.
    +   * @param groups The groups.
    +   * @return The HBase row key.
    +   */
    +  public byte[] encode(String profile, String entity, List<Object> groups, 
ProfilePeriod period) {
    +
    +    if(profile == null)
    +      throw new IllegalArgumentException("Cannot encode row key; invalid 
profile name.");
    +    if(entity == null)
    +      throw new IllegalArgumentException("Cannot encode row key; invalid 
entity name.");
    +    if(period == null)
    +      throw new IllegalArgumentException("Cannot encode row key; invalid 
profile period.");
    +
    +    long periodId = period.getPeriod();
    +    long periodDurationMillis = period.getDurationMillis();
    +
    +    byte[] salt = encodeSalt(periodId, saltDivisor);
    +    byte[] profileB = Bytes.toBytes(profile);
    +    byte[] entityB = Bytes.toBytes(entity);
    +    byte[] groupB = encodeGroups(groups);
    +
    +    int capacity = Short.BYTES + 1 + salt.length + profileB.length + 
entityB.length + groupB.length + (Integer.BYTES * 3) + (Long.BYTES * 2);
    +    ByteBuffer buffer = ByteBuffer
    +            .allocate(capacity)
    +            .order(byteOrder)
    +            .putShort(MAGIC_NUMBER)
    +            .put(VERSION)
    +            .putInt(salt.length)
    +            .put(salt)
    +            .putInt(profileB.length)
    +            .put(profileB)
    +            .putInt(entityB.length)
    +            .put(entityB)
    +            .put(groupB)
    --- End diff --
    
    @mattf-horton The number of groups is encoded as a 'length', along with the 
size of each group name.  See the function `encodeGroups`.
    
    @cestella I would be fine with shorts instead of ints.


> Create Decodable Row Key for Profiler
> -------------------------------------
>
>                 Key: METRON-1005
>                 URL: https://issues.apache.org/jira/browse/METRON-1005
>             Project: Metron
>          Issue Type: Improvement
>    Affects Versions: 0.3.0
>            Reporter: Nick Allen
>            Assignee: Nick Allen
>             Fix For: Next + 1
>
>
> To be able to answer the types of questions that I outlined in METRON-450, we 
> need a row key that is decodable.  Right now there is no logic to decode a 
> row key, nor is the existing row key easily decodable.  
> Once the row keys can be decoded, you could scan all of the row keys in the 
> Profiler's HBase table, decode each of them, and extract things like the 
> names of all your profiles, the names of entities within a profile, and the 
> period duration of a given profile.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to