This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new ffe4e7f  Travis CI build asf-site
ffe4e7f is described below

commit ffe4e7f70b14cd5c5949c7223b005c9af5cb7891
Author: CI <ci...@hudi.apache.org>
AuthorDate: Sat Feb 20 11:55:15 2021 +0000

    Travis CI build asf-site
---
 content/activity.html                       |  24 ++
 content/assets/js/lunr/lunr-store.js        |   5 +
 content/blog.html                           |  24 ++
 content/blog/hudi-key-generators/index.html | 637 ++++++++++++++++++++++++++++
 content/cn/activity.html                    |  24 ++
 content/sitemap.xml                         |   4 +
 6 files changed, 718 insertions(+)

diff --git a/content/activity.html b/content/activity.html
index 7ec278e..71bf9b4 100644
--- a/content/activity.html
+++ b/content/activity.html
@@ -193,6 +193,30 @@
     
     <h2 class="archive__item-title" itemprop="headline">
       
+        <a href="/blog/hudi-key-generators/" rel="permalink">Apache Hudi Key 
Generators
+</a>
+      
+    </h2>
+    <!-- Look the author details up from the site config. -->
+    
+    <!-- Output author details if some exist. -->
+    
+ 
+    <p class="archive__item-excerpt" itemprop="description">Different key 
generators available with Apache Hudi
+</p>
+  </article>
+</div>
+
+        
+        
+
+
+
+<div class="list__item">
+  <article class="archive__item" itemscope 
itemtype="https://schema.org/CreativeWork";>
+    
+    <h2 class="archive__item-title" itemprop="headline">
+      
         <a href="/blog/hudi-clustering-intro/" rel="permalink">Optimize Data 
lake layout using Clustering in Apache Hudi
 </a>
       
diff --git a/content/assets/js/lunr/lunr-store.js 
b/content/assets/js/lunr/lunr-store.js
index bf8a81a..ae425f0 100644
--- a/content/assets/js/lunr/lunr-store.js
+++ b/content/assets/js/lunr/lunr-store.js
@@ -1438,4 +1438,9 @@ var store = [{
         "excerpt":"Background Apache Hudi brings stream processing to big 
data, providing fresh data while being an order of magnitude efficient over 
traditional batch processing. In a data lake/warehouse, one of the key 
trade-offs is between ingestion speed and query performance. Data ingestion 
typically prefers small files to improve parallelism and make...","categories": 
["blog"],
         "tags": [],
         "url": "https://hudi.apache.org/blog/hudi-clustering-intro/";,
+        "teaser":"https://hudi.apache.org/assets/images/500x300.png"},{
+        "title": "Apache Hudi Key Generators",
+        "excerpt":"Every record in Hudi is uniquely identified by a HoodieKey, 
which is a pair of record key and partition path where the record belongs to. 
Hudi has imposed this constraint so that updates and deletes can be applied to 
the record of interest. Hudi relies on the partition path 
field...","categories": ["blog"],
+        "tags": [],
+        "url": "https://hudi.apache.org/blog/hudi-key-generators/";,
         "teaser":"https://hudi.apache.org/assets/images/500x300.png"},]
diff --git a/content/blog.html b/content/blog.html
index 004a368..0935196 100644
--- a/content/blog.html
+++ b/content/blog.html
@@ -191,6 +191,30 @@
     
     <h2 class="archive__item-title" itemprop="headline">
       
+        <a href="/blog/hudi-key-generators/" rel="permalink">Apache Hudi Key 
Generators
+</a>
+      
+    </h2>
+    <!-- Look the author details up from the site config. -->
+    
+    <!-- Output author details if some exist. -->
+    
+ 
+    <p class="archive__item-excerpt" itemprop="description">Different key 
generators available with Apache Hudi
+</p>
+  </article>
+</div>
+
+        
+        
+
+
+
+<div class="list__item">
+  <article class="archive__item" itemscope 
itemtype="https://schema.org/CreativeWork";>
+    
+    <h2 class="archive__item-title" itemprop="headline">
+      
         <a href="/blog/hudi-clustering-intro/" rel="permalink">Optimize Data 
lake layout using Clustering in Apache Hudi
 </a>
       
diff --git a/content/blog/hudi-key-generators/index.html 
b/content/blog/hudi-key-generators/index.html
new file mode 100644
index 0000000..508dafb
--- /dev/null
+++ b/content/blog/hudi-key-generators/index.html
@@ -0,0 +1,637 @@
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    <meta charset="utf-8">
+
+<!-- begin _includes/seo.html --><title>Apache Hudi Key Generators - Apache 
Hudi</title>
+<meta name="description" content="Different key generators available with 
Apache Hudi">
+
+<meta property="og:type" content="article">
+<meta property="og:locale" content="en_US">
+<meta property="og:site_name" content="">
+<meta property="og:title" content="Apache Hudi Key Generators">
+<meta property="og:url" 
content="https://hudi.apache.org/blog/hudi-key-generators/";>
+
+
+  <meta property="og:description" content="Different key generators available 
with Apache Hudi">
+
+
+
+
+
+
+
+
+
+
+
+<!-- end _includes/seo.html -->
+
+
+<!--<link href="/feed.xml" type="application/atom+xml" rel="alternate" title=" 
Feed">-->
+
+<!-- https://t.co/dKP3o1e -->
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+<script>
+  document.documentElement.className = 
document.documentElement.className.replace(/\bno-js\b/g, '') + ' js ';
+</script>
+
+<!-- For all browsers -->
+<link rel="stylesheet" href="/assets/css/main.css">
+
+<!--[if IE]>
+  <style>
+    /* old IE unsupported flexbox fixes */
+    .greedy-nav .site-title {
+      padding-right: 3em;
+    }
+    .greedy-nav button {
+      position: absolute;
+      top: 0;
+      right: 0;
+      height: 100%;
+    }
+  </style>
+<![endif]-->
+
+
+
+<link rel="icon" type="image/x-icon" href="/assets/images/favicon.ico">
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+<script src="/assets/js/jquery.min.js"></script>
+
+    
+<script src="/assets/js/main.min.js"></script>
+
+  </head>
+
+  <body class="layout--single">
+    <!--[if lt IE 9]>
+<div class="notice--danger align-center" style="margin: 0;">You are using an 
<strong>outdated</strong> browser. Please <a 
href="https://browsehappy.com/";>upgrade your browser</a> to improve your 
experience.</div>
+<![endif]-->
+
+    <div class="masthead">
+  <div class="masthead__inner-wrap" id="masthead__inner-wrap">
+    <div class="masthead__menu">
+      <nav id="site-nav" class="greedy-nav">
+        
+          <a class="site-logo" href="/">
+              <div style="width: 150px; height: 40px">
+              </div>
+          </a>
+        
+        <a class="site-title" href="/">
+          
+        </a>
+        <ul class="visible-links"><li class="masthead__menu-item">
+              <a href="/docs/quick-start-guide.html" target="_self" 
>Documentation</a>
+            </li><li class="masthead__menu-item">
+              <a href="/community.html" target="_self" >Community</a>
+            </li><li class="masthead__menu-item">
+              <a href="/blog.html" target="_self" >Blog</a>
+            </li><li class="masthead__menu-item">
+              <a href="https://cwiki.apache.org/confluence/display/HUDI/FAQ"; 
target="_blank" >FAQ</a>
+            </li><li class="masthead__menu-item">
+              <a href="/docs/powered_by.html" target="_self" >Powered By</a>
+            </li><li class="masthead__menu-item">
+              <a href="/releases.html" target="_self" >Releases</a>
+            </li></ul>
+        <button class="greedy-nav__toggle hidden" type="button">
+          <span class="visually-hidden">Toggle menu</span>
+          <div class="navicon"></div>
+        </button>
+        <ul class="hidden-links hidden"></ul>
+      </nav>
+    </div>
+  </div>
+</div>
+<!--
+<p class="notice--warning" style="margin: 0 !important; text-align: center 
!important;"><strong>Note:</strong> This site is work in progress, if you 
notice any issues, please <a target="_blank" 
href="https://github.com/apache/hudi/issues";>Report on Issue</a>.
+  Click <a href="/"> here</a> back to old site.</p>
+-->
+
+    <div class="initial-content">
+      <div id="main" role="main">
+  
+
+  <div class="sidebar sticky">
+
+  
+    <div itemscope itemtype="https://schema.org/Person";>
+
+  <div class="author__content">
+    
+      <h3 class="author__name" itemprop="name">Quick Links</h3>
+    
+    
+      <div class="author__bio" itemprop="description">
+        <p>Hudi <em>ingests</em> &amp; <em>manages</em> storage of large 
analytical datasets over DFS.</p>
+
+      </div>
+    
+  </div>
+
+  <div class="author__urls-wrapper">
+    <ul class="author__urls social-icons">
+      
+        
+          <li><a href="/docs/quick-start-guide" target="_self" rel="nofollow 
noopener noreferrer"><i class="fa fa-book" aria-hidden="true"></i> 
Documentation</a></li>
+
+          
+        
+          <li><a href="https://cwiki.apache.org/confluence/display/HUDI"; 
target="_blank" rel="nofollow noopener noreferrer"><i class="fa fa-wikipedia-w" 
aria-hidden="true"></i> Technical Wiki</a></li>
+
+          
+        
+          <li><a href="/contributing" target="_self" rel="nofollow noopener 
noreferrer"><i class="fa fa-thumbs-o-up" aria-hidden="true"></i> Contribution 
Guide</a></li>
+
+          
+        
+          <li><a 
href="https://join.slack.com/t/apache-hudi/shared_invite/enQtODYyNDAxNzc5MTg2LTE5OTBlYmVhYjM0N2ZhOTJjOWM4YzBmMWU2MjZjMGE4NDc5ZDFiOGQ2N2VkYTVkNzU3ZDQ4OTI1NmFmYWQ0NzE";
 target="_blank" rel="nofollow noopener noreferrer"><i class="fa fa-slack" 
aria-hidden="true"></i> Join on Slack</a></li>
+
+          
+        
+          <li><a href="https://github.com/apache/hudi"; target="_blank" 
rel="nofollow noopener noreferrer"><i class="fa fa-github" 
aria-hidden="true"></i> Fork on GitHub</a></li>
+
+          
+        
+          <li><a href="https://issues.apache.org/jira/projects/HUDI/summary"; 
target="_blank" rel="nofollow noopener noreferrer"><i class="fa fa-navicon" 
aria-hidden="true"></i> Report Issues</a></li>
+
+          
+        
+          <li><a href="/security" target="_self" rel="nofollow noopener 
noreferrer"><i class="fa fa-navicon" aria-hidden="true"></i> Report Security 
Issues</a></li>
+
+          
+        
+      
+    </ul>
+  </div>
+</div>
+
+  
+
+  
+  </div>
+
+
+  <article class="page" itemscope itemtype="https://schema.org/CreativeWork";>
+    <!-- Look the author details up from the site config. -->
+    
+
+    <div class="page__inner-wrap">
+      
+        <header>
+          <h1 id="page-title" class="page__title" itemprop="headline">Apache 
Hudi Key Generators
+</h1>
+          <!-- Output author details if some exist. -->
+          
+        </header>
+      
+
+      <section class="page__content" itemprop="text">
+        
+          <style>
+            .page {
+              padding-right: 0 !important;
+            }
+          </style>
+        
+        <p>Every record in Hudi is uniquely identified by a HoodieKey, which 
is a pair of record key and partition path where the 
+record belongs to. Hudi has imposed this constraint so that updates and 
deletes can be applied to the record of interest. 
+Hudi relies on the partition path field to partition your dataset and records 
within a partition have unique record keys. 
+Since uniqueness is guaranteed only within the partition, there could be 
records with same record keys across different 
+partitions. One should choose the partition field wisely as it could be a 
determining factor for your ingestion and 
+query latency.</p>
+
+<h2 id="key-generators">Key Generators</h2>
+
+<p>Hudi exposes a number of out-of-the-box key generators that customers can 
use based on their needs. Alternatively, they can plug in their 
+own implementation of the KeyGenerator. This blog goes over the different 
types of key generators that are readily 
+available to use.</p>
+
+<p><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenerator.java";>Here</a>
+is the interface for KeyGenerator in Hudi for your reference.</p>
+
+<p>Before diving into different types of key generators, let’s go over some of 
the common configs required to be set for 
+key generators.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config</th>
+      <th style="text-align: center">Meaning/purpose</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.datasource.write.recordkey.field</code></td>
+      <td style="text-align: center">Refers to record key field. This is a 
mandatory field.</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.datasource.write.partitionpath.field</code></td>
+      <td style="text-align: center">Refers to partition path field. This is a 
mandatory field.</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.datasource.write.keygenerator.class</code></td>
+      <td style="text-align: center">Refers to Key generator class(including 
full path). Could refer to any of the available ones or user defined one. This 
is a mandatory field.</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.datasource.write.partitionpath.urlencode</code></td>
+      <td style="text-align: center">When set to true, partition path will be 
url encoded. Default value is false.</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.datasource.write.hive_style_partitioning</code></td>
+      <td style="text-align: center">When set to true, uses hive style 
partitioning. Partition field name will be prefixed to the value. Format: 
“<partition_path_field_name>=<partition_path_value>”. Default value is 
false.</partition_path_value></partition_path_field_name></td>
+    </tr>
+  </tbody>
+</table>
+
+<p>There are a few more configs involved if you are using 
TimestampBasedKeyGenerator. Those are covered in the respective section.</p>
+
+<p>Let’s go over different key generators available to be used with Hudi.</p>
+
+<h3 id="simplekeygenerator"><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java";>SimpleKeyGenerator</a></h3>
+
+<p>Record key refers to one field(column in dataframe) by name and partition 
path refers to one field (single column in dataframe) 
+by name. This is one of the most commonly used ones. Values are interpreted as 
is from dataframe and converted to string.</p>
+
+<h3 id="complexkeygenerator"><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java";>ComplexKeyGenerator</a></h3>
+<p>Both record key and partition paths comprise one or more than one field by 
name(combination of multiple fields). Fields 
+are expected to be comma separated in the config value. For example <code 
class="highlighter-rouge">"Hoodie.datasource.write.recordkey.field" : 
“col1,col4”</code></p>
+
+<h3 id="globaldeletekeygenerator"><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/GlobalDeleteKeyGenerator.java";>GlobalDeleteKeyGenerator</a></h3>
+<p>Global index deletes do not require partition value. So this key generator 
avoids using partition value for generating HoodieKey.</p>
+
+<h3 id="timestampbasedkeygenerator"><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java";>TimestampBasedKeyGenerator</a></h3>
+<p>This key generator relies on timestamps for the partition field. The field 
values are interpreted as timestamps 
+and not just converted to string while generating partition path value for 
records.  Record key is the same as before, where it is chosen by 
+field name.  Users are expected to set a few more configs to use this 
KeyGenerator.</p>
+
+<p>Configs to be set:</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config</th>
+      <th>Meaning/purpose</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>One of the timestamp types supported(UNIX_TIMESTAMP, DATE_STRING, 
MIXED, EPOCHMILLISECONDS, SCALAR)</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>Output date format</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+      <td>Timezone of the data format</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">oodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+      <td>Input date format</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Let’s go over some example values for TimestampBasedKeyGenerator.</p>
+
+<h4 id="timestamp-is-gmt">Timestamp is GMT</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“EPOCHMILLISECONDS”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“yyyy-MM-dd hh”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+      <td>“GMT+8:00”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input Field value: “1578283932000L” <br />
+Partition path generated from key generator: “2020-01-06 12”</p>
+
+<p>If input field value is null for some rows. <br />
+Partition path generated from key generator: “1970-01-01 08”</p>
+
+<h4 id="timestamp-is-date_string">Timestamp is DATE_STRING</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“DATE_STRING”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“yyyy-MM-dd hh”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+      <td>“GMT+8:00”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+      <td>“yyyy-MM-dd hh:mm:ss”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input field value: “2020-01-06 12:12:12” <br />
+Partition path generated from key generator: “2020-01-06 12”</p>
+
+<p>If input field value is null for some rows. <br />
+Partition path generated from key generator: “1970-01-01 12:00:00”
+<br /></p>
+
+<h4 id="scalar-examples">Scalar examples</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“SCALAR”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“yyyy-MM-dd hh”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timezone</code></td>
+      <td>“GMT”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit</code></td>
+      <td>“days”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input field value: “20000L” <br />
+Partition path generated from key generator: “2024-10-04 12”</p>
+
+<p>If input field value is null. <br />
+Partition path generated from key generator: “1970-01-02 12”</p>
+
+<h4 id="iso8601withmsz-with-single-input-format">ISO8601WithMsZ with Single 
Input format</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“DATE_STRING”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+      <td>“yyyy-MM-dd’T’HH:mm:ss.SSSZ”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“yyyyMMddHH”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+      <td>“GMT”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input field value: “2020-04-01T13:01:33.428Z” <br />
+Partition path generated from key generator: “2020040113”</p>
+
+<h4 id="iso8601withmsz-with-multiple-input-formats">ISO8601WithMsZ with 
Multiple Input formats</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“DATE_STRING”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+      <td>“yyyy-MM-dd’T’HH:mm:ssZ,yyyy-MM-dd’T’HH:mm:ss.SSSZ”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“yyyyMMddHH”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+      <td>“UTC”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input field value: “2020-04-01T13:01:33.428Z” <br />
+Partition path generated from key generator: “2020040113”</p>
+
+<h4 id="iso8601noms-with-offset-using-multiple-input-formats">ISO8601NoMs with 
offset using multiple input formats</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“DATE_STRING”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+      <td>“yyyy-MM-dd’T’HH:mm:ssZ,yyyy-MM-dd’T’HH:mm:ss.SSSZ”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“yyyyMMddHH”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+      <td>“UTC”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input field value: “2020-04-01T13:01:33-<strong>05:00</strong>” <br />
+Partition path generated from key generator: “2020040118”</p>
+
+<h4 id="input-as-short-date-string-and-expect-date-in-date-format">Input as 
short date string and expect date in date format</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Config field</th>
+      <th>Value</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.timestamp.type</code></td>
+      <td>“DATE_STRING”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat</code></td>
+      <td>“yyyy-MM-dd’T’HH:mm:ssZ,yyyy-MM-dd’T’HH:mm:ss.SSSZ,yyyyMMdd”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex</code></td>
+      <td>””</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.input.timezone</code></td>
+      <td>“UTC”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.dateformat</code></td>
+      <td>“MM/dd/yyyy”</td>
+    </tr>
+    <tr>
+      <td><code 
class="highlighter-rouge">hoodie.deltastreamer.keygen.timebased.output.timezone</code></td>
+      <td>“UTC”</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Input field value: “20200401” <br />
+Partition path generated from key generator: “04/01/2020”</p>
+
+<h3 id="customkeygenerator"><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java";>CustomKeyGenerator</a></h3>
+<p>This is a generic implementation of KeyGenerator where users are able to 
leverage the benefits of SimpleKeyGenerator, 
+ComplexKeyGenerator and TimestampBasedKeyGenerator all at the same time. One 
can configure record key and partition 
+paths as a single field or a combination of fields. This keyGenerator is 
particularly useful if you want to define 
+complex partition paths involving regular fields and timestamp based fields. 
It expects value for prop <code 
class="highlighter-rouge">"hoodie.datasource.write.partitionpath.field"</code> 
+in a specific format. The format should be 
“field1:PartitionKeyType1,field2:PartitionKeyType2…”</p>
+
+<p>The complete partition path is created as
+<code class="highlighter-rouge">&lt;value for field1 basis 
PartitionKeyType1&gt;/&lt;value for field2 basis PartitionKeyType2&gt;</code>
+and so on. Each partition key type could either be SIMPLE or TIMESTAMP.</p>
+
+<p>Example config value: <code 
class="highlighter-rouge">“field_3:simple,field_5:timestamp”</code></p>
+
+<p>RecordKey config value is either a single field in case of SimpleKeyGenerator 
or comma-separated field names if referring to ComplexKeyGenerator.
+Eg: “col1” or “col3,col4”.</p>
+
+<h3 id="nonpartitionedkeygenerator"><a 
href="https://github.com/apache/hudi/blob/master/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java";>NonPartitionedKeyGenerator</a></h3>
+<p>If your hudi dataset is not partitioned, you could use this 
“NonPartitionedKeyGenerator” which will return an empty 
+partition for all records. In other words, all records go to the same 
partition (which is empty “”)</p>
+
+<p>Hope this blog gave you a good understanding of different types of Key 
Generators available in Apache Hudi. Thanks for your continued support for 
Hudi’s community.</p>
+
+
+      </section>
+
+      <a href="#masthead__inner-wrap" class="back-to-top">Back to top 
&uarr;</a>
+
+
+      
+
+    </div>
+
+  </article>
+
+</div>
+
+    </div>
+
+    <div class="page__footer">
+      <footer>
+        
+<div class="row">
+  <div class="col-lg-12 footer">
+    <p>
+      <table class="table-apache-info">
+        <tr>
+          <td>
+            <a class="footer-link-img" href="https://apache.org";>
+              <img width="250px" src="/assets/images/asf_logo.svg" alt="The 
Apache Software Foundation">
+            </a>
+          </td>
+          <td>
+            <a style="float: right" 
href="https://www.apache.org/events/current-event.html";>
+              <img 
src="https://www.apache.org/events/current-event-234x60.png"; />
+            </a>
+          </td>
+        </tr>
+      </table>
+    </p>
+    <p>
+      <a href="https://www.apache.org/licenses/";>License</a> | <a 
href="https://www.apache.org/security/";>Security</a> | <a 
href="https://www.apache.org/foundation/thanks.html";>Thanks</a> | <a 
href="https://www.apache.org/foundation/sponsorship.html";>Sponsorship</a>
+    </p>
+    <p>
+      Copyright &copy; <span id="copyright-year">2019</span> <a 
href="https://apache.org";>The Apache Software Foundation</a>, Licensed under 
the <a href="https://www.apache.org/licenses/LICENSE-2.0";> Apache License, 
Version 2.0</a>.
+      Hudi, Apache and the Apache feather logo are trademarks of The Apache 
Software Foundation. <a href="/docs/privacy">Privacy Policy</a>
+    </p>
+  </div>
+</div>
+      </footer>
+    </div>
+
+
+  </body>
+</html>
\ No newline at end of file
diff --git a/content/cn/activity.html b/content/cn/activity.html
index eb734ec..e096c4a 100644
--- a/content/cn/activity.html
+++ b/content/cn/activity.html
@@ -191,6 +191,30 @@
     
     <h2 class="archive__item-title" itemprop="headline">
       
+        <a href="/blog/hudi-key-generators/" rel="permalink">Apache Hudi Key 
Generators
+</a>
+      
+    </h2>
+    <!-- Look the author details up from the site config. -->
+    
+    <!-- Output author details if some exist. -->
+    
+ 
+    <p class="archive__item-excerpt" itemprop="description">Different key 
generators available with Apache Hudi
+</p>
+  </article>
+</div>
+
+        
+        
+
+
+
+<div class="list__item">
+  <article class="archive__item" itemscope 
itemtype="https://schema.org/CreativeWork";>
+    
+    <h2 class="archive__item-title" itemprop="headline">
+      
         <a href="/blog/hudi-clustering-intro/" rel="permalink">Optimize Data 
lake layout using Clustering in Apache Hudi
 </a>
       
diff --git a/content/sitemap.xml b/content/sitemap.xml
index a6de7ea..5d19956 100644
--- a/content/sitemap.xml
+++ b/content/sitemap.xml
@@ -1153,6 +1153,10 @@
 <lastmod>2021-01-27T00:00:00-05:00</lastmod>
 </url>
 <url>
+<loc>https://hudi.apache.org/blog/hudi-key-generators/</loc>
+<lastmod>2021-02-13T00:00:00-05:00</lastmod>
+</url>
+<url>
 <loc>https://hudi.apache.org/cn/activity</loc>
 <lastmod>2019-12-30T14:59:57-05:00</lastmod>
 </url>

Reply via email to