This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/asf-site by this push: new 12ebe2bdef [DOCS] Add tags to blog pages (#6638) 12ebe2bdef is described below commit 12ebe2bdef369cf7eb80cb2767e88fbbcb4f10d6 Author: Bhavani Sudha Saktheeswaran <2179254+bhasu...@users.noreply.github.com> AuthorDate: Wed Sep 21 15:18:50 2022 -0700 [DOCS] Add tags to blog pages (#6638) --- README.md | 38 ++++++++++++++++++++++ ...e-Case-for-incremental-processing-on-Hadoop.mdx | 4 +++ ...-Incremental-Processing-Framework-on-Hadoop.mdx | 4 +++ .../blog/2019-05-14-registering-dataset-to-hive.md | 3 ++ .../blog/2019-09-09-ingesting-database-changes.md | 3 ++ website/blog/2019-10-22-Hudi-On-Hops.mdx | 3 ++ ...-Data-on-S3-with-Amazon-EMR-and-Apache-Hudi.mdx | 3 ++ website/blog/2020-01-15-delete-support-in-hudi.md | 4 +++ .../blog/2020-01-20-change-capture-using-aws.md | 5 +++ website/blog/2020-03-22-exporting-hudi-datasets.md | 4 +++ .../blog/2020-04-27-apache-hudi-apache-zepplin.md | 4 +++ ...0-05-28-monitoring-hudi-metrics-with-datadog.md | 4 +++ ...nnounces-Apache-Hudi-as-a-Top-Level-Project.mdx | 3 ++ ...ctional-Data-Lake-at-Uber-Using-Apache-Hudi.mdx | 5 +++ ...-Apache-Hudi-grows-cloud-data-lake-maturity.mdx | 3 ++ .../blog/2020-08-04-PrestoDB-and-Apache-Hudi.mdx | 3 ++ ...18-hudi-incremental-processing-on-data-lakes.md | 5 +++ ...-efficient-migration-of-large-parquet-tables.md | 5 +++ ...2020-08-21-async-compaction-deployment-model.md | 4 +++ ...2020-08-22-ingest-multiple-tables-using-hudi.md | 4 +++ ...020-10-06-cdc-solution-using-hudi-by-nclouds.md | 4 +++ .../2020-10-15-apache-hudi-meets-apache-flink.md | 4 +++ .../2020-10-19-Origins-of-Data-Lake-at-Grofers.mdx | 6 ++++ .../2020-10-19-hudi-meets-aws-emr-and-aws-dms.md | 3 ++ ...Enterprise-at-Data-Summit-Connect-Fall-2020.mdx | 3 ++ ...apture-using-Apache-Hudi-and-Amazon-AMS-EMR.mdx | 5 +++ .../blog/2020-11-11-hudi-indexing-mechanisms.md | 4 +++ ...-11-29-Can-Big-Data-Solutions-Be-Affordable.mdx | 5 +++ 
...gh-perf-data-lake-with-hudi-and-alluxio-t3go.md | 6 ++++ website/blog/2021-01-27-hudi-clustering-intro.md | 4 +++ website/blog/2021-02-13-hudi-key-generators.md | 4 +++ ...ravel-operations-in-Hopsworks-Feature-Store.mdx | 6 ++++ ...-Generation-of-Data-Lakes-using-Apache-Hudi.mdx | 4 +++ website/blog/2021-03-01-hudi-file-sizing.md | 4 +++ ...-stream-for-amazon-dynamodb-and-apache-hudi.mdx | 4 +++ ...New-features-from-Apache-hudi-in-Amazon-EMR.mdx | 3 ++ ...-Apache-Spark-and-Apache-Hudi-on-Amazon-EMR.mdx | 4 +++ .../2021-05-12-Experts-primer-on-Apache-Hudi.mdx | 3 ++ ...ow-Uber-gets-data-a-ride-to-its-destination.mdx | 3 ++ ...loying-right-configurations-for-hudi-cleaner.md | 6 +++- ...6-Amazon-Athena-expands-Apache-Hudi-support.mdx | 3 ++ ...e-with-amazon-athena-Read-optimized-queries.mdx | 4 +++ .../2021-07-21-streaming-data-lake-platform.md | 4 +++ ...-lake-evolution-scheme-based-on-Apache-Hudi.mdx | 5 +++ ...ars-Versioned-Feature-Data-with-a-Lakehouse.mdx | 7 ++++ ...cient-Open-Source-Big-Data-Platform-at-Uber.mdx | 7 ++++ .../blog/2021-08-16-kafka-custom-deserializer.md | 6 ++++ .../blog/2021-08-18-improving-marker-mechanism.md | 5 +++ website/blog/2021-08-18-virtual-keys.md | 4 +++ website/blog/2021-08-23-async-clustering.md | 4 +++ website/blog/2021-08-23-s3-events-source.md | 4 +++ ...g-eb-level-data-lake-using-hudi-at-bytedance.md | 3 ++ .../blog/2021-10-05-Data-Platform-2.0-Part-I.mdx | 5 +++ ...abyte-scale-using-AWS-Glue-with-Apache-Hudi.mdx | 5 +++ ...n-building-real-time-data-lake-at-station-B.mdx | 4 +++ ...-at-enterprise-scale-using-the-AWS-platform.mdx | 4 +++ ...-Hudi-Architecture-Tools-and-Best-Practices.mdx | 3 ++ ...se-concurrency-control-are-we-too-optimistic.md | 4 +++ ...udi-0.7.0-and-0.8.0-available-on-Amazon-EMR.mdx | 3 ++ ...hudi-zorder-and-hilbert-space-filling-curves.md | 5 +++ ...es-with-Apache-Hudi-Kafka-Hive-and-Debezium.mdx | 4 +++ ...2022-01-06-apache-hudi-2021-a-year-in-review.md | 4 +++ 
...e-data-capture-with-debezium-and-apache-hudi.md | 7 +++- ...nd-How-I-Integrated-Airbyte-and-Apache-Hudi.mdx | 4 +++ ...-lake-efforts-at-Walmart-and-Disney-Hotstar.mdx | 3 ++ ...st-Efficiency-Scale-in-Big-Data-File-Format.mdx | 6 ++++ .../2022-02-02-Onehouse-Commitment-to-Openness.mdx | 4 +++ ...gs-a-fully-managed-lakehouse-to-Apache-Hudi.mdx | 4 +++ ...-transformations-on-Distributed-file-system.mdx | 3 ++ ...ating-Current-Interest-and-Rate-of-Adoption.mdx | 6 ++++ .../2022-02-17-Fresher-Data-Lake-on-AWS-S3.mdx | 4 +++ ...s-core-concepts-from-hudi-persistence-files.mdx | 4 +++ ...on-MSK-Connect-Apache-Flink-and-Apache-Hudi.mdx | 6 ++++ ...ta-using-AWS-Glue-Apache-Hudi-and-Amazon-S3.mdx | 5 ++- ...your-business-with-modern-data-capabilities.mdx | 7 ++++ ...-building-Lakehouse-Architecture-at-Halodoc.mdx | 7 ++-- ...atures-from-Apache-Hudi-0.9.0-on-Amazon-EMR.mdx | 3 ++ ...-in-data-lakehouse-table-format-comparisons.mdx | 5 ++- ...odal-Index-for-the-Lakehouse-in-Apache-Hudi.mdx | 6 +++- ...ecord-deletable-data-lake-using-Apache-Hudi.mdx | 4 +++ ...2022-06-04-Asynchronous-Indexing-Using-Hudi.mdx | 6 +++- ...lumn-Stats-Index-and-Data-Skipping-features.mdx | 6 +++- ...ent-tpc-ds-lakehouse-performance-benchmarks.mdx | 6 +++- ...ild-open-lakehouse-using-apache-hudi-and-dbt.md | 5 +++ ...d-a-serverless-real-time-analytics-platform.mdx | 5 +++ ...udi-to-Build-a-Streaming-Data-Lake-Platform.mdx | 5 +++ ...Apache-Iceberg-Lakehouse-Feature-Comparison.mdx | 7 ++-- ...-Lake-Table-Formats-Delta-Lake-Iceberg-Hudi.mdx | 6 ++++ 88 files changed, 415 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index f9fcf4bab8..f688f75f15 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,44 @@ Example: When you change any file in `versioned_docs/version-0.7.0/`, it will on ## Configs Configs can be automatically updated by following these steps documented at ../hudi-utils/README.md +## Blogs + +When adding a new blog, please follow these guidelines. + +1. 
Every Blog should have the `title`, `authors`, `image`, `tags` in the metadata of the blog. For example the front matter +for a blog should look like below. +``` +--- +title: "Blog title" +author: FirstName LastName +category: blog +image: /assets/images/blog/<image_file> +tags: +- how-to +- deltastreamer +- incremental-processing +- apache hudi +--- +``` +2. The blog can be inline or referring to an external blog. If it's an inline blog please save it as `.md` file. +Example for an inline blog - [Build Open Lakehouse using Apache Hudi & dbt](https://github.com/apache/hudi/blob/asf-site/website/blog/2022-07-11-build-open-lakehouse-using-apache-hudi-and-dbt.md). +If the blog is referring to an external blog you would need to embed the redirect url and save it as a `.mdx` file. +Take a look at this blog for reference - [Apache Hudi vs Delta Lake vs Apache Iceberg - Lakehouse Feature Comparison](https://raw.githubusercontent.com/apache/hudi/asf-site/website/blog/2022-08-18-Apache-Hudi-vs-Delta-Lake-vs-Apache-Iceberg-Lakehouse-Feature-Comparison.mdx) +3. The image must be uploaded in the path /assets/images/blog/<image_file-name> and should be of standard size 1200 * 600 +4. The tags should be representative of these + 1. tag 1 + - how-to (tutorial, recipes, show case how to use feature x) + - use-case (some community users talking about their use-case) + - design (technical articles talking about Hudi internal design/impl) + - performance (involves performance related blogs) + 2. tag 2 + - Represent individual features - clustering, compaction, ingestion, meta-sync etc. + 3. tag 3 + - Source. This is usually the second level domain name for this article gathered from the url link. + For example if the article is https://www.uber.com/blog/cost-efficiency-big-data/ we would use `uber` as the tag here. + Another example - for https://robinhood.engineering/author-balaji-varadarajan-e3f496815ebf we would use + `robinhood` as the tag. 
For blogs directly contributed to hudi repo, we can use `apache hudi` as the tag. + ## Maintainer Apache Hudi Community diff --git a/website/blog/2016-08-04-The-Case-for-incremental-processing-on-Hadoop.mdx b/website/blog/2016-08-04-The-Case-for-incremental-processing-on-Hadoop.mdx index ef657e7105..3419be8255 100644 --- a/website/blog/2016-08-04-The-Case-for-incremental-processing-on-Hadoop.mdx +++ b/website/blog/2016-08-04-The-Case-for-incremental-processing-on-Hadoop.mdx @@ -4,6 +4,10 @@ authors: - name: Vinoth Chandar category: blog image: /assets/images/blog/2016-08-04-The-Case-for-incremental-processing-on-Hadoop.png +tags: +- use-case +- incremental-processing +- oreilly --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2017-03-12-Hoodie-Uber-Engineerings-Incremental-Processing-Framework-on-Hadoop.mdx b/website/blog/2017-03-12-Hoodie-Uber-Engineerings-Incremental-Processing-Framework-on-Hadoop.mdx index 29c9be1e21..77ad290281 100644 --- a/website/blog/2017-03-12-Hoodie-Uber-Engineerings-Incremental-Processing-Framework-on-Hadoop.mdx +++ b/website/blog/2017-03-12-Hoodie-Uber-Engineerings-Incremental-Processing-Framework-on-Hadoop.mdx @@ -5,6 +5,10 @@ authors: - name: Vinoth Chandar category: blog image: /assets/images/blog/2017-03-12-Hoodie-Uber-Engineerings-Incremental-Processing-Framework-on-Hadoop.png +tags: +- use-case +- incremental-processing +- uber --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2019-05-14-registering-dataset-to-hive.md b/website/blog/2019-05-14-registering-dataset-to-hive.md index 93faadf719..0e8e202151 100644 --- a/website/blog/2019-05-14-registering-dataset-to-hive.md +++ b/website/blog/2019-05-14-registering-dataset-to-hive.md @@ -3,6 +3,9 @@ title: "Registering sample dataset to Hive via beeline" excerpt: "How to manually register HUDI dataset into Hive using beeline" author: vinoth category: blog +tags: +- how-to +- apache hudi --- Hudi hive sync tool 
typically handles registration of the dataset into Hive metastore. In case, there are issues with quickstart around this, following page shows commands that can be used to do this manually via beeline. diff --git a/website/blog/2019-09-09-ingesting-database-changes.md b/website/blog/2019-09-09-ingesting-database-changes.md index 4a295d067b..2c8b068e5a 100644 --- a/website/blog/2019-09-09-ingesting-database-changes.md +++ b/website/blog/2019-09-09-ingesting-database-changes.md @@ -3,6 +3,9 @@ title: "Ingesting Database changes via Sqoop/Hudi" excerpt: "Learn how to ingesting changes from a HUDI dataset using Sqoop/Hudi" author: vinoth category: blog +tags: +- how-to +- apache hudi --- Very simple in just 2 steps. diff --git a/website/blog/2019-10-22-Hudi-On-Hops.mdx b/website/blog/2019-10-22-Hudi-On-Hops.mdx index 643e27802a..d1c9a119e5 100644 --- a/website/blog/2019-10-22-Hudi-On-Hops.mdx +++ b/website/blog/2019-10-22-Hudi-On-Hops.mdx @@ -3,6 +3,9 @@ title: "Hudi On Hops" authors: - name: NETSANET GEBRETSADKAN KIDANE category: blog +tags: +- blog +- diva-portal --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2019-11-15-New-Insert-Update-Delete-Data-on-S3-with-Amazon-EMR-and-Apache-Hudi.mdx b/website/blog/2019-11-15-New-Insert-Update-Delete-Data-on-S3-with-Amazon-EMR-and-Apache-Hudi.mdx index 6576ab1da9..ae95d85c4b 100644 --- a/website/blog/2019-11-15-New-Insert-Update-Delete-Data-on-S3-with-Amazon-EMR-and-Apache-Hudi.mdx +++ b/website/blog/2019-11-15-New-Insert-Update-Delete-Data-on-S3-with-Amazon-EMR-and-Apache-Hudi.mdx @@ -4,6 +4,9 @@ authors: - name: Danilo Poccia category: blog image: /assets/images/blog/aws.jpg +tags: +- blog +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-01-15-delete-support-in-hudi.md b/website/blog/2020-01-15-delete-support-in-hudi.md index 2b5fb9327c..c9af8dcbbb 100644 --- a/website/blog/2020-01-15-delete-support-in-hudi.md +++ 
b/website/blog/2020-01-15-delete-support-in-hudi.md @@ -3,6 +3,10 @@ title: "Delete support in Hudi" excerpt: "Deletes are supported at a record level in Hudi with 0.5.1 release. This blog is a “how to” blog on how to delete records in hudi." author: shivnarayan category: blog +tags: +- how-to +- delete +- apache hudi --- Deletes are supported at a record level in Hudi with 0.5.1 release. This blog is a "how to" blog on how to delete records in hudi. Deletes can be done with 3 flavors: Hudi RDD APIs, with Spark data source and with DeltaStreamer. diff --git a/website/blog/2020-01-20-change-capture-using-aws.md b/website/blog/2020-01-20-change-capture-using-aws.md index a757bca98e..fd6316e1b5 100644 --- a/website/blog/2020-01-20-change-capture-using-aws.md +++ b/website/blog/2020-01-20-change-capture-using-aws.md @@ -4,6 +4,11 @@ excerpt: "In this blog, we will build an end-end solution for capturing changes author: vinoth category: blog image: /assets/images/blog/change-capture-architecture.png +tags: +- how-to +- change-data-capture +- cdc +- apache hudi --- One of the core use-cases for Apache Hudi is enabling seamless, efficient database ingestion to your data lake. Even though a lot has been talked about and even users already adopting this model, content on how to go about this is sparse. diff --git a/website/blog/2020-03-22-exporting-hudi-datasets.md b/website/blog/2020-03-22-exporting-hudi-datasets.md index 0811d017c1..b3d859a220 100644 --- a/website/blog/2020-03-22-exporting-hudi-datasets.md +++ b/website/blog/2020-03-22-exporting-hudi-datasets.md @@ -3,6 +3,10 @@ title: "Export Hudi datasets as a copy or as different formats" excerpt: "Learn how to copy or export HUDI dataset in various formats." 
author: rxu category: blog +tags: +- how-to +- snapshot-exporter +- apache hudi --- ### Copy to Hudi dataset diff --git a/website/blog/2020-04-27-apache-hudi-apache-zepplin.md b/website/blog/2020-04-27-apache-hudi-apache-zepplin.md index 7dfcf35a61..51b8ddbf23 100644 --- a/website/blog/2020-04-27-apache-hudi-apache-zepplin.md +++ b/website/blog/2020-04-27-apache-hudi-apache-zepplin.md @@ -3,6 +3,10 @@ title: "Apache Hudi Support on Apache Zeppelin" excerpt: "Integrating HUDI's real-time and read-optimized query capabilities into Apache Zeppelin’s notebook" author: leesf category: blog +tags: +- how-to +- apache zeppelin +- apache hudi --- diff --git a/website/blog/2020-05-28-monitoring-hudi-metrics-with-datadog.md b/website/blog/2020-05-28-monitoring-hudi-metrics-with-datadog.md index 369775463f..5a9058f6fd 100644 --- a/website/blog/2020-05-28-monitoring-hudi-metrics-with-datadog.md +++ b/website/blog/2020-05-28-monitoring-hudi-metrics-with-datadog.md @@ -3,6 +3,10 @@ title: "Monitor Hudi metrics with Datadog" excerpt: "Introducing the feature of reporting Hudi metrics via Datadog HTTP API" author: rxu category: blog +tags: +- how-to +- metrics +- apache hudi --- ## Availability diff --git a/website/blog/2020-06-04-The-Apache-Software-Foundation-Announces-Apache-Hudi-as-a-Top-Level-Project.mdx b/website/blog/2020-06-04-The-Apache-Software-Foundation-Announces-Apache-Hudi-as-a-Top-Level-Project.mdx index 4fa1845e11..bd0d61be95 100644 --- a/website/blog/2020-06-04-The-Apache-Software-Foundation-Announces-Apache-Hudi-as-a-Top-Level-Project.mdx +++ b/website/blog/2020-06-04-The-Apache-Software-Foundation-Announces-Apache-Hudi-as-a-Top-Level-Project.mdx @@ -2,6 +2,9 @@ title: "The Apache Software Foundation Announces Apache® Hudi™ as a Top-Level Project" category: blog image: /assets/images/asf_logo.svg +tags: +- blog +- apache --- import Redirect from '@site/src/components/Redirect'; diff --git 
a/website/blog/2020-06-09-Building-a-Large-scale-Transactional-Data-Lake-at-Uber-Using-Apache-Hudi.mdx b/website/blog/2020-06-09-Building-a-Large-scale-Transactional-Data-Lake-at-Uber-Using-Apache-Hudi.mdx index 9e4b254d44..52bb6c79f0 100644 --- a/website/blog/2020-06-09-Building-a-Large-scale-Transactional-Data-Lake-at-Uber-Using-Apache-Hudi.mdx +++ b/website/blog/2020-06-09-Building-a-Large-scale-Transactional-Data-Lake-at-Uber-Using-Apache-Hudi.mdx @@ -4,6 +4,11 @@ authors: - name: Nishith Agarwal category: blog image: /assets/images/blog/2020-06-09-Building-a-Large-scale-Transactional-Data-Lake-at-Uber-Using-Apache-Hudi.png +tags: +- use-case +- datalake +- analytics at-scale +- uber --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-06-16-Apache-Hudi-grows-cloud-data-lake-maturity.mdx b/website/blog/2020-06-16-Apache-Hudi-grows-cloud-data-lake-maturity.mdx index c1d6a2c294..99dbeee8c3 100644 --- a/website/blog/2020-06-16-Apache-Hudi-grows-cloud-data-lake-maturity.mdx +++ b/website/blog/2020-06-16-Apache-Hudi-grows-cloud-data-lake-maturity.mdx @@ -4,6 +4,9 @@ authors: - name: Sean Michael Kerner category: blog image: /assets/images/blog/2020-06-16-Apache-Hudi-grows-cloud-data-lake-maturity.jpeg +tags: +- blog +- techtarget --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-08-04-PrestoDB-and-Apache-Hudi.mdx b/website/blog/2020-08-04-PrestoDB-and-Apache-Hudi.mdx index 25d6d4a226..23a9ced163 100644 --- a/website/blog/2020-08-04-PrestoDB-and-Apache-Hudi.mdx +++ b/website/blog/2020-08-04-PrestoDB-and-Apache-Hudi.mdx @@ -5,6 +5,9 @@ authors: - name: Brandon Scheller category: blog image: /assets/images/blog/2020-08-04-PrestoDB-and-Apache-Hudi.png +tags: +- blog +- prestodb --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-08-18-hudi-incremental-processing-on-data-lakes.md b/website/blog/2020-08-18-hudi-incremental-processing-on-data-lakes.md index 
b0566a9baa..bc9ebe148b 100644 --- a/website/blog/2020-08-18-hudi-incremental-processing-on-data-lakes.md +++ b/website/blog/2020-08-18-hudi-incremental-processing-on-data-lakes.md @@ -4,6 +4,11 @@ excerpt: "How Apache Hudi provides ability for incremental data processing." author: vinoyang category: blog image: /assets/images/blog/incr-processing/image7.png +tags: +- blog +- datalake +- incremental-processing +- apache hudi --- ### NOTE: This article is a translation of the infoq.cn article, found [here](https://www.infoq.cn/article/CAgIDpfJBVcJHKJLSbhe), with minor edits diff --git a/website/blog/2020-08-20-efficient-migration-of-large-parquet-tables.md b/website/blog/2020-08-20-efficient-migration-of-large-parquet-tables.md index 6bf5e878f0..40e3350734 100644 --- a/website/blog/2020-08-20-efficient-migration-of-large-parquet-tables.md +++ b/website/blog/2020-08-20-efficient-migration-of-large-parquet-tables.md @@ -4,6 +4,11 @@ excerpt: "Migrating a large parquet table to Apache Hudi without having to rewri author: vbalaji category: blog image: /assets/images/blog/2020-08-20-skeleton.png +tags: +- how-to +- migration +- bootstrap +- apache hudi --- We will look at how to migrate a large parquet table to Hudi without having to rewrite the entire dataset. diff --git a/website/blog/2020-08-21-async-compaction-deployment-model.md b/website/blog/2020-08-21-async-compaction-deployment-model.md index 5e6eec2657..8fbba53c28 100644 --- a/website/blog/2020-08-21-async-compaction-deployment-model.md +++ b/website/blog/2020-08-21-async-compaction-deployment-model.md @@ -3,6 +3,10 @@ title: "Async Compaction Deployment Models" excerpt: "Mechanisms for executing compaction jobs in Hudi asynchronously" author: vbalaji category: blog +tags: +- how-to +- compaction +- apache hudi --- We will look at different deployment models for executing compactions asynchronously. 
diff --git a/website/blog/2020-08-22-ingest-multiple-tables-using-hudi.md b/website/blog/2020-08-22-ingest-multiple-tables-using-hudi.md index a340edf5a7..889ec39193 100644 --- a/website/blog/2020-08-22-ingest-multiple-tables-using-hudi.md +++ b/website/blog/2020-08-22-ingest-multiple-tables-using-hudi.md @@ -3,6 +3,10 @@ title: "Ingest multiple tables using Hudi" excerpt: "Ingesting multiple tables using Hudi at a single go is now possible. This blog gives a detailed explanation of how to achieve the same using `HoodieMultiTableDeltaStreamer.java`" author: pratyakshsharma category: blog +tags: +- how-to +- multi-deltastreamer +- apache hudi --- When building a change data capture pipeline for already existing or newly created relational databases, one of the most common problems that one faces is simplifying the onboarding process for multiple tables. Ingesting multiple tables to Hudi dataset at a single go is now possible using `HoodieMultiTableDeltaStreamer` class which is a wrapper on top of the more popular `HoodieDeltaStreamer` class. Currently `HoodieMultiTableDeltaStreamer` supports **COPY_ON_WRITE** storage type only an [...] 
diff --git a/website/blog/2020-10-06-cdc-solution-using-hudi-by-nclouds.md b/website/blog/2020-10-06-cdc-solution-using-hudi-by-nclouds.md index 1834927200..5277957af9 100644 --- a/website/blog/2020-10-06-cdc-solution-using-hudi-by-nclouds.md +++ b/website/blog/2020-10-06-cdc-solution-using-hudi-by-nclouds.md @@ -4,6 +4,10 @@ excerpt: "Solution to set up a new data and analytics platform using Apache Hudi author: nclouds category: blog image: /assets/images/blog/2020-10-06-cdc-solution-using-hudi-by-nclouds.jpg +tags: +- blog +- apache flink +- apache hudi --- This [blog](https://aws.amazon.com/blogs/apn/how-nclouds-helps-accelerate-data-delivery-with-apache-hudi-on-amazon-emr/) published by nClouds in partnership with AWS shows how to build a CDC pipeline using Apache Hudi on Amazon EMR and other managed services like Amazon RDS and AWS DMS, including Amazon QuickSight for data visualization. \ No newline at end of file diff --git a/website/blog/2020-10-15-apache-hudi-meets-apache-flink.md b/website/blog/2020-10-15-apache-hudi-meets-apache-flink.md index 81e89f49f1..4abffb3c13 100644 --- a/website/blog/2020-10-15-apache-hudi-meets-apache-flink.md +++ b/website/blog/2020-10-15-apache-hudi-meets-apache-flink.md @@ -4,6 +4,10 @@ excerpt: "The design and latest progress of the integration of Apache Hudi and A author: wangxianghu category: blog image: /assets/images/blog/2020-10-15-apache-hudi-meets-apache-flink.png +tags: +- blog +- apache flink +- apache hudi --- Apache Hudi (Hudi for short) is a data lake framework created at Uber. Hudi joined the Apache incubator for incubation in January 2019, and was promoted to the top Apache project in May 2020. It is one of the most popular data lake frameworks. 
diff --git a/website/blog/2020-10-19-Origins-of-Data-Lake-at-Grofers.mdx b/website/blog/2020-10-19-Origins-of-Data-Lake-at-Grofers.mdx index e494c9a931..3a84c0d7ca 100644 --- a/website/blog/2020-10-19-Origins-of-Data-Lake-at-Grofers.mdx +++ b/website/blog/2020-10-19-Origins-of-Data-Lake-at-Grofers.mdx @@ -4,6 +4,12 @@ authors: - name: Akshay Agarwal category: blog image: /assets/images/blog/2020-10-19-Origins-of-Data-Lake-at-Grofers.gif +tags: +- use-case +- datalake +- change-data-capture +- cdc +- grofers --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-10-19-hudi-meets-aws-emr-and-aws-dms.md b/website/blog/2020-10-19-hudi-meets-aws-emr-and-aws-dms.md index a31aae0619..6da647b91e 100644 --- a/website/blog/2020-10-19-hudi-meets-aws-emr-and-aws-dms.md +++ b/website/blog/2020-10-19-hudi-meets-aws-emr-and-aws-dms.md @@ -4,6 +4,9 @@ excerpt: "AWS blog showing how to build a CDC pipeline that captures data from a author: aws category: blog image: /assets/images/blog/2020-10-19-hudi-meets-aws-emr-and-aws-dms.jpeg +tags: +- blog +- apache hudi --- This [blog](https://aws.amazon.com/blogs/big-data/apply-record-level-changes-from-relational-databases-to-amazon-s3-data-lake-using-apache-hudi-on-amazon-emr-and-aws-database-migration-service/) published by AWS shows how to build a CDC pipeline that captures data from an Amazon Relational Database Service (Amazon RDS) for MySQL database using AWS Database Migration Service (AWS DMS) and applies those changes to a dataset in Amazon S3 using Apache Hudi on Amazon EMR. 
\ No newline at end of file diff --git a/website/blog/2020-10-21-Architecting-Data-Lakes-for-the-Modern-Enterprise-at-Data-Summit-Connect-Fall-2020.mdx b/website/blog/2020-10-21-Architecting-Data-Lakes-for-the-Modern-Enterprise-at-Data-Summit-Connect-Fall-2020.mdx index 92b0d9554c..57e73cd2db 100644 --- a/website/blog/2020-10-21-Architecting-Data-Lakes-for-the-Modern-Enterprise-at-Data-Summit-Connect-Fall-2020.mdx +++ b/website/blog/2020-10-21-Architecting-Data-Lakes-for-the-Modern-Enterprise-at-Data-Summit-Connect-Fall-2020.mdx @@ -4,6 +4,9 @@ authors: - name: Stephanie Simone category: blog image: /assets/images/blog/data-summit-connect.jpeg +tags: +- blog +- dbta --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-10-21-Data-Lake-Change-Capture-using-Apache-Hudi-and-Amazon-AMS-EMR.mdx b/website/blog/2020-10-21-Data-Lake-Change-Capture-using-Apache-Hudi-and-Amazon-AMS-EMR.mdx index 834d3dc4ae..4f859d7560 100644 --- a/website/blog/2020-10-21-Data-Lake-Change-Capture-using-Apache-Hudi-and-Amazon-AMS-EMR.mdx +++ b/website/blog/2020-10-21-Data-Lake-Change-Capture-using-Apache-Hudi-and-Amazon-AMS-EMR.mdx @@ -4,6 +4,11 @@ authors: - name: Manoj Kukreja category: blog image: /assets/images/blog/2020-10-21-Data-Lake-Change-Capture-using-Apache-Hudi-and-Amazon-AMS-EMR.jpeg +tags: +- how-to +- change-data-capture +- cdc +- towardsdatascience --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-11-11-hudi-indexing-mechanisms.md b/website/blog/2020-11-11-hudi-indexing-mechanisms.md index 11e47497b2..4ffc01ed63 100644 --- a/website/blog/2020-11-11-hudi-indexing-mechanisms.md +++ b/website/blog/2020-11-11-hudi-indexing-mechanisms.md @@ -4,6 +4,10 @@ excerpt: "Detailing different indexing mechanisms in Hudi and when to use each o author: vinoth category: blog image: /assets/images/blog/hudi-indexes/with-and-without-index.png +tags: +- how-to +- indexing +- apache hudi --- Apache Hudi employs an index to 
locate the file group, that an update/delete belongs to. For Copy-On-Write tables, this enables diff --git a/website/blog/2020-11-29-Can-Big-Data-Solutions-Be-Affordable.mdx b/website/blog/2020-11-29-Can-Big-Data-Solutions-Be-Affordable.mdx index 50357359b8..8aacd8c1d9 100644 --- a/website/blog/2020-11-29-Can-Big-Data-Solutions-Be-Affordable.mdx +++ b/website/blog/2020-11-29-Can-Big-Data-Solutions-Be-Affordable.mdx @@ -2,6 +2,11 @@ title: "Can Big Data Solutions Be Affordable?" category: blog image: /assets/images/blog/2020-11-29-Can-Big-Data-Solutions-Be-Affordable.jpg +tags: +- blog +- big-data +- near real-time analytics +- analyticsinsight --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2020-12-01-high-perf-data-lake-with-hudi-and-alluxio-t3go.md b/website/blog/2020-12-01-high-perf-data-lake-with-hudi-and-alluxio-t3go.md index 32bbc9d3d7..22ce068e44 100644 --- a/website/blog/2020-12-01-high-perf-data-lake-with-hudi-and-alluxio-t3go.md +++ b/website/blog/2020-12-01-high-perf-data-lake-with-hudi-and-alluxio-t3go.md @@ -4,6 +4,12 @@ excerpt: "How T3Go’s high-performance data lake using Apache Hudi and Alluxio author: t3go category: blog image: /assets/images/blog/2020-12-01-t3go-architecture.png +tags: +- use-case +- near real-time analytics +- incremental-processing +- caching +- apache hudi --- ## Building High-Performance Data Lake Using Apache Hudi and Alluxio at T3Go diff --git a/website/blog/2021-01-27-hudi-clustering-intro.md b/website/blog/2021-01-27-hudi-clustering-intro.md index 3a8c1fa914..f1af4433e5 100644 --- a/website/blog/2021-01-27-hudi-clustering-intro.md +++ b/website/blog/2021-01-27-hudi-clustering-intro.md @@ -4,6 +4,10 @@ excerpt: "Introduce clustering feature to change data layout" author: satish.kotha category: blog image: /assets/images/blog/2021-01-27-hudi-clustering-intro.png +tags: +- design +- clustering +- apache hudi --- ## Background diff --git a/website/blog/2021-02-13-hudi-key-generators.md 
b/website/blog/2021-02-13-hudi-key-generators.md index 405793e8af..ff8ac74e2d 100644 --- a/website/blog/2021-02-13-hudi-key-generators.md +++ b/website/blog/2021-02-13-hudi-key-generators.md @@ -3,6 +3,10 @@ title: "Apache Hudi Key Generators" excerpt: "Different key generators available with Apache Hudi" author: shivnarayan category: blog +tags: +- blog +- key-generators +- apache hudi --- Every record in Hudi is uniquely identified by a primary key, which is a pair of record key and partition path where diff --git a/website/blog/2021-02-24-Time-travel-operations-in-Hopsworks-Feature-Store.mdx b/website/blog/2021-02-24-Time-travel-operations-in-Hopsworks-Feature-Store.mdx index 2a0c486737..fc46e124f4 100644 --- a/website/blog/2021-02-24-Time-travel-operations-in-Hopsworks-Feature-Store.mdx +++ b/website/blog/2021-02-24-Time-travel-operations-in-Hopsworks-Feature-Store.mdx @@ -2,6 +2,12 @@ title: "Time travel operations in Hopsworks Feature Store" category: blog image: /assets/images/blog/2021-02-24-featurestore_incremental_pull.png +tags: +- use-case +- incremental-processing +- feature-store +- time-travel +- hopsworks --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-03-01-Data-Lakehouse-Building-the-Next-Generation-of-Data-Lakes-using-Apache-Hudi.mdx b/website/blog/2021-03-01-Data-Lakehouse-Building-the-Next-Generation-of-Data-Lakes-using-Apache-Hudi.mdx index ad7af6c0b6..60a89fa054 100644 --- a/website/blog/2021-03-01-Data-Lakehouse-Building-the-Next-Generation-of-Data-Lakes-using-Apache-Hudi.mdx +++ b/website/blog/2021-03-01-Data-Lakehouse-Building-the-Next-Generation-of-Data-Lakes-using-Apache-Hudi.mdx @@ -5,6 +5,10 @@ authors: - name: Brandon Stanley category: blog image: /assets/images/blog/2021-03-01-Data-Lakehouse-Building-the-Next-Generation-of-Data-Lakes-using-Apache-Hudi.png +tags: +- blog +- data-lakehouse +- medium --- import Redirect from '@site/src/components/Redirect'; diff --git 
a/website/blog/2021-03-01-hudi-file-sizing.md b/website/blog/2021-03-01-hudi-file-sizing.md index 7c65b3e369..1b71eee854 100644 --- a/website/blog/2021-03-01-hudi-file-sizing.md +++ b/website/blog/2021-03-01-hudi-file-sizing.md @@ -4,6 +4,10 @@ excerpt: "Maintaining well-sized files can improve query performance significant author: shivnarayan category: blog image: /assets/images/blog/2021-03-01-hudi-file-sizing.png +tags: +- design +- file-sizing +- apache hudi --- Apache Hudi is a data lake platform technology that provides several functionalities needed to build and manage data lakes. diff --git a/website/blog/2021-03-04-Build-a-data-lake-using-amazon-kinesis-data-stream-for-amazon-dynamodb-and-apache-hudi.mdx b/website/blog/2021-03-04-Build-a-data-lake-using-amazon-kinesis-data-stream-for-amazon-dynamodb-and-apache-hudi.mdx index 2b29471ebc..caaa16e02e 100644 --- a/website/blog/2021-03-04-Build-a-data-lake-using-amazon-kinesis-data-stream-for-amazon-dynamodb-and-apache-hudi.mdx +++ b/website/blog/2021-03-04-Build-a-data-lake-using-amazon-kinesis-data-stream-for-amazon-dynamodb-and-apache-hudi.mdx @@ -6,6 +6,10 @@ authors: - name: Saurabh Shrivastava category: blog image: /assets/images/blog/2021-03-04-build-data-lake-using-amazon-kinesis-for-amazon-dynamodb-and-apache-hudi.jpeg +tags: +- how-to +- streaming ingestion +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-03-11-New-features-from-Apache-hudi-in-Amazon-EMR.mdx b/website/blog/2021-03-11-New-features-from-Apache-hudi-in-Amazon-EMR.mdx index 1e3a1379f5..efb4f3f1fb 100644 --- a/website/blog/2021-03-11-New-features-from-Apache-hudi-in-Amazon-EMR.mdx +++ b/website/blog/2021-03-11-New-features-from-Apache-hudi-in-Amazon-EMR.mdx @@ -4,6 +4,9 @@ authors: - name: Udit Mehrotra category: blog image: /assets/images/blog/aws.jpg +tags: +- blog +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git 
a/website/blog/2021-04-12-Build-Slowly-Changing-Dimensions-Type-2-SCD2-with-Apache-Spark-and-Apache-Hudi-on-Amazon-EMR.mdx b/website/blog/2021-04-12-Build-Slowly-Changing-Dimensions-Type-2-SCD2-with-Apache-Spark-and-Apache-Hudi-on-Amazon-EMR.mdx index 927e31d15f..f175f7f529 100644 --- a/website/blog/2021-04-12-Build-Slowly-Changing-Dimensions-Type-2-SCD2-with-Apache-Spark-and-Apache-Hudi-on-Amazon-EMR.mdx +++ b/website/blog/2021-04-12-Build-Slowly-Changing-Dimensions-Type-2-SCD2-with-Apache-Spark-and-Apache-Hudi-on-Amazon-EMR.mdx @@ -4,6 +4,10 @@ authors: - name: David Greenshtein category: blog image: /assets/images/blog/aws.jpg +tags: +- how-to +- scd2 +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-05-12-Experts-primer-on-Apache-Hudi.mdx b/website/blog/2021-05-12-Experts-primer-on-Apache-Hudi.mdx index 4a27ab1627..712c9668ae 100644 --- a/website/blog/2021-05-12-Experts-primer-on-Apache-Hudi.mdx +++ b/website/blog/2021-05-12-Experts-primer-on-Apache-Hudi.mdx @@ -4,6 +4,9 @@ authors: - name: Stephanie Simone category: blog image: /assets/images/blog/data-summit-connect.jpeg +tags: +- blog +- dbta --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-06-04-Apache-Hudi-How-Uber-gets-data-a-ride-to-its-destination.mdx b/website/blog/2021-06-04-Apache-Hudi-How-Uber-gets-data-a-ride-to-its-destination.mdx index db144761ac..edebf16141 100644 --- a/website/blog/2021-06-04-Apache-Hudi-How-Uber-gets-data-a-ride-to-its-destination.mdx +++ b/website/blog/2021-06-04-Apache-Hudi-How-Uber-gets-data-a-ride-to-its-destination.mdx @@ -3,6 +3,9 @@ title: "Apache Hudi: How Uber gets data a ride to its destination" authors: - name: Joe McKendrick category: blog +tags: +- blog +- rtinsights --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-06-10-employing-right-configurations-for-hudi-cleaner.md 
b/website/blog/2021-06-10-employing-right-configurations-for-hudi-cleaner.md index 6094a41d1e..e9f5ce6e5a 100644 --- a/website/blog/2021-06-10-employing-right-configurations-for-hudi-cleaner.md +++ b/website/blog/2021-06-10-employing-right-configurations-for-hudi-cleaner.md @@ -4,6 +4,10 @@ excerpt: "Ensuring isolation between Hudi writers and readers using `HoodieClean author: pratyakshsharma category: blog image: /assets/images/blog/hoodie-cleaner/Initial_timeline.png +tags: +- how-to +- cleaner-service +- apache hudi --- Apache Hudi provides snapshot isolation between writers and readers. This is made possible by Hudi’s MVCC concurrency model. In this blog, we will explain how to employ the right configurations to manage multiple file versions. Furthermore, we will discuss mechanisms available to users on how to maintain just the required number of old file versions so that long running readers do not fail. @@ -105,4 +109,4 @@ You can find more details and the relevant code for these commands in [`org.apac Work is currently going on for introducing a new cleaning policy based on time elapsed. This will help in achieving a consistent retention throughout regardless of how frequently ingestion happens. You may track the progress [here](https://issues.apache.org/jira/browse/HUDI-349). -We hope this blog gives you an idea about how to configure the Hudi cleaner and the supported cleaning policies. Please visit the [blog section](https://hudi.apache.org/blog) for a deeper understanding of various Hudi concepts. Cheers! \ No newline at end of file +We hope this blog gives you an idea about how to configure the Hudi cleaner and the supported cleaning policies. Please visit the [blog section](https://hudi.apache.org/blog) for a deeper understanding of various Hudi concepts. Cheers! 
diff --git a/website/blog/2021-07-16-Amazon-Athena-expands-Apache-Hudi-support.mdx b/website/blog/2021-07-16-Amazon-Athena-expands-Apache-Hudi-support.mdx index 21cf380e8a..a086ff9ed7 100644 --- a/website/blog/2021-07-16-Amazon-Athena-expands-Apache-Hudi-support.mdx +++ b/website/blog/2021-07-16-Amazon-Athena-expands-Apache-Hudi-support.mdx @@ -2,6 +2,9 @@ title: "Amazon Athena expands Apache Hudi support" category: blog image: /assets/images/blog/aws.jpg +tags: +- blog +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-07-16-Query-apache-hudi-dataset-in-an-amazon-S3-data-lake-with-amazon-athena-Read-optimized-queries.mdx b/website/blog/2021-07-16-Query-apache-hudi-dataset-in-an-amazon-S3-data-lake-with-amazon-athena-Read-optimized-queries.mdx index d565c35fa3..afbfe4d1e2 100644 --- a/website/blog/2021-07-16-Query-apache-hudi-dataset-in-an-amazon-S3-data-lake-with-amazon-athena-Read-optimized-queries.mdx +++ b/website/blog/2021-07-16-Query-apache-hudi-dataset-in-an-amazon-S3-data-lake-with-amazon-athena-Read-optimized-queries.mdx @@ -6,6 +6,10 @@ authors: - name: Imtiaz Sayed category: blog image: /assets/images/blog/2021-07-16-query-hudi-using-athena-ro-queries.png +tags: +- how-to +- read-optimized-queries +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-07-21-streaming-data-lake-platform.md b/website/blog/2021-07-21-streaming-data-lake-platform.md index 2cfa758226..b298856a6a 100644 --- a/website/blog/2021-07-21-streaming-data-lake-platform.md +++ b/website/blog/2021-07-21-streaming-data-lake-platform.md @@ -4,6 +4,10 @@ excerpt: "It's been called many things. 
But, we have always been building a data author: vinoth category: blog image: /assets/images/blog/hudi_streaming.png +tags: +- datalake-platform +- blog +- apache hudi --- As early as 2016, we set out a [bold, new vision](https://www.oreilly.com/content/ubers-case-for-incremental-processing-on-hadoop/) reimagining batch data processing through a new “**incremental**” data processing stack - alongside the existing batch and streaming stacks. diff --git a/website/blog/2021-07-26-Baixin-banksreal-time-data-lake-evolution-scheme-based-on-Apache-Hudi.mdx b/website/blog/2021-07-26-Baixin-banksreal-time-data-lake-evolution-scheme-based-on-Apache-Hudi.mdx index 903227b3c3..fed7574cbe 100644 --- a/website/blog/2021-07-26-Baixin-banksreal-time-data-lake-evolution-scheme-based-on-Apache-Hudi.mdx +++ b/website/blog/2021-07-26-Baixin-banksreal-time-data-lake-evolution-scheme-based-on-Apache-Hudi.mdx @@ -2,6 +2,11 @@ title: "Baixin bank’s real-time data lake evolution scheme based on Apache Hudi" category: blog image: /assets/images/blog/2021-07-26-baixin-bank-real-time-data-lake.png +tags: +- use-case +- real-time-datalake +- incremental-processing +- developpaper --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-08-03-MLOps-Wars-Versioned-Feature-Data-with-a-Lakehouse.mdx b/website/blog/2021-08-03-MLOps-Wars-Versioned-Feature-Data-with-a-Lakehouse.mdx index efb341a1a0..44d6a87705 100644 --- a/website/blog/2021-08-03-MLOps-Wars-Versioned-Feature-Data-with-a-Lakehouse.mdx +++ b/website/blog/2021-08-03-MLOps-Wars-Versioned-Feature-Data-with-a-Lakehouse.mdx @@ -5,6 +5,13 @@ authors: - name: Jim Dowling category: blog image: /assets/images/blog/2021-08-03-mlops-wars.png +tags: +- use-case +- mlops +- feature-store +- incremental-processing +- time-travel +- logicalclocks --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-08-11-Cost-Efficient-Open-Source-Big-Data-Platform-at-Uber.mdx 
b/website/blog/2021-08-11-Cost-Efficient-Open-Source-Big-Data-Platform-at-Uber.mdx index a5a0ed2456..9ac51fb26f 100644 --- a/website/blog/2021-08-11-Cost-Efficient-Open-Source-Big-Data-Platform-at-Uber.mdx +++ b/website/blog/2021-08-11-Cost-Efficient-Open-Source-Big-Data-Platform-at-Uber.mdx @@ -5,6 +5,13 @@ authors: - name: Mohammad Islam category: blog image: /assets/images/blog/2021-08-11-cost-efficient-open-source-big-data-platform-at-uber.png +tags: +- cost-efficiency +- optimization +- bigdata +- data-platform +- incremental-processing +- uber --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-08-16-kafka-custom-deserializer.md b/website/blog/2021-08-16-kafka-custom-deserializer.md index cbc170352c..c8146b6343 100644 --- a/website/blog/2021-08-16-kafka-custom-deserializer.md +++ b/website/blog/2021-08-16-kafka-custom-deserializer.md @@ -4,6 +4,12 @@ excerpt: "Evolve schema used in Kafkasource of DeltaStreamer to keep data up to author: sbernauer category: blog image: /assets/images/blog/hudi_schemaevolution.png +tags: +- design +- deltastreamer +- schema +- apache hudi +- apache kafka --- The schema used for data exchange between services can change rapidly with new business requirements. diff --git a/website/blog/2021-08-18-improving-marker-mechanism.md b/website/blog/2021-08-18-improving-marker-mechanism.md index 3f81269497..db10831e8c 100644 --- a/website/blog/2021-08-18-improving-marker-mechanism.md +++ b/website/blog/2021-08-18-improving-marker-mechanism.md @@ -4,6 +4,11 @@ excerpt: "We introduce a new marker mechanism leveraging the timeline server to author: yihua category: blog image: /assets/images/blog/marker-mechanism/timeline-server-based-marker-mechanism.png +tags: +- design +- timeline-server +- markers +- apache hudi --- Hudi supports fully automatic cleanup of uncommitted data on storage during its write operations. 
Write operations in an Apache Hudi table use markers to efficiently track the data files written to storage. In this blog, we dive into the design of the existing direct marker file mechanism and explain its performance problems on cloud storage like AWS S3 for diff --git a/website/blog/2021-08-18-virtual-keys.md b/website/blog/2021-08-18-virtual-keys.md index 57e44da270..02b7468c2e 100644 --- a/website/blog/2021-08-18-virtual-keys.md +++ b/website/blog/2021-08-18-virtual-keys.md @@ -3,6 +3,10 @@ title: "Adding support for Virtual Keys in Hudi" excerpt: "Supporting Virtual keys in Hudi for reducing storage overhead" author: shivnarayan category: blog +tags: +- design +- metadata +- apache hudi --- Apache Hudi helps you build and manage data lakes with different table types, config knobs to cater to everyone's need. diff --git a/website/blog/2021-08-23-async-clustering.md b/website/blog/2021-08-23-async-clustering.md index bbd361b627..64cadd9e41 100644 --- a/website/blog/2021-08-23-async-clustering.md +++ b/website/blog/2021-08-23-async-clustering.md @@ -4,6 +4,10 @@ excerpt: "How to setup Hudi for asynchronous clustering" author: codope category: blog image: /assets/images/blog/clustering/example_perf_improvement.png +tags: +- design +- clustering +- apache hudi --- In one of the [previous blog](/blog/2021/01/27/hudi-clustering-intro) posts, we introduced a new diff --git a/website/blog/2021-08-23-s3-events-source.md b/website/blog/2021-08-23-s3-events-source.md index 541a9c30f0..79989ef9de 100644 --- a/website/blog/2021-08-23-s3-events-source.md +++ b/website/blog/2021-08-23-s3-events-source.md @@ -4,6 +4,10 @@ excerpt: "From listing to log-based approach, a reliable way of ingesting data f author: codope category: blog image: /assets/images/blog/s3_events_source_design.png +tags: +- design +- deltastreamer +- apache hudi --- In this post we will talk about a new deltastreamer source which reliably and efficiently processes new data files as they arrive in AWS S3. 
diff --git a/website/blog/2021-09-01-building-eb-level-data-lake-using-hudi-at-bytedance.md b/website/blog/2021-09-01-building-eb-level-data-lake-using-hudi-at-bytedance.md index c7a9cdc692..53c3a92f46 100644 --- a/website/blog/2021-09-01-building-eb-level-data-lake-using-hudi-at-bytedance.md +++ b/website/blog/2021-09-01-building-eb-level-data-lake-using-hudi-at-bytedance.md @@ -4,6 +4,9 @@ excerpt: "Ziyue Guan from Bytedance shares the production experience of building author: Ziyue Guan, translated to English by yihua category: blog image: /assets/images/blog/bytedance_hudi.png +tags: +- use-case +- apache hudi --- Ziyue Guan from Bytedance shares the experience of building an ExaByte(EB)-level data lake using Apache Hudi at Bytedance. diff --git a/website/blog/2021-10-05-Data-Platform-2.0-Part-I.mdx b/website/blog/2021-10-05-Data-Platform-2.0-Part-I.mdx index 4b211c1225..28154cbf81 100644 --- a/website/blog/2021-10-05-Data-Platform-2.0-Part-I.mdx +++ b/website/blog/2021-10-05-Data-Platform-2.0-Part-I.mdx @@ -4,6 +4,11 @@ authors: - name: Jitendra Shah category: blog image: /assets/images/blog/2021-10-05-data-platform-2-0-part-1.png +tags: +- use-case +- halodoc +- datalake +- datalake-platform --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-10-14-How-Amazon-Transportation-Service-enabled-near-real-time-event-analytics-at-petabyte-scale-using-AWS-Glue-with-Apache-Hudi.mdx b/website/blog/2021-10-14-How-Amazon-Transportation-Service-enabled-near-real-time-event-analytics-at-petabyte-scale-using-AWS-Glue-with-Apache-Hudi.mdx index 17bafa6f98..4b619d0cb9 100644 --- a/website/blog/2021-10-14-How-Amazon-Transportation-Service-enabled-near-real-time-event-analytics-at-petabyte-scale-using-AWS-Glue-with-Apache-Hudi.mdx +++ b/website/blog/2021-10-14-How-Amazon-Transportation-Service-enabled-near-real-time-event-analytics-at-petabyte-scale-using-AWS-Glue-with-Apache-Hudi.mdx @@ -7,6 +7,11 @@ authors: - name: Kunal Gautam category: 
blog image: /assets/images/blog/2021-10-14-near-real-time-analytics-at-amazon-transportation-service.png +tags: +- use-case +- near real-time analytics +- analytics at-scale +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-10-21-Practice-of-Apache-Hudi-in-building-real-time-data-lake-at-station-B.mdx b/website/blog/2021-10-21-Practice-of-Apache-Hudi-in-building-real-time-data-lake-at-station-B.mdx index 82698415f3..1f670f9414 100644 --- a/website/blog/2021-10-21-Practice-of-Apache-Hudi-in-building-real-time-data-lake-at-station-B.mdx +++ b/website/blog/2021-10-21-Practice-of-Apache-Hudi-in-building-real-time-data-lake-at-station-B.mdx @@ -4,6 +4,10 @@ authors: - name: Yu Zhaojing category: blog image: /assets/images/blog/2021-10-21-station-b-real-time-data-lake-using-hudi.png +tags: +- use-case +- real-time-datalake +- developpaper --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-11-16-How-GE-Aviation-built-cloud-native-data-pipelines-at-enterprise-scale-using-the-AWS-platform.mdx b/website/blog/2021-11-16-How-GE-Aviation-built-cloud-native-data-pipelines-at-enterprise-scale-using-the-AWS-platform.mdx index a78597d79a..3df2b570b1 100644 --- a/website/blog/2021-11-16-How-GE-Aviation-built-cloud-native-data-pipelines-at-enterprise-scale-using-the-AWS-platform.mdx +++ b/website/blog/2021-11-16-How-GE-Aviation-built-cloud-native-data-pipelines-at-enterprise-scale-using-the-AWS-platform.mdx @@ -5,6 +5,10 @@ authors: - name: Suresh Patnam category: blog image: /assets/images/blog/2021-11-16-ge-aviation-cloud-native-data-pipelines.png +tags: +- use-case +- analytics at-scale +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-11-22-Apache-Hudi-Architecture-Tools-and-Best-Practices.mdx b/website/blog/2021-11-22-Apache-Hudi-Architecture-Tools-and-Best-Practices.mdx index 607d958679..4664a954d6 100644 --- 
a/website/blog/2021-11-22-Apache-Hudi-Architecture-Tools-and-Best-Practices.mdx +++ b/website/blog/2021-11-22-Apache-Hudi-Architecture-Tools-and-Best-Practices.mdx @@ -4,6 +4,9 @@ authors: - name: Chandan Gaur category: blog image: /assets/images/blog/2021-11-22-hudi-architecture-tools-best-practices.png +tags: +- blog +- xenonstack --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-12-16-lakehouse-concurrency-control-are-we-too-optimistic.md b/website/blog/2021-12-16-lakehouse-concurrency-control-are-we-too-optimistic.md index d5aa207d6a..516d07c31d 100644 --- a/website/blog/2021-12-16-lakehouse-concurrency-control-are-we-too-optimistic.md +++ b/website/blog/2021-12-16-lakehouse-concurrency-control-are-we-too-optimistic.md @@ -4,6 +4,10 @@ excerpt: "Vinoth Chandar, Creator of Apache Hudi, dives into concurrency control author: vinoth category: blog image: /assets/images/blog/concurrency/MultiWriter.gif +tags: +- blog +- concurrency-control +- apache hudi --- Transactions on data lakes are now considered a key characteristic of a Lakehouse these days. But what has actually been accomplished so far? What are the current approaches? How do they fare in real-world scenarios? These questions are the focus of this blog. 
diff --git a/website/blog/2021-12-20-New-features-from-Apache-Hudi-0.7.0-and-0.8.0-available-on-Amazon-EMR.mdx b/website/blog/2021-12-20-New-features-from-Apache-Hudi-0.7.0-and-0.8.0-available-on-Amazon-EMR.mdx index 88f7117232..209a180aab 100644 --- a/website/blog/2021-12-20-New-features-from-Apache-Hudi-0.7.0-and-0.8.0-available-on-Amazon-EMR.mdx +++ b/website/blog/2021-12-20-New-features-from-Apache-Hudi-0.7.0-and-0.8.0-available-on-Amazon-EMR.mdx @@ -5,6 +5,9 @@ authors: - name: Gagan Brahmi category: blog image: /assets/images/blog/aws.jpg +tags: +- blog +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2021-12-29-hudi-zorder-and-hilbert-space-filling-curves.md b/website/blog/2021-12-29-hudi-zorder-and-hilbert-space-filling-curves.md index cda7b5c66e..5a56a49b1b 100644 --- a/website/blog/2021-12-29-hudi-zorder-and-hilbert-space-filling-curves.md +++ b/website/blog/2021-12-29-hudi-zorder-and-hilbert-space-filling-curves.md @@ -4,6 +4,11 @@ excerpt: "Explore the benefits of new Apache Hudi Z-Order and Hilbert Curves" author: Alexey Kudinkin and Tao Meng category: blog image: /assets/images/zordercurve.png +tags: +- design +- clustering +- data skipping +- apache hudi --- As of Hudi v0.10.0, we are excited to introduce support for an advanced Data Layout Optimization technique known in the database realm as [Z-order](https://en.wikipedia.org/wiki/Z-order_curve) and [Hilbert](https://en.wikipedia.org/wiki/Hilbert_curve) space filling curves. 
diff --git a/website/blog/2021-12-31-The-Art-of-Building-Open-Data-Lakes-with-Apache-Hudi-Kafka-Hive-and-Debezium.mdx b/website/blog/2021-12-31-The-Art-of-Building-Open-Data-Lakes-with-Apache-Hudi-Kafka-Hive-and-Debezium.mdx index a893596053..59f6febb9c 100644 --- a/website/blog/2021-12-31-The-Art-of-Building-Open-Data-Lakes-with-Apache-Hudi-Kafka-Hive-and-Debezium.mdx +++ b/website/blog/2021-12-31-The-Art-of-Building-Open-Data-Lakes-with-Apache-Hudi-Kafka-Hive-and-Debezium.mdx @@ -4,6 +4,10 @@ authors: - name: Gary Stafford category: blog image: /assets/images/blog/2021-12-31-open-source-data-lakes-on-aws.png +tags: +- how-to +- datalake +- medium --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-01-06-apache-hudi-2021-a-year-in-review.md b/website/blog/2022-01-06-apache-hudi-2021-a-year-in-review.md index 379fb8fbb8..abeb41738f 100644 --- a/website/blog/2022-01-06-apache-hudi-2021-a-year-in-review.md +++ b/website/blog/2022-01-06-apache-hudi-2021-a-year-in-review.md @@ -4,6 +4,10 @@ excerpt: "A reflection on the growth and momentum of Apache Hudi in 2021" author: vinoth category: blog image: /assets/images/Hudi_community.png +tags: +- blog +- community +- apache hudi --- As the year came to end, I took some time to reflect on where we are and what we accomplished in 2021. I am humbled by how strong our community is and how regardless of it being another tough pandemic year, that people from around the globe leaned in together and made this the best year yet for Apache Hudi. In this blog I want to recap some of the 2021 highlights. 
diff --git a/website/blog/2022-01-14-change-data-capture-with-debezium-and-apache-hudi.md b/website/blog/2022-01-14-change-data-capture-with-debezium-and-apache-hudi.md index f234ba4813..129f2cd62e 100644 --- a/website/blog/2022-01-14-change-data-capture-with-debezium-and-apache-hudi.md +++ b/website/blog/2022-01-14-change-data-capture-with-debezium-and-apache-hudi.md @@ -4,7 +4,12 @@ excerpt: "A review of new Debezium source connector for Apache Hudi" author: Rajesh Mahindra category: blog image: /assets/images/blog/debezium.png - +tags: +- design +- deltastreamer +- cdc +- change-data-capture +- apache hudi --- As of Hudi v0.10.0, we are excited to announce the availability of [Debezium](https://debezium.io/) sources for [Deltastreamer](https://hudi.apache.org/docs/hoodie_deltastreamer) that provide the ingestion of change capture data (CDC) from Postgres and Mysql databases to your data lake. For more details, please refer to the original [RFC](https://github.com/apache/hudi/blob/master/rfc/rfc-39/rfc-39.md). 
diff --git a/website/blog/2022-01-18-Why-and-How-I-Integrated-Airbyte-and-Apache-Hudi.mdx b/website/blog/2022-01-18-Why-and-How-I-Integrated-Airbyte-and-Apache-Hudi.mdx index 8fac38f5d5..3cc36398c8 100644 --- a/website/blog/2022-01-18-Why-and-How-I-Integrated-Airbyte-and-Apache-Hudi.mdx +++ b/website/blog/2022-01-18-Why-and-How-I-Integrated-Airbyte-and-Apache-Hudi.mdx @@ -4,6 +4,10 @@ authors: - name: Harsha Teja Kanna category: blog image: /assets/images/blog/2022-01-18-airbyte-hudi-integration.png +tags: +- how-to +- deltastreamer +- selectfrom --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-01-20-Hudi-powering-data-lake-efforts-at-Walmart-and-Disney-Hotstar.mdx b/website/blog/2022-01-20-Hudi-powering-data-lake-efforts-at-Walmart-and-Disney-Hotstar.mdx index afcbdaf181..76509df99a 100644 --- a/website/blog/2022-01-20-Hudi-powering-data-lake-efforts-at-Walmart-and-Disney-Hotstar.mdx +++ b/website/blog/2022-01-20-Hudi-powering-data-lake-efforts-at-Walmart-and-Disney-Hotstar.mdx @@ -4,6 +4,9 @@ authors: - name: Sean Michael Kerner category: blog image: /assets/images/blog/2022-01-20-hudi-powering-datalake-efforts.png +tags: +- use-case +- techtarget --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-01-25-Cost-Efficiency-Scale-in-Big-Data-File-Format.mdx b/website/blog/2022-01-25-Cost-Efficiency-Scale-in-Big-Data-File-Format.mdx index 7d6fbdfec2..25916b5fb3 100644 --- a/website/blog/2022-01-25-Cost-Efficiency-Scale-in-Big-Data-File-Format.mdx +++ b/website/blog/2022-01-25-Cost-Efficiency-Scale-in-Big-Data-File-Format.mdx @@ -7,6 +7,12 @@ authors: - name: Mohammad Islam category: blog image: /assets/images/blog/2022-01-25-cost-efficiency-at-scale-in-big-data-file-format.png +tags: +- blog +- cost-efficiency +- compression +- analytics at-scale +- uber --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-02-02-Onehouse-Commitment-to-Openness.mdx 
b/website/blog/2022-02-02-Onehouse-Commitment-to-Openness.mdx index 818c6d3a32..feb4cd9d2f 100644 --- a/website/blog/2022-02-02-Onehouse-Commitment-to-Openness.mdx +++ b/website/blog/2022-02-02-Onehouse-Commitment-to-Openness.mdx @@ -4,6 +4,10 @@ authors: - name: Vinoth Chandar category: blog image: /assets/images/blog/2022-02-02-onehouse-commitment-to-openness.jpeg +tags: +- blog +- community +- onehouse --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-02-03-Onehouse-brings-a-fully-managed-lakehouse-to-Apache-Hudi.mdx b/website/blog/2022-02-03-Onehouse-brings-a-fully-managed-lakehouse-to-Apache-Hudi.mdx index 58482a2bdb..41618edbd6 100644 --- a/website/blog/2022-02-03-Onehouse-brings-a-fully-managed-lakehouse-to-Apache-Hudi.mdx +++ b/website/blog/2022-02-03-Onehouse-brings-a-fully-managed-lakehouse-to-Apache-Hudi.mdx @@ -4,6 +4,10 @@ authors: - name: Paul Sawers category: blog image: /assets/images/blog/2022-02-03-onehouse_billboard.png +tags: +- blog +- lakehouse +- venturebeat --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-02-09-ACID-transformations-on-Distributed-file-system.mdx b/website/blog/2022-02-09-ACID-transformations-on-Distributed-file-system.mdx index b978935916..c46b8f09fc 100644 --- a/website/blog/2022-02-09-ACID-transformations-on-Distributed-file-system.mdx +++ b/website/blog/2022-02-09-ACID-transformations-on-Distributed-file-system.mdx @@ -4,6 +4,9 @@ authors: - name: Rajasekhar category: blog image: /assets/images/blog/2022-02-09-acid-transformations-on-distributed-files-systems.png +tags: +- blog +- walmartglobaltech --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-02-12-Open-Source-Data-Lake-Table-Formats-Evaluating-Current-Interest-and-Rate-of-Adoption.mdx b/website/blog/2022-02-12-Open-Source-Data-Lake-Table-Formats-Evaluating-Current-Interest-and-Rate-of-Adoption.mdx index 7a5ec60b1f..daf96618c0 100644 --- 
a/website/blog/2022-02-12-Open-Source-Data-Lake-Table-Formats-Evaluating-Current-Interest-and-Rate-of-Adoption.mdx +++ b/website/blog/2022-02-12-Open-Source-Data-Lake-Table-Formats-Evaluating-Current-Interest-and-Rate-of-Adoption.mdx @@ -4,6 +4,12 @@ authors: - name: Gary Stafford category: blog image: /assets/images/blog/2022-02-12-open-source-data-lake-formats.png +tags: +- blog +- datalake +- comparison +- community +- medium --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-02-17-Fresher-Data-Lake-on-AWS-S3.mdx b/website/blog/2022-02-17-Fresher-Data-Lake-on-AWS-S3.mdx index 818bf297da..537d4c1c40 100644 --- a/website/blog/2022-02-17-Fresher-Data-Lake-on-AWS-S3.mdx +++ b/website/blog/2022-02-17-Fresher-Data-Lake-on-AWS-S3.mdx @@ -4,6 +4,10 @@ authors: - name: Balaji Varadarajan category: blog image: /assets/images/blog/2022-02-17-fresher-data-lake-on-aws-s3.png +tags: +- use-case +- incremental-processing +- robinhood --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-02-20-Understanding-its-core-concepts-from-hudi-persistence-files.mdx b/website/blog/2022-02-20-Understanding-its-core-concepts-from-hudi-persistence-files.mdx index 3d08b66d1c..dc6ba194de 100644 --- a/website/blog/2022-02-20-Understanding-its-core-concepts-from-hudi-persistence-files.mdx +++ b/website/blog/2022-02-20-Understanding-its-core-concepts-from-hudi-persistence-files.mdx @@ -4,6 +4,10 @@ authors: - name: QbertsBrother category: blog image: /assets/images/blog/2022-02-20-understanding-core-concepts-from-hudi-persistence-files.png +tags: +- blog +- storage-spec +- programmer --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-03-01-Create-a-low-latency-source-to-data-lake-pipeline-using-Amazon-MSK-Connect-Apache-Flink-and-Apache-Hudi.mdx b/website/blog/2022-03-01-Create-a-low-latency-source-to-data-lake-pipeline-using-Amazon-MSK-Connect-Apache-Flink-and-Apache-Hudi.mdx index 
cad50dfd3b..ff9a56060f 100644 --- a/website/blog/2022-03-01-Create-a-low-latency-source-to-data-lake-pipeline-using-Amazon-MSK-Connect-Apache-Flink-and-Apache-Hudi.mdx +++ b/website/blog/2022-03-01-Create-a-low-latency-source-to-data-lake-pipeline-using-Amazon-MSK-Connect-Apache-Flink-and-Apache-Hudi.mdx @@ -4,6 +4,12 @@ authors: - name: Ali Alemi category: blog image: /assets/images/blog/2022-03-01-low-latency-pipeline-using-msk-flink-hudi.png +tags: +- how-to +- streaming ingestion +- apache flink +- apache kafka +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-03-09-Build-a-serverless-pipeline-to-analyze-streaming-data-using-AWS-Glue-Apache-Hudi-and-Amazon-S3.mdx b/website/blog/2022-03-09-Build-a-serverless-pipeline-to-analyze-streaming-data-using-AWS-Glue-Apache-Hudi-and-Amazon-S3.mdx index 87163ccc2f..2a1a5cef58 100644 --- a/website/blog/2022-03-09-Build-a-serverless-pipeline-to-analyze-streaming-data-using-AWS-Glue-Apache-Hudi-and-Amazon-S3.mdx +++ b/website/blog/2022-03-09-Build-a-serverless-pipeline-to-analyze-streaming-data-using-AWS-Glue-Apache-Hudi-and-Amazon-S3.mdx @@ -5,7 +5,10 @@ authors: - name: Dipta Bhattacharya category: blog image: /assets/images/blog/2022-03-09-serverless-pipeline-using-glue-hudi-s3.png - +tags: +- how-to +- streaming ingestion +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-03-24-Zendesk-Insights-for-CTOs-Part-3-Growing-your-business-with-modern-data-capabilities.mdx b/website/blog/2022-03-24-Zendesk-Insights-for-CTOs-Part-3-Growing-your-business-with-modern-data-capabilities.mdx index fd003f1733..0ab0e21a5c 100644 --- a/website/blog/2022-03-24-Zendesk-Insights-for-CTOs-Part-3-Growing-your-business-with-modern-data-capabilities.mdx +++ b/website/blog/2022-03-24-Zendesk-Insights-for-CTOs-Part-3-Growing-your-business-with-modern-data-capabilities.mdx @@ -5,6 +5,13 @@ authors: - name: Johnathan Hwang category: blog image: 
/assets/images/blog/2022-03-24-insights-for-ctos-part-3.png +tags: +- blog +- modern data-architecture +- near real-time analytics +- gdpr deletion +- streaming ingestion +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-04-04-Key-Learnings-on-Using-Apache-HUDI-in-building-Lakehouse-Architecture-at-Halodoc.mdx b/website/blog/2022-04-04-Key-Learnings-on-Using-Apache-HUDI-in-building-Lakehouse-Architecture-at-Halodoc.mdx index 35a2d78592..bf28ba91a7 100644 --- a/website/blog/2022-04-04-Key-Learnings-on-Using-Apache-HUDI-in-building-Lakehouse-Architecture-at-Halodoc.mdx +++ b/website/blog/2022-04-04-Key-Learnings-on-Using-Apache-HUDI-in-building-Lakehouse-Architecture-at-Halodoc.mdx @@ -4,8 +4,11 @@ authors: - name: Jitendra Shah category: blog image: /assets/images/blog/2022-04-04-halodoc-lakehouse-architecture.png - - +tags: +- use-case +- lakehouse +- incremental-processing +- halodoc --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-04-04-New-features-from-Apache-Hudi-0.9.0-on-Amazon-EMR.mdx b/website/blog/2022-04-04-New-features-from-Apache-Hudi-0.9.0-on-Amazon-EMR.mdx index fa6a7e82e2..8ba7a7b6b9 100644 --- a/website/blog/2022-04-04-New-features-from-Apache-Hudi-0.9.0-on-Amazon-EMR.mdx +++ b/website/blog/2022-04-04-New-features-from-Apache-Hudi-0.9.0-on-Amazon-EMR.mdx @@ -6,6 +6,9 @@ authors: - name: Udit Mehrotra category: blog image: /assets/images/blog/aws.jpg +tags: +- blog +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-04-19-Corrections-in-data-lakehouse-table-format-comparisons.mdx b/website/blog/2022-04-19-Corrections-in-data-lakehouse-table-format-comparisons.mdx index e025750dac..eaab6b0309 100644 --- a/website/blog/2022-04-19-Corrections-in-data-lakehouse-table-format-comparisons.mdx +++ b/website/blog/2022-04-19-Corrections-in-data-lakehouse-table-format-comparisons.mdx @@ -4,7 +4,10 @@ authors: - name: Vinoth 
Chandar category: blog image: /assets/images/blog/2022-04-19-corrections-in-data-lakehouse-table-format-comparisons.png - +tags: +- blog +- lakehouse +- bytearray --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-05-17-Introducing-Multi-Modal-Index-for-the-Lakehouse-in-Apache-Hudi.mdx b/website/blog/2022-05-17-Introducing-Multi-Modal-Index-for-the-Lakehouse-in-Apache-Hudi.mdx index cbfa3ffcbe..1c0e8afc12 100644 --- a/website/blog/2022-05-17-Introducing-Multi-Modal-Index-for-the-Lakehouse-in-Apache-Hudi.mdx +++ b/website/blog/2022-05-17-Introducing-Multi-Modal-Index-for-the-Lakehouse-in-Apache-Hudi.mdx @@ -5,7 +5,11 @@ authors: - name: Ethan Guo category: blog image: /assets/images/blog/2022-05-17-multimodal-index.gif - +tags: +- design +- multi-modal indexing +- lakehouse +- onehouse --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-05-25-Record-by-record-deletable-data-lake-using-Apache-Hudi.mdx b/website/blog/2022-05-25-Record-by-record-deletable-data-lake-using-Apache-Hudi.mdx index 7ecc9bdf58..693830d9b9 100644 --- a/website/blog/2022-05-25-Record-by-record-deletable-data-lake-using-Apache-Hudi.mdx +++ b/website/blog/2022-05-25-Record-by-record-deletable-data-lake-using-Apache-Hudi.mdx @@ -4,6 +4,10 @@ authors: - name: Shota Ejima category: blog image: /assets/images/blog/2022-05-25-data-lake-at-yahoo-advertising-at-yahoo-japan.png +tags: +- use-case +- gdpr deletion +- yahoo --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-06-04-Asynchronous-Indexing-Using-Hudi.mdx b/website/blog/2022-06-04-Asynchronous-Indexing-Using-Hudi.mdx index 832a41da1c..e9bab582aa 100644 --- a/website/blog/2022-06-04-Asynchronous-Indexing-Using-Hudi.mdx +++ b/website/blog/2022-06-04-Asynchronous-Indexing-Using-Hudi.mdx @@ -4,7 +4,11 @@ authors: - name: Sagar Sumit category: blog image: /assets/images/blog/2022-06-04-async-index.png - +tags: +- design +- multi-modal 
indexing +- onehouse +- async indexing --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-06-09-Singificant-queries-speedup-from-Hudi-Column-Stats-Index-and-Data-Skipping-features.mdx b/website/blog/2022-06-09-Singificant-queries-speedup-from-Hudi-Column-Stats-Index-and-Data-Skipping-features.mdx index 54c66049cf..c2b354ef31 100644 --- a/website/blog/2022-06-09-Singificant-queries-speedup-from-Hudi-Column-Stats-Index-and-Data-Skipping-features.mdx +++ b/website/blog/2022-06-09-Singificant-queries-speedup-from-Hudi-Column-Stats-Index-and-Data-Skipping-features.mdx @@ -4,7 +4,11 @@ authors: - name: Alexey Kudinkin category: blog image: /assets/images/blog/2022-06-09-col-stats-and-data-skipping.png - +tags: +- design +- indexing +- data skipping +- onehouse --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-06-29-Apache-Hudi-vs-Delta-Lake-transparent-tpc-ds-lakehouse-performance-benchmarks.mdx b/website/blog/2022-06-29-Apache-Hudi-vs-Delta-Lake-transparent-tpc-ds-lakehouse-performance-benchmarks.mdx index beffe75f3b..e40222c913 100644 --- a/website/blog/2022-06-29-Apache-Hudi-vs-Delta-Lake-transparent-tpc-ds-lakehouse-performance-benchmarks.mdx +++ b/website/blog/2022-06-29-Apache-Hudi-vs-Delta-Lake-transparent-tpc-ds-lakehouse-performance-benchmarks.mdx @@ -4,7 +4,11 @@ authors: - name: Alexey Kudinkin category: blog image: /assets/images/blog/2022-06-29-apache_hudi_vs_delta_lake_tpc_ds_benchmarks.png - +tags: +- performance +- datalake +- comparison +- onehouse --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-07-11-build-open-lakehouse-using-apache-hudi-and-dbt.md b/website/blog/2022-07-11-build-open-lakehouse-using-apache-hudi-and-dbt.md index 4b7b58022f..bfc80e6843 100644 --- a/website/blog/2022-07-11-build-open-lakehouse-using-apache-hudi-and-dbt.md +++ b/website/blog/2022-07-11-build-open-lakehouse-using-apache-hudi-and-dbt.md @@ -4,6 +4,11 @@ 
excerpt: "How to style blog focused projects on teaching how to build an open La author: Vinoth Govindarajan category: blog image: /assets/images/blog/hudi_dbt_lakehouse.png +tags: +- how-to +- deltastreamer +- incremental-processing +- apache hudi --- The focus of this blog is to show you how to build an open lakehouse leveraging incremental data processing and performing field-level updates. We are excited to announce that you can now use Apache Hudi + dbt for building open data lakehouses. diff --git a/website/blog/2022-08-09-How-NerdWallet-uses-AWS-and-Apache-Hudi-to-build-a-serverless-real-time-analytics-platform.mdx b/website/blog/2022-08-09-How-NerdWallet-uses-AWS-and-Apache-Hudi-to-build-a-serverless-real-time-analytics-platform.mdx index 633e95f90c..285c1fed54 100644 --- a/website/blog/2022-08-09-How-NerdWallet-uses-AWS-and-Apache-Hudi-to-build-a-serverless-real-time-analytics-platform.mdx +++ b/website/blog/2022-08-09-How-NerdWallet-uses-AWS-and-Apache-Hudi-to-build-a-serverless-real-time-analytics-platform.mdx @@ -5,6 +5,11 @@ authors: - name: Dylan Qu category: blog image: /assets/images/blog/2022-08-09-How-NerdWallet-uses-AWS-and-Apache-Hudi-to-build-a-serverless-real-time-analytics-platform.png +tags: +- use-case +- near real-time analytics +- incremental-processing +- amazon --- import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-08-12-Use-Flink-Hudi-to-Build-a-Streaming-Data-Lake-Platform.mdx b/website/blog/2022-08-12-Use-Flink-Hudi-to-Build-a-Streaming-Data-Lake-Platform.mdx index 4ced2909c0..f53d3bd0dc 100644 --- a/website/blog/2022-08-12-Use-Flink-Hudi-to-Build-a-Streaming-Data-Lake-Platform.mdx +++ b/website/blog/2022-08-12-Use-Flink-Hudi-to-Build-a-Streaming-Data-Lake-Platform.mdx @@ -5,6 +5,11 @@ authors: - name: Liu Dalong category: blog image: /assets/images/blog/2022-08-12-Use-Flink-Hudi-to-Build-a-Streaming-Data-Lake-Platform.png +tags: +- blog +- apache flink +- alibabacloud +- streaming ingestion --- 
import Redirect from '@site/src/components/Redirect'; diff --git a/website/blog/2022-08-18-Apache-Hudi-vs-Delta-Lake-vs-Apache-Iceberg-Lakehouse-Feature-Comparison.mdx b/website/blog/2022-08-18-Apache-Hudi-vs-Delta-Lake-vs-Apache-Iceberg-Lakehouse-Feature-Comparison.mdx index f6035d472f..ff5030523a 100644 --- a/website/blog/2022-08-18-Apache-Hudi-vs-Delta-Lake-vs-Apache-Iceberg-Lakehouse-Feature-Comparison.mdx +++ b/website/blog/2022-08-18-Apache-Hudi-vs-Delta-Lake-vs-Apache-Iceberg-Lakehouse-Feature-Comparison.mdx @@ -4,9 +4,12 @@ authors: - name: Kyle Weller category: blog image: /assets/images/blog/2022-08-18-apache_hudi_vs_delta_lake_vs_apache_iceberg_feature_comparison.png - +tags: +- lakehouse +- datalake +- comparison +- onehouse --- - import Redirect from '@site/src/components/Redirect'; <Redirect url="https://www.onehouse.ai/blog/apache-hudi-vs-delta-lake-vs-apache-iceberg-lakehouse-feature-comparison">Redirecting... please wait!! </Redirect> diff --git a/website/blog/2022-08-25-Data-Lake-Lakehouse-Guide-Powered-by-Data-Lake-Table-Formats-Delta-Lake-Iceberg-Hudi.mdx b/website/blog/2022-08-25-Data-Lake-Lakehouse-Guide-Powered-by-Data-Lake-Table-Formats-Delta-Lake-Iceberg-Hudi.mdx index 70ba660e3f..9c039cbe56 100644 --- a/website/blog/2022-08-25-Data-Lake-Lakehouse-Guide-Powered-by-Data-Lake-Table-Formats-Delta-Lake-Iceberg-Hudi.mdx +++ b/website/blog/2022-08-25-Data-Lake-Lakehouse-Guide-Powered-by-Data-Lake-Table-Formats-Delta-Lake-Iceberg-Hudi.mdx @@ -4,6 +4,12 @@ authors: - name: Simon Späti category: blog image: /assets/images/blog/2022-08-25-Data-Lake-Lakehouse-Guide-Powered-by-Data-Lake-Table-Formats-Delta-Lake-Iceberg-Hudi.png +tags: +- blog +- datalake +- lakehouse +- comparison +- airbyte --- import Redirect from '@site/src/components/Redirect';