This is an automated email from the ASF dual-hosted git repository.
yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 3e0d0d62a5b Clickhouse support low cardinality (#29533)
3e0d0d62a5b is described below
commit 3e0d0d62a5b1a9063f9e07a62556ba8a7299afbe
Author: Mark Zitnik <[email protected]>
AuthorDate: Mon Nov 27 18:19:29 2023 +0200
Clickhouse support low cardinality (#29533)
* Added LowCardinality type to clickhouse
---
CHANGES.md | 1 +
.../beam/sdk/io/clickhouse/ClickHouseIO.java | 3 +-
.../clickhouse/src/main/javacc/ColumnTypeParser.jj | 62 +++++++++++++---------
.../beam/sdk/io/clickhouse/ClickHouseIOTest.java | 8 ++-
4 files changed, 46 insertions(+), 28 deletions(-)
diff --git a/CHANGES.md b/CHANGES.md
index 40c0e1771e9..9318e85d477 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -65,6 +65,7 @@
* Support for X source added (Java/Python)
([#X](https://github.com/apache/beam/issues/X)).
* TextIO now supports skipping multiple header lines (Java)
([#17990](https://github.com/apache/beam/issues/17990)).
* Python GCSIO is now implemented with GCP GCS Client instead of apitools
([#25676](https://github.com/apache/beam/issues/25676))
+* Adding support for LowCardinality DataType in ClickHouse (Java)
([#29533](https://github.com/apache/beam/pull/29533)).
## New Features / Improvements
diff --git
a/sdks/java/io/clickhouse/src/main/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIO.java
b/sdks/java/io/clickhouse/src/main/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIO.java
index daa8b5e1b92..52d520a0a19 100644
---
a/sdks/java/io/clickhouse/src/main/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIO.java
+++
b/sdks/java/io/clickhouse/src/main/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIO.java
@@ -111,7 +111,8 @@ import org.slf4j.LoggerFactory;
* <tr><td>{@link TableSchema.TypeName#BOOL}</td> <td>{@link
Schema.TypeName#BOOLEAN}</td></tr>
* </table>
*
- * Nullable row columns are supported through Nullable type in ClickHouse.
+ * Nullable row columns are supported through Nullable type in ClickHouse. Low
cardinality hint is
+ * supported through LowCardinality DataType in ClickHouse.
*
* <p>Nested rows should be unnested using {@link Select#flattenedSchema()}.
Type casting should be
* done using {@link org.apache.beam.sdk.schemas.transforms.Cast} before
{@link ClickHouseIO}.
diff --git a/sdks/java/io/clickhouse/src/main/javacc/ColumnTypeParser.jj
b/sdks/java/io/clickhouse/src/main/javacc/ColumnTypeParser.jj
index 830499d3207..abe29aff3f8 100644
--- a/sdks/java/io/clickhouse/src/main/javacc/ColumnTypeParser.jj
+++ b/sdks/java/io/clickhouse/src/main/javacc/ColumnTypeParser.jj
@@ -73,31 +73,32 @@ TOKEN :
TOKEN :
{
- < ARRAY : "ARRAY" >
- | < DATE : "DATE" >
- | < DATETIME : "DATETIME" >
- | < ENUM8 : "ENUM8" >
- | < ENUM16 : "ENUM16" >
- | < FIXEDSTRING : "FIXEDSTRING" >
- | < FLOAT32 : "FLOAT32" >
- | < FLOAT64 : "FLOAT64" >
- | < STRING : "STRING" >
- | < INT8 : "INT8" >
- | < INT16 : "INT16" >
- | < INT32 : "INT32" >
- | < INT64 : "INT64" >
- | < UINT8 : "UINT8" >
- | < UINT16 : "UINT16" >
- | < UINT32 : "UINT32" >
- | < UINT64 : "UINT64" >
- | < NULLABLE : "NULLABLE" >
- | < LPAREN : "(" >
- | < RPAREN : ")" >
- | < CAST : "CAST" >
- | < AS : "AS" >
- | < COMMA : "," >
- | < EQ : "=" >
- | < BOOL : "BOOL" >
+ < ARRAY : "ARRAY" >
+ | < DATE : "DATE" >
+ | < DATETIME : "DATETIME" >
+ | < ENUM8 : "ENUM8" >
+ | < ENUM16 : "ENUM16" >
+ | < FIXEDSTRING : "FIXEDSTRING" >
+ | < FLOAT32 : "FLOAT32" >
+ | < FLOAT64 : "FLOAT64" >
+ | < STRING : "STRING" >
+ | < INT8 : "INT8" >
+ | < INT16 : "INT16" >
+ | < INT32 : "INT32" >
+ | < INT64 : "INT64" >
+ | < UINT8 : "UINT8" >
+ | < UINT16 : "UINT16" >
+ | < UINT32 : "UINT32" >
+ | < UINT64 : "UINT64" >
+ | < NULLABLE : "NULLABLE" >
+ | < LPAREN : "(" >
+ | < RPAREN : ")" >
+ | < CAST : "CAST" >
+ | < AS : "AS" >
+ | < COMMA : "," >
+ | < EQ : "=" >
+ | < BOOL : "BOOL" >
+ | < LOWCARDINALITY : "LOWCARDINALITY" >
}
public ColumnType columnType() :
@@ -111,6 +112,7 @@ public ColumnType columnType() :
| ct = enum_()
| ct = array()
| ct = nullable()
+ | ct = lowcardenality()
)
{
return ct;
@@ -278,3 +280,13 @@ private ColumnType enum_() :
}
)
}
+
+private ColumnType lowcardenality() : // Production for the <LOWCARDINALITY> "(" primitive ")" wrapper syntax. NOTE(review): name is spelled "cardenality" while the token is LOWCARDINALITY; a rename must also update the call in columnType().
+{
+ ColumnType ct; // Holds the type produced by the nested primitive() production.
+}
+{
+ (
+ (<LOWCARDINALITY> <LPAREN> (ct = primitive()) <RPAREN>) { return ct; } // Only primitive() is accepted inside the parentheses; the inner type is returned as-is, so the wrapper itself is not recorded in the result.
+ )
+}
\ No newline at end of file
diff --git
a/sdks/java/io/clickhouse/src/test/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIOTest.java
b/sdks/java/io/clickhouse/src/test/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIOTest.java
index 8d4f9ab041c..33fe9467d45 100644
---
a/sdks/java/io/clickhouse/src/test/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIOTest.java
+++
b/sdks/java/io/clickhouse/src/test/java/org/apache/beam/sdk/io/clickhouse/ClickHouseIOTest.java
@@ -160,7 +160,8 @@ public class ClickHouseIOTest extends BaseClickHouseTest {
Schema.Field.of("f15", FieldType.STRING),
Schema.Field.of("f16", FieldType.BYTES),
Schema.Field.of("f17", FieldType.logicalType(FixedBytes.of(3))),
- Schema.Field.of("f18", FieldType.BOOLEAN));
+ Schema.Field.of("f18", FieldType.BOOLEAN),
+ Schema.Field.of("f19", FieldType.STRING));
Row row1 =
Row.withSchema(schema)
.addValue(new DateTime(2030, 10, 1, 0, 0, 0, DateTimeZone.UTC))
@@ -182,6 +183,7 @@ public class ClickHouseIOTest extends BaseClickHouseTest {
.addValue(new byte[] {'a', 's', 'd'})
.addValue(new byte[] {'z', 'x', 'c'})
.addValue(true)
+ .addValue("lowcardenality")
.build();
executeSql(
@@ -204,7 +206,8 @@ public class ClickHouseIOTest extends BaseClickHouseTest {
+ "f15 FixedString(3),"
+ "f16 FixedString(3),"
+ "f17 FixedString(3),"
- + "f18 Bool"
+ + "f18 Bool,"
+ + "f19 LowCardinality(String)"
+ ") ENGINE=Log");
pipeline.apply(Create.of(row1).withRowSchema(schema)).apply(write("test_primitive_types"));
@@ -233,6 +236,7 @@ public class ClickHouseIOTest extends BaseClickHouseTest {
assertArrayEquals(new byte[] {'a', 's', 'd'}, rs.getBytes("f16"));
assertArrayEquals(new byte[] {'z', 'x', 'c'}, rs.getBytes("f17"));
assertEquals("true", rs.getString("f18"));
+ assertEquals("lowcardenality", rs.getString("f19"));
}
}