aokolnychyi commented on code in PR #6371:
URL: https://github.com/apache/iceberg/pull/6371#discussion_r1042627234
##########
spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java:
##########
@@ -255,74 +256,90 @@ public static org.apache.iceberg.Table toIcebergTable(Table table) {
     return sparkTable.table();
   }
 
+  public static Transform[] toTransforms(Schema schema, List<PartitionField> fields) {
+    SpecTransformToSparkTransform visitor = new SpecTransformToSparkTransform(schema);
+
+    List<Transform> transforms = Lists.newArrayList();
+
+    for (PartitionField field : fields) {
+      Transform transform = PartitionSpecVisitor.visit(schema, field, visitor);
+      if (transform != null) {
+        transforms.add(transform);
+      }
+    }
+
+    return transforms.toArray(new Transform[0]);
+  }
+
   /**
    * Converts a PartitionSpec to Spark transforms.
    *
   * @param spec a PartitionSpec
   * @return an array of Transforms
   */
   public static Transform[] toTransforms(PartitionSpec spec) {
-    Map<Integer, String> quotedNameById = SparkSchemaUtil.indexQuotedNameById(spec.schema());
-    List<Transform> transforms =
-        PartitionSpecVisitor.visit(
-            spec,
-            new PartitionSpecVisitor<Transform>() {
-              @Override
-              public Transform identity(String sourceName, int sourceId) {
-                return Expressions.identity(quotedName(sourceId));
-              }
+    SpecTransformToSparkTransform visitor = new SpecTransformToSparkTransform(spec.schema());
+    List<Transform> transforms = PartitionSpecVisitor.visit(spec, visitor);
+    return transforms.stream().filter(Objects::nonNull).toArray(Transform[]::new);
+  }
-              @Override
-              public Transform bucket(String sourceName, int sourceId, int numBuckets) {
-                return Expressions.bucket(numBuckets, quotedName(sourceId));
-              }
+  private static class SpecTransformToSparkTransform implements PartitionSpecVisitor<Transform> {
+    private final Map<Integer, String> quotedNameById;
-              @Override
-              public Transform truncate(String sourceName, int sourceId, int width) {
-                return Expressions.apply(
-                    "truncate",
-                    Expressions.column(quotedName(sourceId)),
-                    Expressions.literal(width));
-              }
+    SpecTransformToSparkTransform(Schema schema) {
+      this.quotedNameById = SparkSchemaUtil.indexQuotedNameById(schema);
+    }
-              @Override
-              public Transform year(String sourceName, int sourceId) {
-                return Expressions.years(quotedName(sourceId));
-              }
+    @Override
+    public Transform identity(String sourceName, int sourceId) {
+      return Expressions.identity(quotedName(sourceId));
+    }
-              @Override
-              public Transform month(String sourceName, int sourceId) {
-                return Expressions.months(quotedName(sourceId));
-              }
+    @Override
+    public Transform bucket(String sourceName, int sourceId, int numBuckets) {
+      return Expressions.bucket(numBuckets, quotedName(sourceId));
+    }
-              @Override
-              public Transform day(String sourceName, int sourceId) {
-                return Expressions.days(quotedName(sourceId));
-              }
+    @Override
+    public Transform truncate(String sourceName, int sourceId, int width) {
+      NamedReference column = Expressions.column(quotedName(sourceId));
+      return Expressions.apply("truncate", Expressions.literal(width), column);
Review Comment:
I think the only place that would change is the string output of partitioning in `SparkTable`. Otherwise, we handle both combinations in the `TruncateTransform` extractor.
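
For readers following the argument-order question: handling "both combinations" means the matching logic accepts the truncate arguments as either (literal, column) or (column, literal). Below is a minimal Java sketch of such order-insensitive matching; the names `TruncateArgs` and `match` are hypothetical stand-ins used for illustration, not the actual `TruncateTransform` extractor, whose real implementation may differ.

```java
import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.Literal;
import org.apache.spark.sql.connector.expressions.NamedReference;
import org.apache.spark.sql.connector.expressions.Transform;

// Hypothetical sketch: extracts (width, column) from a Spark truncate
// transform regardless of which order the two arguments appear in.
final class TruncateArgs {
  final int width;
  final NamedReference column;

  private TruncateArgs(int width, NamedReference column) {
    this.width = width;
    this.column = column;
  }

  // Returns the extracted pair, or null if the transform is not a
  // two-argument truncate.
  static TruncateArgs match(Transform transform) {
    if (!"truncate".equalsIgnoreCase(transform.name()) || transform.arguments().length != 2) {
      return null;
    }

    Expression first = transform.arguments()[0];
    Expression second = transform.arguments()[1];

    // truncate(width, col): the order produced after this PR
    if (first instanceof Literal && second instanceof NamedReference) {
      int width = ((Number) ((Literal<?>) first).value()).intValue();
      return new TruncateArgs(width, (NamedReference) second);
    }

    // truncate(col, width): the order produced before this PR
    if (first instanceof NamedReference && second instanceof Literal) {
      int width = ((Number) ((Literal<?>) second).value()).intValue();
      return new TruncateArgs(width, (NamedReference) first);
    }

    return null;
  }
}
```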