This is an automated email from the ASF dual-hosted git repository. zeroshade pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
     new 6695d18eb2 GH-36793: [Go] Allow NewSchemaFromStruct to skip fields if tagged with parquet:"-" (#36794)
6695d18eb2 is described below

commit 6695d18eb2d7b8eada68e80f0fb030c12b70aabb
Author: Chelsea Jones <129552306+chelseajon...@users.noreply.github.com>
AuthorDate: Mon Jul 24 11:29:24 2023 -0700

    GH-36793: [Go] Allow NewSchemaFromStruct to skip fields if tagged with parquet:"-" (#36794)

    ### Rationale for this change

    Allow skipping Go struct fields when serializing to Parquet by using a tag `parquet:"-"`, similarly to the standard Go JSON implementation.

    ### What changes are included in this PR?

    Add `Exclude` to the taggedInfo struct, which is used by `typeToNode` to skip the associated struct field.

    ### Are these changes tested?

    Yes, I modified an existing test to add a new excluded field. (I'm also using this change locally to read and write parquet files with existing Go structs.)

    ### Are there any user-facing changes?

    Yes, this modifies usage of the `parquet` tag. I couldn't find any relevant documentation that needs to be updated though; if there is any please let me know and I will do so.

* Closes: #36793 Authored-by: Chelsea Jones <chelseajo...@rivian.com> Signed-off-by: Matt Topol <zotthewiz...@gmail.com> --- go/parquet/schema/reflection.go | 13 +++++++++++-- go/parquet/schema/reflection_test.go | 3 ++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go index f1e204a171..b85c1c28c7 100644 --- a/go/parquet/schema/reflection.go +++ b/go/parquet/schema/reflection.go @@ -64,6 +64,8 @@ type taggedInfo struct { LogicalType LogicalType KeyLogicalType LogicalType ValueLogicalType LogicalType + + Exclude bool } func (t *taggedInfo) CopyForKey() (ret taggedInfo) { @@ -186,6 +188,7 @@ func newTaggedInfo() taggedInfo { LogicalType: NoLogicalType{}, KeyLogicalType: NoLogicalType{}, ValueLogicalType: NoLogicalType{}, + Exclude: false, } } @@ -232,6 +235,10 @@ func infoFromTags(f reflect.StructTag) *taggedInfo { if ptags, ok := f.Lookup("parquet"); ok { info := newTaggedInfo() + if ptags == "-" { + info.Exclude = true + return &info + } for _, tag := range strings.Split(strings.Replace(ptags, "\t", "", -1), ",") { tag = strings.TrimSpace(tag) kv := strings.SplitN(tag, "=", 2) @@ -370,8 +377,10 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info fields := make(FieldList, 0) for i := 0; i < typ.NumField(); i++ { f := typ.Field(i) - - fields = append(fields, typeToNode(f.Name, f.Type, parquet.Repetitions.Required, infoFromTags(f.Tag))) + tags := infoFromTags(f.Tag) + if tags == nil || !tags.Exclude { + fields = append(fields, typeToNode(f.Name, f.Type, parquet.Repetitions.Required, tags)) + } } // group nodes don't have a physical type if physical != parquet.Types.Undefined { diff --git a/go/parquet/schema/reflection_test.go b/go/parquet/schema/reflection_test.go index 7be1475513..4a029d0581 100644 --- a/go/parquet/schema/reflection_test.go +++ b/go/parquet/schema/reflection_test.go @@ -309,7 +309,8 @@ func TestStructFromSchema(t *testing.T) { func 
TestStructFromSchemaWithNesting(t *testing.T) { type Other struct { - List *[]*float32 + List *[]*float32 + Excluded int32 `parquet:"-"` } type Nested struct {