This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 68bdeb80095 [SPARK-41215][BUILD][PROTOBUF] Support user configurable protoc executables when building Spark Protobuf 68bdeb80095 is described below commit 68bdeb80095507f62e8980b7f70f25d7de4c6156 Author: jianghaonan <jianghao...@baidu.com> AuthorDate: Thu Nov 24 12:01:17 2022 +0900 [SPARK-41215][BUILD][PROTOBUF] Support user configurable protoc executables when building Spark Protobuf ### What changes were proposed in this pull request? This PR adds a profile named `user-defined-protoc` (enabled with `-Puser-defined-protoc`) that lets users build and test the `protobuf` module with a custom `protoc` executable. ### Why are the changes needed? As described in [SPARK-41215](https://issues.apache.org/jira/browse/SPARK-41215), the latest versions of `protoc` have minimum version requirements for basic libraries such as `glibc` and `glibcxx`. Because of that, it is not possible to test-compile the `protobuf` module out of the box on CentOS 6 or CentOS 7. 
Instead, the following error messages are shown: ``` [ERROR] /home/disk1/spark-ut/spark/connector/protobuf/src/test/resources/protobuf/timestamp.proto [0:0]: /tmp/protoc6599263403262688374.exe: /lib64/libc.so.6: version `GLIBC_2.14' not found (required by /tmp/protoc6599263403262688374.exe) [ERROR] /home/disk1/spark-ut/spark/connector/protobuf/src/test/resources/protobuf/timestamp.proto [0:0]: /tmp/protoc6599263403262688374.exe: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.18' not found (required by /tmp/protoc6599263403262688374.exe) [ERROR] /home/disk1/spark-ut/spark/connector/protobuf/src/test/resources/protobuf/timestamp.proto [0:0]: /tmp/protoc6599263403262688374.exe: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.14' not found (required by /tmp/protoc6599263403262688374.exe) [ERROR] /home/disk1/spark-ut/spark/connector/protobuf/src/test/resources/protobuf/timestamp.proto [0:0]: /tmp/protoc6599263403262688374.exe: /usr/lib64/libstdc++.so.6: version `CXXABI_1.3.5' not found (required by /tmp/protoc6599263403262688374.exe) ``` ### Does this PR introduce _any_ user-facing change? No, building with the official pre-built `protoc` binary files remains the default behavior. ### How was this patch tested? - Pass GitHub Actions - Manual test on CentOS6u3 and CentOS7u4 ```bash export PROTOBUF_PROTOC_EXEC_PATH=/path-to-protoc-exe ./build/mvn clean install -pl connector/protobuf -Puser-defined-protoc -am -DskipTests ./build/mvn clean test -pl connector/protobuf -Puser-defined-protoc ``` and ```bash export PROTOBUF_PROTOC_EXEC_PATH=/path-to-protoc-exe ./build/sbt clean "protobuf/compile" -Puser-defined-protoc ./build/sbt "protobuf/test" -Puser-defined-protoc ``` Closes #38743 from WolverineJiang/master. 
Authored-by: jianghaonan <jianghao...@baidu.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- connector/protobuf/README.md | 37 ++++++++++++++++++ connector/protobuf/pom.xml | 89 +++++++++++++++++++++++++++++++++----------- project/SparkBuild.scala | 15 +++++++- 3 files changed, 118 insertions(+), 23 deletions(-) diff --git a/connector/protobuf/README.md b/connector/protobuf/README.md new file mode 100644 index 00000000000..4fc28950495 --- /dev/null +++ b/connector/protobuf/README.md @@ -0,0 +1,37 @@ +# Spark Protobuf - Developer Documentation + +## Getting Started + +### Build + +```bash +./build/mvn clean package +``` + +or + +```bash +./build/sbt clean package +``` + +### Build with user-defined `protoc` + +When the official `protoc` binary files cannot be used to build the `protobuf` module in the compilation environment, +for example, when compiling the `protobuf` module on CentOS 6 or CentOS 7, whose default `glibc` version is older than 2.14, we can compile and test by +specifying user-defined `protoc` binary files as follows: + +```bash +export PROTOBUF_PROTOC_EXEC_PATH=/path-to-protoc-exe +./build/mvn -Phive -Puser-defined-protoc clean package +``` + +or + +```bash +export PROTOBUF_PROTOC_EXEC_PATH=/path-to-protoc-exe +export CONNECT_PLUGIN_EXEC_PATH=/path-to-protoc-gen-grpc-java-exe +./build/sbt -Puser-defined-protoc clean package +``` + +The user-defined `protoc` binary files can be built from source in the user's compilation environment; +for the compilation steps, please refer to [protobuf](https://github.com/protocolbuffers/protobuf). 
diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 9f884b07a12..ab491ad38a8 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -110,28 +110,73 @@ </relocations> </configuration> </plugin> - <plugin> - <groupId>com.github.os72</groupId> - <artifactId>protoc-jar-maven-plugin</artifactId> - <version>3.11.4</version> - <!-- Generates Java classes for tests. TODO(Raghu): Generate descriptor files too. --> - <executions> - <execution> - <phase>generate-test-sources</phase> - <goals> - <goal>run</goal> - </goals> - <configuration> - <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact> - <protocVersion>${protobuf.version}</protocVersion> - <inputDirectories> - <include>src/test/resources/protobuf</include> - </inputDirectories> - <addSources>test</addSources> - </configuration> - </execution> - </executions> - </plugin> </plugins> </build> + <profiles> + <profile> + <id>default-protoc</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <build> + <plugins> + <plugin> + <groupId>com.github.os72</groupId> + <artifactId>protoc-jar-maven-plugin</artifactId> + <version>3.11.4</version> + <!-- Generates Java classes for tests. TODO(Raghu): Generate descriptor files too. 
--> + <executions> + <execution> + <phase>generate-test-sources</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact> + <protocVersion>${protobuf.version}</protocVersion> + <inputDirectories> + <include>src/test/resources/protobuf</include> + </inputDirectories> + <addSources>test</addSources> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + <profile> + <id>user-defined-protoc</id> + <properties> + <protobuf.protoc.executable.path>${env.PROTOBUF_PROTOC_EXEC_PATH}</protobuf.protoc.executable.path> + </properties> + <build> + <plugins> + <plugin> + <groupId>com.github.os72</groupId> + <artifactId>protoc-jar-maven-plugin</artifactId> + <version>3.11.4</version> + <!-- Generates Java classes for tests. TODO(Raghu): Generate descriptor files too. --> + <executions> + <execution> + <phase>generate-test-sources</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact> + <protocVersion>${protobuf.version}</protocVersion> + <protocCommand>${protobuf.protoc.executable.path}</protocCommand> + <inputDirectories> + <include>src/test/resources/protobuf</include> + </inputDirectories> + <addSources>test</addSources> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> </project> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 9e5fe62e010..b82c53a0635 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -112,10 +112,14 @@ object SparkBuild extends PomBuild { if (profiles.contains("user-defined-protoc")) { val connectProtocExecPath = Properties.envOrNone("CONNECT_PROTOC_EXEC_PATH") val connectPluginExecPath = Properties.envOrNone("CONNECT_PLUGIN_EXEC_PATH") + val protobufProtocExecPath = Properties.envOrNone("PROTOBUF_PROTOC_EXEC_PATH") if 
(connectProtocExecPath.isDefined && connectPluginExecPath.isDefined) { sys.props.put("connect.protoc.executable.path", connectProtocExecPath.get) sys.props.put("connect.plugin.executable.path", connectPluginExecPath.get) } + if (protobufProtocExecPath.isDefined) { + sys.props.put("protobuf.protoc.executable.path", protobufProtocExecPath.get) + } } profiles } @@ -779,7 +783,16 @@ object SparkProtobuf { case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard case _ => MergeStrategy.first }, - ) + ) ++ { + val protobufProtocExecPath = sys.props.get("protobuf.protoc.executable.path") + if (protobufProtocExecPath.isDefined) { + Seq( + PB.protocExecutable := file(protobufProtocExecPath.get) + ) + } else { + Seq.empty + } + } } object Unsafe { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org