zhouyuan commented on code in PR #11373:
URL:
https://github.com/apache/incubator-gluten/pull/11373#discussion_r2668820968
##########
.github/workflows/velox_backend_arm.yml:
##########
@@ -120,21 +120,75 @@ jobs:
- name: Set environment variables
run: |
echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV
+ - name: Install Hadoop
+ run: |
+ yum install -y tar gzip curl which hostname
+ HADOOP_VERSION=3.3.6
+ curl -L -o /tmp/hadoop.tar.gz
https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
Review Comment:
It would be better to move these Hadoop installation and setup commands into a new helper script.
##########
.github/workflows/velox_backend_arm.yml:
##########
@@ -120,21 +120,75 @@ jobs:
- name: Set environment variables
run: |
echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV
+ - name: Install Hadoop
+ run: |
+ yum install -y tar gzip curl which hostname
+ HADOOP_VERSION=3.3.6
+ curl -L -o /tmp/hadoop.tar.gz
https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
+ tar -xzf /tmp/hadoop.tar.gz -C /opt
+ ln -s /opt/hadoop-${HADOOP_VERSION} /opt/hadoop
+ echo "HADOOP_HOME=/opt/hadoop" >> $GITHUB_ENV
+ echo "PATH=/opt/hadoop/bin:/opt/hadoop/sbin:$PATH" >> $GITHUB_ENV
+ set -euxo pipefail
+
+ export HADOOP_HOME=/opt/hadoop
+ export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
+ mkdir -p /tmp/hdfs/nn /tmp/hdfs/dn
+
+ cat > $HADOOP_HOME/etc/hadoop/core-site.xml <<'EOF'
+ <configuration>
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://localhost:9000</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/tmp/hadoop</value>
+ </property>
+ </configuration>
+ EOF
+
+ cat > $HADOOP_HOME/etc/hadoop/hdfs-site.xml <<'EOF'
+ <configuration>
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+ <property>
+ <name>dfs.namenode.name.dir</name>
+ <value>file:/tmp/hdfs/nn</value>
+ </property>
+ <property>
+ <name>dfs.datanode.data.dir</name>
+ <value>file:/tmp/hdfs/dn</value>
+ </property>
+ <property>
+ <name>dfs.permissions.enabled</name>
+ <value>false</value>
+ </property>
+ </configuration>
+ EOF
+ echo "export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >>
$HADOOP_HOME/etc/hadoop/hadoop-env.sh
+ hdfs namenode -format -force -nonInteractive
+ start-dfs.sh
- name: Build gluten-it
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/
$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }}
-Pbackends-velox -DskipTests
cd $GITHUB_WORKSPACE/tools/gluten-it
$MVN_CMD clean install -P${{ matrix.spark }} -P${{ matrix.java }}
- - name: Run TPC-H / TPC-DS
+ - name: Run TPC-H / TPC-DS / HDFS
run: |
echo "JAVA_HOME: $JAVA_HOME"
cd $GITHUB_WORKSPACE/tools/gluten-it
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
+ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
Review Comment:
This will increase the size of the test matrix; it would be better to run
the HDFS test in a single standalone job.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]