Re: [PR] [CI] Add CMake format check [incubator-gluten]

2024-06-07 Thread via GitHub


liuneng1994 commented on code in PR #5941:
URL: https://github.com/apache/incubator-gluten/pull/5941#discussion_r1630740681


##
cpp-ch/CMakeLists.txt:
##
@@ -27,93 +29,93 @@ message("CH_COMMIT=${CH_COMMIT}")
 
 project(libch LANGUAGES C CXX ASM)
 file(GLOB clickhouse_files "${CH_SOURCE_DIR}/*")
-if ("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse")
-if (NOT clickhouse_files)
-execute_process(COMMAND git clone -b ${CH_BRANCH} --depth 3 
https://github.com/${CH_ORG}/ClickHouse.git ${CH_SOURCE_DIR} 
COMMAND_ERROR_IS_FATAL ANY)
-execute_process(COMMAND git reset --hard ${CH_COMMIT} 
WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
-execute_process(COMMAND git submodule update --init --force --depth 1 
WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
-else()
-execute_process(COMMAND git fetch origin ${CH_BRANCH} --depth 3 
WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
-execute_process(COMMAND git checkout ${CH_BRANCH} WORKING_DIRECTORY 
${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
-execute_process(COMMAND git reset --hard ${CH_COMMIT} 
WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
-execute_process(COMMAND git submodule update --init --recursive 
--force --depth 1 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
-endif()
+if("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse")
+  if(NOT clickhouse_files)
+execute_process(
+  COMMAND
+git clone -b ${CH_BRANCH} --depth 3
+https://github.com/${CH_ORG}/ClickHouse.git ${CH_SOURCE_DIR}
+COMMAND_ERROR_IS_FATAL ANY)
+execute_process(
+  COMMAND git reset --hard ${CH_COMMIT}
+  WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
+execute_process(
+  COMMAND git submodule update --init --force --depth 1
+  WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
+  else()
+execute_process(
+  COMMAND git fetch origin ${CH_BRANCH} --depth 3
+  WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
+execute_process(
+  COMMAND git checkout ${CH_BRANCH}
+  WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
+execute_process(
+  COMMAND git reset --hard ${CH_COMMIT}
+  WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
+execute_process(
+  COMMAND git submodule update --init --recursive --force --depth 1
+  WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY)
+  endif()
 else()
-if (NOT clickhouse_files)
-# Checking out *all* submodules takes > 5 min. Therefore, the smoke 
build ("FastTest") in CI initializes only the set of
-# submodules minimally needed for a build and we cannot assume here 
that all submodules are populated.
-message(ERROR "clickhouse ${CH_SOURCE_DIR} is missing or empty. to fix 
try run:")
-message(STATUS "git clone --recursive --depth 1 
https://github.com/Kyligence/ClickHouse.git ${CMAKE_SOURCE_DIR}")
-endif()
+  if(NOT clickhouse_files)
+# Checking out *all* submodules takes > 5 min. Therefore, the smoke build
+# ("FastTest") in CI initializes only the set of submodules minimally 
needed
+# for a build and we cannot assume here that all submodules are populated.
+message(ERROR
+"clickhouse ${CH_SOURCE_DIR} is missing or empty. to fix try run:")
+message(
+  STATUS
+"git clone --recursive --depth 1 
https://github.com/Kyligence/ClickHouse.git ${CMAKE_SOURCE_DIR}"
+)
+  endif()
 endif()
 
-if (EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine")
-execute_process(COMMAND rm -rf ${CH_SOURCE_DIR}/utils/extern-local-engine)
-endif ()
-execute_process(COMMAND ln -s ${CMAKE_CURRENT_SOURCE_DIR}/local-engine 
${CH_SOURCE_DIR}/utils/extern-local-engine COMMAND_ERROR_IS_FATAL ANY)
+if(EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine")
+  execute_process(COMMAND rm -rf ${CH_SOURCE_DIR}/utils/extern-local-engine)
+endif()
+execute_process(
+  COMMAND ln -s ${CMAKE_CURRENT_SOURCE_DIR}/local-engine
+  ${CH_SOURCE_DIR}/utils/extern-local-engine COMMAND_ERROR_IS_FATAL 
ANY)
 
-# execute_process(COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/local-engine -regex 
'.*\.\(c\|cpp\|h\)' -exec clang-format-15 --verbose -i --style=file -i {} \;)
+# execute_process(COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/local-engine -regex
+# '.*\.\(c\|cpp\|h\)' -exec clang-format-15 --verbose -i --style=file -i {} \;)
 
 set(CH_BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/build")
 option(ENABLE_CPP_TEST "Build CPP Unit test" OFF)
 
-if (ENABLE_CPP_TEST)
-add_custom_command(
-USES_TERMINAL
-COMMAND
-bash -c
-\"cmake -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
--DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
--DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
--DENABLE_PROTOBUF=ON
--DENABLE_TESTS=ON
-

Re: [PR] [CI] Add CMake format check [incubator-gluten]

2024-06-07 Thread via GitHub


liuneng1994 commented on PR #5941:
URL: 
https://github.com/apache/incubator-gluten/pull/5941#issuecomment-2154215349

   > @liuneng1994, do you have any comment?
   
   There is generally no problem, but subjectively, cmake-format seems to 
make some places messier.
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] Diff when overflow happens while executing cast big decimal to int [incubator-gluten]

2024-06-07 Thread via GitHub


taiyang-li commented on issue #6016:
URL: 
https://github.com/apache/incubator-gluten/issues/6016#issuecomment-2154203226

   It was found in d_926_0.sql  


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[I] Diff when overflow happens while executing cast big decimal to int [incubator-gluten]

2024-06-07 Thread via GitHub


taiyang-li opened a new issue, #6016:
URL: https://github.com/apache/incubator-gluten/issues/6016

   ### Backend
   
   CH (ClickHouse)
   
   ### Bug description
   
   In CH
   ```
   select cast(300582 as Decimal(29, 2)) as x, x::Int32   
   Received exception from server (version 24.4.1):
   Code: 407. DB::Exception: Received from localhost:9001. DB::Exception: 
Convert overflow: In scope SELECT CAST(300582, 'Decimal(29, 2)') AS x, 
CAST(x, 'Int32'). Stack trace:
   
   0. Poco::Exception::Exception(String const&, int) @ 0x115f495d
   1. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 
0x09c1dbff
   2. DB::Exception::Exception<>(int, FormatStringHelperImpl<>) @ 
0x04ef4cca
   3. void DB::DecimalUtils::convertToImpl>, void>(DB::Decimal> const&, unsigned int, int&) @ 0x0d928acc
   4. 
_ZN2DB18convertFromDecimalINS_15DataTypeDecimalINS_7DecimalIN4wide7integerILm128EiEENS_14DataTypeNumberIiEEQaa17IsDataTypeDecimalIT_E15is_arithmetic_vINT0_9FieldTypeSC_RKNSA_9FieldTypeEj
 @ 0x0d92b531
   5. bool DB::callOnIndexAndDataType, DB::(anonymous 
namespace)::FunctionConvert, DB::(anonymous 
namespace)::NameToInt32, DB::(anonymous 
namespace)::ToNumberMonotonicity>::executeInternal(std::vector> const&, 
std::shared_ptr const&, unsigned long) 
const::'lambda'(auto const&, DB::(anonymous 
namespace)::BehaviourOnErrorFromString)&, DB::(anonymous 
namespace)::BehaviourOnErrorFromString>(DB::TypeIndex, DB::(anonymous 
namespace)::FunctionConvert, DB::(anonymous 
namespace)::NameToInt32, DB::(anonymous 
namespace)::ToNumberMonotonicity>::executeInternal(std::vector> const&, 
std::shared_ptr const&, unsigned long) 
const::'lambda'(auto const&, DB::(anonymous 
namespace)::BehaviourOnErrorFromString)&, DB::(anonymous namespace)::Behaviou
 rOnErrorFromString&&) @ 0x05034f7f
   6. DB::(anonymous namespace)::FunctionConvert, 
DB::(anonymous namespace)::NameToInt32, DB::(anonymous 
namespace)::ToNumberMonotonicity>::executeImpl(std::vector> const&, 
std::shared_ptr const&, unsigned long) const @ 
0x05030aae
   7. 
DB::FunctionToExecutableFunctionAdaptor::executeImpl(std::vector> const&, 
std::shared_ptr const&, unsigned long) const @ 
0x0520bc73
   8. 
DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32b4c6
   9. 
DB::IExecutableFunction::executeWithoutSparseColumns(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32be2d
   10. DB::IExecutableFunction::execute(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32d08c
   11. DB::IFunctionBase::execute(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x04f0418b
   12. COW::immutable_ptr 
std::__function::__policy_invoker::immutable_ptr 
(std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned 
long)>::__call_impl, 
std::shared_ptr 
const&)::'lambda'(std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned long), 
COW::immutable_ptr 
(std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned 
long)>>(std::__function::__policy_storage const*, 
std::vector>&, std::shared_ptr const&, 
DB::ColumnNullable const*, unsigned long) @ 0x04fc67bb
   13. DB::(anonymous 
namespace)::ExecutableFunctionCast::executeImpl(std::vector> const&, 
std::shared_ptr const&, unsigned long) const @ 
0x04fb0e53
   14. 
DB::IExecutableFunction::executeDryRunImpl(std::vector> const&, 
std::shared_ptr const&, unsigned long) const @ 
0x04f0488f
   15. 
DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32b4ae
   16. 
DB::IExecutableFunction::defaultImplementationForConstantArguments(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32b070
   17. 
DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32b455
   18. 
DB::IExecutableFunction::executeWithoutSparseColumns(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32be99
   19. DB::IExecutableFunction::execute(std::vector> const&, 
std::shared_ptr const&, unsigned long, bool) const @ 
0x0d32d08c
   20. DB::(anonymous 
namespace)::QueryAnalyzer::resolveFunction(std::shared_ptr&,
 DB::(anonymous namespace)::IdentifierResolveScope&) @ 0x0e25f868
   21. DB::(anonymous 
namespace)::QueryAnalyzer::resolveExpressionNode(std::shared_ptr&,
 DB::(anonymous namespace)::IdentifierResolveScope&, bool, bool) @ 
0x0e24c046
   22. DB::(anonymous 
namespace)::QueryAnalyzer::resolveExpressionNodeList(std::shared_ptr&,
 DB::(anonymous namespace)::IdentifierResolveScope&, bool, bool) @ 
0x0e24b494
 

Re: [PR] [CI] Add CMake format check [incubator-gluten]

2024-06-07 Thread via GitHub


PHILO-HE commented on PR #5941:
URL: 
https://github.com/apache/incubator-gluten/pull/5941#issuecomment-2154199654

   @liuneng1994, do you have any comment?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6015:
URL: 
https://github.com/apache/incubator-gluten/pull/6015#issuecomment-2154181375

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6015:
URL: 
https://github.com/apache/incubator-gluten/pull/6015#issuecomment-2154179607

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6015:
URL: 
https://github.com/apache/incubator-gluten/pull/6015#issuecomment-2154179881

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] [GLUTEN-5248][VL] Directly pass legacySizeOfNull to native size function [incubator-gluten]

2024-06-07 Thread via GitHub


PHILO-HE opened a new pull request, #6014:
URL: https://github.com/apache/incubator-gluten/pull/6014

   ## What changes were proposed in this pull request?
   
   The Spark Size function's legacySizeOfNull is specified either by other 
functions like `ArraySize` or by configuration. So we need to directly pass 
this value from the instantiated `Size` to the native function. 
   
   Depends on a fix in velox:
   https://github.com/facebookincubator/velox/pull/10100
   
   ## How was this patch tested?
   
   Added test.
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]

2024-06-07 Thread via GitHub


liuneng1994 opened a new pull request, #6015:
URL: https://github.com/apache/incubator-gluten/pull/6015

   ## What changes were proposed in this pull request?
   
   Disable automatic switching of sort shuffle
   
   ## How was this patch tested?
   
   unit tests
   
   
   (If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5248][VL] Directly pass legacySizeOfNull to native size function [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6014:
URL: 
https://github.com/apache/incubator-gluten/pull/6014#issuecomment-2154179127

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5248][VL] Directly pass legacySizeOfNull to native size function [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6014:
URL: 
https://github.com/apache/incubator-gluten/pull/6014#issuecomment-2154178827

   https://github.com/apache/incubator-gluten/issues/5248


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2154172502

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2154153081

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-4836][VL]Add support for WindowGroupLimitExec in gluten [incubator-gluten]

2024-06-07 Thread via GitHub


EpsilonPrime commented on code in PR #5398:
URL: https://github.com/apache/incubator-gluten/pull/5398#discussion_r1630694098


##
gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto:
##
@@ -495,6 +504,7 @@ message Rel {
 GenerateRel generate = 17;
 WriteRel write = 18;
 TopNRel top_n = 19;
+WindowGroupLimitRel windowGroupLimit = 20;

Review Comment:
   Right now this copy differs in about a half dozen ways from the original 
project.  A protobuf saved using this version will be loaded incorrectly since 
DdlRel has field number 20.  What should happen is that the differences 
introduced here get applied back to the main project.  I've started the process 
of the CSV text format there.  The Substrait project introduced 
ConsistentPartitionWindowRel a while back (as field number 17) which may 
actually do what you want here.
   
   As long as you only talk to other consumers using this version of Substrait 
your code will work.  But you're missing out on the other tools.  For instance, 
the Substrait Validator is great for checking that you've constructed a 
conforming plan.  I run all of the plans generated by my end-to-end tests 
through it and it catches issues all the time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]

2024-06-07 Thread via GitHub


github-actions[bot] commented on PR #6001:
URL: 
https://github.com/apache/incubator-gluten/pull/6001#issuecomment-2154139427

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]

2024-06-06 Thread via GitHub


lgbo-ustc commented on code in PR #6001:
URL: https://github.com/apache/incubator-gluten/pull/6001#discussion_r1630684215


##
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala:
##
@@ -66,6 +66,11 @@ class GlutenClickhouseFunctionSuite extends 
GlutenClickHouseTPCHAbstractSuite {
   // TODO: support default ANSI policy
   .set("spark.sql.storeAssignmentPolicy", "legacy")
   .set("spark.sql.warehouse.dir", warehouse)
+  .set(

Review Comment:
   This suite is for testing functions, so disabling the unfolding of constant 
functions should be OK.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-4836][VL]Add support for WindowGroupLimitExec in gluten [incubator-gluten]

2024-06-06 Thread via GitHub


acvictor commented on code in PR #5398:
URL: https://github.com/apache/incubator-gluten/pull/5398#discussion_r1630683249


##
gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto:
##
@@ -495,6 +504,7 @@ message Rel {
 GenerateRel generate = 17;
 WriteRel write = 18;
 TopNRel top_n = 19;
+WindowGroupLimitRel windowGroupLimit = 20;

Review Comment:
   @EpsilonPrime can you explain what you mean and if it's still an issue? Does 
any change to algebra.proto need to be updated in the Substrait project too?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Update to_utc_timestamp and from_utc_timestamp tests [incubator-gluten]

2024-06-06 Thread via GitHub


acvictor commented on PR #5358:
URL: 
https://github.com/apache/incubator-gluten/pull/5358#issuecomment-2154129584

   @PHILO-HE can you please review?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Daily Update Velox Version (2024_06_06) [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on PR #6005:
URL: 
https://github.com/apache/incubator-gluten/pull/6005#issuecomment-2154116778

   Thanks!


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Daily Update Velox Version (2024_06_07) [incubator-gluten]

2024-06-06 Thread via GitHub


PHILO-HE commented on PR #6007:
URL: 
https://github.com/apache/incubator-gluten/pull/6007#issuecomment-2154090046

   /Benchmark Velox TPCDS


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enbled [incubator-gluten]

2024-06-06 Thread via GitHub


zml1206 commented on issue #6011:
URL: 
https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2154018895

   > If it's about the golden check of RAS, you can just put whatever the suite 
generates as new golden file in PR and ping me with a comment. I would take 
care of that part later on.
   
   OK, thank you.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CI] Add CMake format check [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5941:
URL: 
https://github.com/apache/incubator-gluten/pull/5941#issuecomment-2154008750

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enbled [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on issue #6011:
URL: 
https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2153972125

   If it's about the golden check of RAS, you can just put whatever the suite 
generates as new golden file in PR and ping me with a comment. I would take 
care of that part later on.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enbled [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on issue #6011:
URL: 
https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2153959366

   This is a known issue. Based on the current development status of RAS, one of 
the best solutions is to have an individual rule that matches on `Sort + Sort Agg` 
and removes that sort.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[I] Enhancement - provide option for gluten to pass s3 credentials in to velox using the configured spark.hadoop.fs.s3a.aws.credentials.provider [incubator-gluten]

2024-06-06 Thread via GitHub


xushichangdesmond opened a new issue, #6012:
URL: https://github.com/apache/incubator-gluten/issues/6012

   ### Description
   
   The Velox backend does not use the configured 
spark.hadoop.fs.s3a.aws.credentials.provider because it uses the C++ AWS SDK 
rather than the Java one. A helpful option would be for Gluten to eagerly 
resolve the credentials using the configured 
spark.hadoop.fs.s3a.aws.credentials.provider and use them to init the Velox backend.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153921466

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Optimize the performance of hash based shuffle by accumulating batches [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on PR #5951:
URL: 
https://github.com/apache/incubator-gluten/pull/5951#issuecomment-2153866781

   The change makes sense to me. I think it's workable to merge this and use 
#6009 as a follow-up, which adds an individual Spark operator controlling this 
behavior so that it can be reused for other operators in the future (say, joins or 
aggs) via some kind of strategy. Let me know if you have any thoughts @XinShuoWang @FelixYBW 
@marin-ma 
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]

2024-06-06 Thread via GitHub


zml1206 commented on issue #6011:
URL: 
https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2153854680

   Is this a known issue? @zhztheplayer 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]

2024-06-06 Thread via GitHub


zml1206 opened a new issue, #6011:
URL: https://github.com/apache/incubator-gluten/issues/6011

   ### Backend
   
   VL (Velox)
   
   ### Bug description
   
   ```
   withSQLConf(
 ("spark.sql.adaptive.enabled", "false"),
 ("spark.sql.test.forceApplySortAggregate", "true"),
 ("spark.gluten.sql.columnar.forceShuffledHashJoin", "true")) {
 createTPCHNotNullTables()
spark.sql("select l_partkey,count(1) from lineitem group by 
l_partkey").explain
   }
   ```
   == Physical Plan ==
   ```
   VeloxColumnarToRowExec
   +- ^(2) HashAggregateTransformer(keys=[l_partkey#77L], functions=[count(1)], 
output=[l_partkey#77L, count(1)#123L])
  +- ^(2) SortExecTransformer [l_partkey#77L ASC NULLS FIRST], false, 0
 +- ^(2) InputIteratorTransformer[l_partkey#77L, count#127L]
+- ^(2) InputAdapter
   +- ^(2) RowToVeloxColumnar
  +- ^(2) Exchange hashpartitioning(l_partkey#77L, 5), 
ENSURE_REQUIREMENTS, [plan_id=466]
 +- ^(2) VeloxColumnarToRowExec
+- ^(1) 
FlushableHashAggregateTransformer(keys=[l_partkey#77L], 
functions=[partial_count(1)], output=[l_partkey#77L, count#127L])
   +- ^(1) SortExecTransformer [l_partkey#77L ASC NULLS 
FIRST], false, 0
  +- ^(1) NativeFileScan parquet [l_partkey#77L] 
Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 
paths)[file:/Users/zml/Desktop/git_hub/incubator-gluten/backends-velox/target...,
 PartitionFilters: [], PushedFilters: [], ReadSchema: struct
   ```
   
   ### Spark version
   
   None
   
   ### Spark configurations
   
   _No response_
   
   ### System information
   
   _No response_
   
   ### Relevant logs
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Optimize the performance of hash based shuffle by accumulating batches [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on PR #5951:
URL: 
https://github.com/apache/incubator-gluten/pull/5951#issuecomment-2153794304

   Oops. I missed this PR before opening 
[this](https://github.com/apache/incubator-gluten/pull/6009) for similar 
purpose...


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


marin-ma commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153790260

   cc: @WangGuangxin @FelixYBW 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Add gluten iceberg jar to bundle package [incubator-gluten]

2024-06-06 Thread via GitHub


yma11 commented on PR #6008:
URL: 
https://github.com/apache/incubator-gluten/pull/6008#issuecomment-2153776532

   @leoluan2009 Thanks for your contribution. For data lake support like Iceberg 
and Delta Lake, we don't include them in the Gluten jar, as they are not used by 
all customers and would make the package fatter. Any reasons for your changes here?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Add gluten iceberg jar to bundle package [incubator-gluten]

2024-06-06 Thread via GitHub


zhouyuan commented on PR #6008:
URL: 
https://github.com/apache/incubator-gluten/pull/6008#issuecomment-2153773219

   CC @yma11 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153767337

   /Benchmark Velox TPCDS


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153766943

   /Benchmark Velox


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153764783

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]

2024-06-06 Thread via GitHub


baibaichen commented on PR #6010:
URL: 
https://github.com/apache/incubator-gluten/pull/6010#issuecomment-2153764148

   We need to merge this PR because the rebase failed with 
https://github.com/ClickHouse/ClickHouse/pull/64423


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6010:
URL: 
https://github.com/apache/incubator-gluten/pull/6010#issuecomment-2153761222

   https://github.com/apache/incubator-gluten/issues/1632


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6010:
URL: 
https://github.com/apache/incubator-gluten/pull/6010#issuecomment-2153761381

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]

2024-06-06 Thread via GitHub


kyligence-git opened a new pull request, #6010:
URL: https://github.com/apache/incubator-gluten/pull/6010

   Auto commit by gluten daily build, please check the build status and merge 
it if it's green.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153754440

   @marin-ma There might be some batch-wise overhead around shuffle split 
processing. We may want to figure it out later to avoid doing such batch 
coalesce operations that introduce extra copies.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153752582

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153750521

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6009:
URL: 
https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153750359

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer opened a new pull request, #6009:
URL: https://github.com/apache/incubator-gluten/pull/6009

   It's observed that Velox hash-based shuffle is slowed down by small input 
batches.
   
   The patch:
   
   1. Adds two options:
  - `spark.gluten.sql.columnar.backend.velox.coalesceBatchesBeforeShuffle`
 (Default: false) Set to true to combine small batches with minimal 
batch size determined by `spark.gluten.sql.columnar.maxBatchSize`. (Note the 
misnaming of `maxBatchSize` in Gluten, it might tend to be `minBatchSize`)
  - `spark.gluten.sql.columnar.backend.velox.minBatchSizeForShuffle`
 (Optional) Set to override the minimal batch used by 
`coalesceBatchesBeforeShuffle`.
   2. Does essential code refactors and cleanups. 
   
   ### Comparisons
   (by setting 
spark.gluten.sql.columnar.backend.velox.coalesceBatchesBeforeShuffle=true):
   
   Q31 total time, before and after:
   
![image](https://github.com/apache/incubator-gluten/assets/11284395/718583ad-15a2-473b-86b5-94189c2c5c9b)
   
   Closer look at exchange, before and after:
   
   
![image](https://github.com/apache/incubator-gluten/assets/11284395/3db6bad1-0a04-4879-a923-4ec2b0ee4fce)
   
   
![image](https://github.com/apache/incubator-gluten/assets/11284395/93d55924-d8ef-43f5-b994-c8333fa3d8ab)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] Add gluten iceberg jar to bundle package [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6008:
URL: 
https://github.com/apache/incubator-gluten/pull/6008#issuecomment-2153720813

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]

2024-06-06 Thread via GitHub


zzcclp commented on code in PR #6001:
URL: https://github.com/apache/incubator-gluten/pull/6001#discussion_r1630524718


##
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala:
##
@@ -66,6 +66,11 @@ class GlutenClickhouseFunctionSuite extends 
GlutenClickHouseTPCHAbstractSuite {
   // TODO: support default ANSI policy
   .set("spark.sql.storeAssignmentPolicy", "legacy")
   .set("spark.sql.warehouse.dir", warehouse)
+  .set(

Review Comment:
   set this config in the following ut case ? otherwise it can impact the other 
cases.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] [VL] Support file cache spill in Gluten [incubator-gluten]

2024-06-06 Thread via GitHub


yma11 commented on issue #5884:
URL: 
https://github.com/apache/incubator-gluten/issues/5884#issuecomment-2153715084

   @zhli1142015 @FelixYBW @zhouyuan @zhztheplayer The code changes are 
available in following PRs: 
[Spark](https://github.com/yma11/spark/pull/4/files), 
[Gluten](https://github.com/yma11/gluten/pull/2), 
[Velox](https://github.com/yma11/velox/pull/1), please take a review. Next step 
I will test it in E2E and add some docs for it. Here are some explanations 
about code change:
   1) New files in `shims/common`: Existing memory allocator listeners such as 
`ManagedAllocationListener` are under package `gluten-data` and native JNIs are 
under `backends-velox`, but because I need to call these classes/APIs in the 
injects, I put them in `shims/common`.
   2) Late initialization of file cache: We use `GlutenMemStoreInjects` to get 
the conf of cache and then do initialization after Velox backend initialized 
which assures the native libs are loaded.
   3) Cache size setting: we need to pass a cache size when calling 
`setAsyncDataCache`; using the default `int64_t max` will cause a 
`std::bad_alloc`. But the size is sensitive, since in Velox the data cache will 
use this value to control the memory allocation. If it is too small, allocation 
failure will happen on the native side even if Spark doesn't report it on the 
Java side. As we leverage the Spark memory manager to control the memory logic, 
we'd resolve this conflict by giving a large fake size for AsyncDataCache, 
maybe the same as the offheap size.
   4) SSD cache can't work well in my test as the file cache entry is easily 
larger than `8M` and will cause check failure. 
[Issue](https://github.com/facebookincubator/velox/issues/10098) is reported 
for tracking.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] [VL] Daily Update Velox Version (2024_06_07) [incubator-gluten]

2024-06-06 Thread via GitHub


GlutenPerfBot opened a new pull request, #6007:
URL: https://github.com/apache/incubator-gluten/pull/6007

   Upstream Velox's New Commits:
   
   ```txt
   35fbc2ea1 by Masha Basmanova, Add from_iso8601_timestamp Presto function 
(10062)
   7164f92ae by Jimmy Lu, Optimize VectorHasher::makeValueIdsDecoded to not 
cache hash values when it is not beneficial (10084)
   79943a76e by Pedro Eugenio Rocha Pedreira, Fix 
MergeJoinTest.dictionaryOutput flakyness (10087)
   1235441b4 by Wei He, Add benchmark of casting string to double (10068)
   c1fefbe47 by Jimmy Lu, Fix lazy evaluation causing incorrect results in 
remaining filter (10072)
   c600d09aa by Kevin Wilfong, Memory corruption in PartitionedOutput when keys 
are not a prefix of input (10075)
   179b1082b by Masha Basmanova, Add UUID Presto type (10078)
   7fe89b4f7 by Andrii Rosa, Fix HashStringAllocator::clear (10053)
   efe648f60 by Masha Basmanova, Allow trailing spaces in cast(varchar as date) 
(10077)
   7637d567f by Ke, Add bucket verification in TableWriter Fuzzer (10039)
   4845e903a by xiaoxmeng, Parallelize the spill processing inside spiller by 
partitioning (9938)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] Update CPP Formatting Script [incubator-gluten]

2024-06-06 Thread via GitHub


GlutenPerfBot commented on PR #6006:
URL: 
https://github.com/apache/incubator-gluten/pull/6006#issuecomment-2153694756

   = Performance report for TPCH SF2000 with Velox backend, for reference 
only 
   
   
   
   query
   log/native_6006_time.csv
   log/native_master_06_05_2024_54aeb010d_time.csv
   difference
   percentage
   
   
   q1
   33.46
   34.06
   0.593
   101.77%
   
   
   q2
   23.91
   23.10
   -0.813
   96.60%
   
   
   q3
   36.66
   36.59
   -0.070
   99.81%
   
   
   q4
   30.69
   34.70
   4.010
   113.07%
   
   
   q5
   69.29
   69.24
   -0.056
   99.92%
   
   
   q6
   5.86
   7.84
   1.980
   133.81%
   
   
   q7
   80.59
   81.42
   0.832
   101.03%
   
   
   q8
   84.06
   87.21
   3.143
   103.74%
   
   
   q9
   120.65
   119.35
   -1.295
   98.93%
   
   
   q10
   44.71
   45.23
   0.516
   101.15%
   
   
   q11
   20.61
   22.65
   2.045
   109.92%
   
   
   q12
   25.91
   25.04
   -0.875
   96.62%
   
   
   q13
   37.41
   36.70
   -0.715
   98.09%
   
   
   q14
   20.74
   17.35
   -3.393
   83.64%
   
   
   q15
   34.46
   29.97
   -4.490
   86.97%
   
   
   q16
   13.88
   15.12
   1.241
   108.94%
   
   
   q17
   104.36
   104.42
   0.060
   100.06%
   
   
   q18
   145.87
   144.82
   -1.050
   99.28%
   
   
   q19
   14.54
   14.62
   0.082
   100.56%
   
   
 

Re: [PR] [VL] Daily Update Velox Version (2024_06_07) [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6007:
URL: 
https://github.com/apache/incubator-gluten/pull/6007#issuecomment-2153695357

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5910] [CH] add custom type to ASTLiteral [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5911:
URL: 
https://github.com/apache/incubator-gluten/pull/5911#issuecomment-2153688739

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Simplify ExecutionCtx + Handle JNI code pattern [incubator-gluten]

2024-06-06 Thread via GitHub


INBreezefall commented on PR #3239:
URL: 
https://github.com/apache/incubator-gluten/pull/3239#issuecomment-2153660309

   > A clean-up to JNI code by pulling up a common pattern `ExecutionCtx` + 
`Handle`.
   Hi, I am a Gluten newbie. I have a simple question, hope you can help me. 
   
   I can see that the ExecutionCtxAware interface is implemented in many JNI 
classes, but its handle method is not called in the Gluten project. Can this 
interface be deleted from JNI, such as ColumnarBatchJniWrapper?
   What is the purpose of this interface? Will the handle method be called?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] Update CPP Formatting Script [incubator-gluten]

2024-06-06 Thread via GitHub


FelixYBW merged PR #6006:
URL: https://github.com/apache/incubator-gluten/pull/6006


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-06 Thread via GitHub


acvictor commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152985875

   > Can you comment a bit how these info are used by Velox? Is it aligned with 
Spark's behavior?
   
   In Spark, properties are present in the split and obtained during listing. 
Earlier Velox would make one additional call to remote storage to fetch file 
length per openFileForRead call on a path even though this information is 
already present upstream while constructing the split. Velox now allows these 
values to be passed from the caller. By making this change we can eliminate one 
additional call to remote storage per path (RTT is on the order of tens of 
milliseconds).


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Update to_utc_timestamp and from_utc_timestamp tests [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5358:
URL: 
https://github.com/apache/incubator-gluten/pull/5358#issuecomment-2152980133

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152967097

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6004:
URL: 
https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2152910863

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152880931

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] Update CPP Formatting Script [incubator-gluten]

2024-06-06 Thread via GitHub


acvictor commented on PR #6006:
URL: 
https://github.com/apache/incubator-gluten/pull/6006#issuecomment-2152770803

   @zhli1142015 @PHILO-HE can you please review?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] Update CPP Formatting Script [incubator-gluten]

2024-06-06 Thread via GitHub


acvictor opened a new pull request, #6006:
URL: https://github.com/apache/incubator-gluten/pull/6006

   ## What changes were proposed in this pull request?
   Update formatcppcode.sh to install clang 15 if not present.
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] Update CPP Formatting Script [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6006:
URL: 
https://github.com/apache/incubator-gluten/pull/6006#issuecomment-2152722473

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152714716

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152656543

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5632:
URL: 
https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152640173

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6004:
URL: 
https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2152480747

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Daily Update Velox Version (2024_06_05) [incubator-gluten]

2024-06-06 Thread via GitHub


GlutenPerfBot commented on PR #5998:
URL: 
https://github.com/apache/incubator-gluten/pull/5998#issuecomment-2152440618

   = Performance report for TPCH SF2000 with Velox backend, for reference 
only 
   
   
   
   query
   log/native_5998_time.csv
   log/native_master_06_05_2024_54aeb010d_time.csv
   difference
   percentage
   
   
   q1
   32.75
   34.06
   1.306
   103.99%
   
   
   q2
   23.69
   23.10
   -0.595
   97.49%
   
   
   q3
   36.26
   36.59
   0.333
   100.92%
   
   
   q4
   32.54
   34.70
   2.156
   106.62%
   
   
   q5
   68.62
   69.24
   0.619
   100.90%
   
   
   q6
   7.79
   7.84
   0.046
   100.59%
   
   
   q7
   81.10
   81.42
   0.316
   100.39%
   
   
   q8
   85.52
   87.21
   1.692
   101.98%
   
   
   q9
   119.13
   119.35
   0.223
   100.19%
   
   
   q10
   46.19
   45.23
   -0.964
   97.91%
   
   
   q11
   20.33
   22.65
   2.318
   111.40%
   
   
   q12
   29.60
   25.04
   -4.566
   84.58%
   
   
   q13
   37.70
   36.70
   -0.998
   97.35%
   
   
   q14
   22.04
   17.35
   -4.687
   78.73%
   
   
   q15
   29.68
   29.97
   0.289
   100.97%
   
   
   q16
   14.01
   15.12
   1.107
   107.90%
   
   
   q17
   102.29
   104.42
   2.132
   102.08%
   
   
   q18
   145.89
   144.82
   -1.074
   99.26%
   
   
   q19
   13.53
   14.62
   1.087
   108.03%
   
   
 

[PR] [VL] Daily Update Velox Version (2024_06_06) [incubator-gluten]

2024-06-06 Thread via GitHub


GlutenPerfBot opened a new pull request, #6005:
URL: https://github.com/apache/incubator-gluten/pull/6005

   Upstream Velox's New Commits:
   
   ```txt
   7637d567f by Ke, Add bucket verification in TableWriter Fuzzer (10039)
   4845e903a by xiaoxmeng, Parallelize the spill processing inside spiller by 
partitioning (9938)
   82cad2766 by Masha Basmanova, Rename prestosql/SIMDJsonXxx structs to 
JsonXxx (10070)
   72f470b24 by Jialiang Tan, Order top memory users by reserved memory (10067)
   4e8476f53 by Kevin Wilfong, Filters on LazyVectors in Joins can lead to 
incorrect results (10045)
   20e48078b by wypb, Use ByteOutputStream::size() in 
RowContainer::storeComplexType (10055)
   3832dfc88 by Athmaja N, Implement TPCH Query 11 in TpchQueryBuilder (9824)
   93846c5c4 by Kevin Wilfong, MergeJoin may hang if right side throws an 
exception ()
   d1f8cdff2 by Masha Basmanova, Add more items to monthly update for May 2024 
(10066)
   80b04bf30 by Jacob Wujciak-Jens, Use correct label for 8-core-ubuntu larger 
runner (10035)
   d7d104d63 by gaoyangxiaozhu, Fix row index wrong value issue when only 
project with const data column (10012)
   c9d36c28f by duanmeng, Add links to new docs for Memory Arbitration, 
RowNumber and TableWriter Fuzzers (10060)
   77b9f9b42 by Jialiang Tan, Fix flaky test 
MultiFragmentTest.taskTerminateWithProblematicRemainingRemoteSplits (10046)
   6e26aa764 by Ke, Fix data verification for Table Writer Fuzzer (10049)
   bb38f138f by Bikramjeet Vig, Fix min and max aggregates for floating points 
(9931)
   278a893cf by Masha Basmanova, Add monthly update for May 2024 (10052)
   79413f241 by rui-mo, Add arg generator for Presto mod decimal function 
(10037)
   a6bb4d81f by xiaoxmeng, Fix memory pool lock order reversion detected by 
Meta internal tsan test (10051)
   3a7f8a88a by Masha Basmanova, Make 'Duplicate key' a user error in 
split_to_map (10054)
   3aebeb660 by wypb, Refactor AggregateCompanionAdapter to remove duplicate 
code (9920)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Daily Update Velox Version (2024_06_06) [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6005:
URL: 
https://github.com/apache/incubator-gluten/pull/6005#issuecomment-2152360386

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6004:
URL: 
https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2152276703

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Daily Update Velox Version (2024_06_05) [incubator-gluten]

2024-06-06 Thread via GitHub


PHILO-HE merged PR #5998:
URL: https://github.com/apache/incubator-gluten/pull/5998


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5827][CH]support utc timestamp transfrom [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5828:
URL: 
https://github.com/apache/incubator-gluten/pull/5828#issuecomment-2152039410

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6004:
URL: 
https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2151949609

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6004:
URL: 
https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2151949208

   
   
   Thanks for opening a pull request!
   
   Could you open an issue for this pull request on Github Issues?
   
   https://github.com/apache/incubator-gluten/issues
   
   Then could you also rename ***commit message*** and ***pull request title*** 
in the following format?
   
   [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message}
   
   See also:
   
 * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/)
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]

2024-06-06 Thread via GitHub


zml1206 opened a new pull request, #6004:
URL: https://github.com/apache/incubator-gluten/pull/6004

   ## What changes were proposed in this pull request?
   
   1. Drop the custom strategy and move the `JoinSelectionOverrides` logic to 
`ColumnarOverrides`.
   2. The ClickHouse backend already supports AQE, so delete the related code.
   
   ## How was this patch tested?
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [Gluten-229] Support Spark SortShuffleManager for ClickHouse Backend [incubator-gluten]

2024-06-06 Thread via GitHub


zml1206 commented on code in PR #230:
URL: https://github.com/apache/incubator-gluten/pull/230#discussion_r1629075647


##
jvm/src/main/scala/io/glutenproject/extension/StrategyOverrides.scala:
##
@@ -76,8 +69,26 @@ object JoinSelectionOverrides extends Strategy with 
JoinSelectionHelper with SQL
   if (GlutenConfig.getSessionConf.forceShuffledHashJoin) {
 // Force use of ShuffledHashJoin in preference to SortMergeJoin. With 
no respect to
 // conf setting "spark.sql.join.preferSortMergeJoin".
-val leftBuildable = canBuildShuffledHashJoinLeft(joinType)
-val rightBuildable = canBuildShuffledHashJoinRight(joinType)
+val (leftBuildable, rightBuildable) = if 
(GlutenConfig.getConf.isClickHouseBackend) {
+  // Currently, ClickHouse backend can not support AQE, so it needs to 
use join hint
+  // to decide the build side, after supporting AQE, will remove this.

Review Comment:
   OK.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Make ColumnarBatch::getRowBytes leak-safe [incubator-gluten]

2024-06-06 Thread via GitHub


GlutenPerfBot commented on PR #6002:
URL: 
https://github.com/apache/incubator-gluten/pull/6002#issuecomment-2151751915

   = Performance report for TPCH SF2000 with Velox backend, for reference 
only 
   
   
   
   query
   log/native_6002_time.csv
   log/native_master_06_05_2024_54aeb010d_time.csv
   difference
   percentage
   
   
   q1
   33.59
   34.06
   0.466
   101.39%
   
   
   q2
   25.58
   23.10
   -2.483
   90.29%
   
   
   q3
   36.47
   36.59
   0.116
   100.32%
   
   
   q4
   35.01
   34.70
   -0.315
   99.10%
   
   
   q5
   68.00
   69.24
   1.238
   101.82%
   
   
   q6
   5.90
   7.84
   1.934
   132.77%
   
   
   q7
   82.99
   81.42
   -1.573
   98.10%
   
   
   q8
   86.42
   87.21
   0.790
   100.91%
   
   
   q9
   118.95
   119.35
   0.405
   100.34%
   
   
   q10
   43.19
   45.23
   2.041
   104.73%
   
   
   q11
   20.90
   22.65
   1.756
   108.40%
   
   
   q12
   25.60
   25.04
   -0.564
   97.80%
   
   
   q13
   36.91
   36.70
   -0.213
   99.42%
   
   
   q14
   19.27
   17.35
   -1.924
   90.02%
   
   
   q15
   30.06
   29.97
   -0.088
   99.71%
   
   
   q16
   14.22
   15.12
   0.894
   106.29%
   
   
   q17
   100.49
   104.42
   3.929
   103.91%
   
   
   q18
   146.89
   144.82
   -2.077
   98.59%
   
   
   q19
   16.43
   14.62
   -1.811
   88.98%
   
   
 

Re: [PR] [DNM][GLUTEN-5548][VL] test dynamic setting [incubator-gluten]

2024-06-06 Thread via GitHub


zhli1142015 closed pull request #5506: [DNM][GLUTEN-5548][VL] test dynamic 
setting
URL: https://github.com/apache/incubator-gluten/pull/5506


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [DNM] test min [incubator-gluten]

2024-06-06 Thread via GitHub


zhli1142015 closed pull request #5964: [DNM] test min
URL: https://github.com/apache/incubator-gluten/pull/5964


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5910] [CH] add custom type to ASTLiteral [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5911:
URL: 
https://github.com/apache/incubator-gluten/pull/5911#issuecomment-2151681224

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] HiveFileFormat has incompatible class error when running TPC-H q18 [incubator-gluten]

2024-06-06 Thread via GitHub


lgbo-ustc commented on issue #3351:
URL: 
https://github.com/apache/incubator-gluten/issues/3351#issuecomment-2151622431

   > How is this solved? @JkSelf @kelvin-qin 。 We also meet this problem
   > 
   > ```shell
   > NATIVE_WRITER_CONF="--conf spark.gluten.sql.native.writer.enabled=true
   > --conf spark.files=$SPARK_HOME/jars/gluten.jar
   > --conf spark.driver.extraClassPath=$SPARK_HOME/gluten.jar
   > --conf spark.executor.extraClassPath=./gluten.jar
   > --conf spark.driver.userClassPathFirst=true
   > --conf spark.executor.userClassPathFirst=true
   > "
   > ```
   
   Found the problem. The path configuration spark.driver.extraClassPath is 
wrong


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] HiveFileFormat has incompatible class error when running TPC-H q18 [incubator-gluten]

2024-06-06 Thread via GitHub


lgbo-ustc commented on issue #3351:
URL: 
https://github.com/apache/incubator-gluten/issues/3351#issuecomment-2151621681

   > ```shell
   > HiveFileFormat
   > ```
   
   Found the problem. The path configuration `spark.driver.extraClassPath` is 
wrong


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240606) [incubator-gluten]

2024-06-06 Thread via GitHub


baibaichen merged PR #5999:
URL: https://github.com/apache/incubator-gluten/pull/5999


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Make ColumnarBatch::getRowBytes leak-safe [incubator-gluten]

2024-06-06 Thread via GitHub


zhztheplayer merged PR #6002:
URL: https://github.com/apache/incubator-gluten/pull/6002


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5827][CH]support utc timestamp transfrom [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5828:
URL: 
https://github.com/apache/incubator-gluten/pull/5828#issuecomment-2151564344

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] HiveFileFormat has incompatible class error when running TPC-H q18 [incubator-gluten]

2024-06-06 Thread via GitHub


lgbo-ustc commented on issue #3351:
URL: 
https://github.com/apache/incubator-gluten/issues/3351#issuecomment-2151558970

   How is this solved?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5827][CH]support utc timestamp transfrom [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #5828:
URL: 
https://github.com/apache/incubator-gluten/pull/5828#issuecomment-2151554568

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] [VL] can not write hive table on HDFS [incubator-gluten]

2024-06-06 Thread via GitHub


RaoZhiRou-Z commented on issue #5879:
URL: 
https://github.com/apache/incubator-gluten/issues/5879#issuecomment-2151552175

   @PHILO-HE I have tried the bug fix, and there is another core dump error. The 
core stack shows:
   
   **#0  __GI_raise (sig=sig@entry=6) at 
/root/work/deck/devel/toolchain/glibc-2.33/signal/raise.c:49
   #1  0x7f6143cfa527 in __GI_abort () at abort.c:79
   #2  0x7f61435ec089 in os::abort(bool) () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #3  0x7f61437b5363 in VMError::report_and_die() () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #4  0x7f61435f64e5 in JVM_handle_linux_signal () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #5  0x7f61435e8f48 in signalHandler(int, siginfo*, void*) () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #6  <signal handler called>
   #7  0x7f5ffc54e52a in 
facebook::velox::dwio::common::FileSink::writeWithLogging(std::vector, std::allocator > 
>&) ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #8  0x7f5ffc54f102 in 
facebook::velox::dwio::common::FileSink::write(facebook::velox::dwio::common::DataBuffer) ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #9  0x7f5ffc3c9306 in 
facebook::velox::parquet::ArrowDataBufferSink::Close() ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #10 0x7f5ffc3c525a in facebook::velox::parquet::Writer::close() ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #11 0x7f5ffe1ce775 in 
Java_org_apache_gluten_datasource_DatasourceJniWrapper_close ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libgluten.so
   #12 0x7f612d018767 in ?? ()
   #13 0x7f5ff178fca8 in ?? ()
   #14 0x7f612d0184c7 in ?? ()
   #15 0x7f612d018482 in ?? ()
   #16 0x7f5ff178fc68 in ?? ()
   #17 0x7f5fe36470e8 in ?? ()
   #18 0x7f5ff178fcd8 in ?? ()
   #19 0x7f5fe3647298 in ?? ()
   #20 0x in ?? ()**


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] [VL] can not write hive table on HDFS [incubator-gluten]

2024-06-06 Thread via GitHub


RaoZhiRou-Z commented on issue #5879:
URL: 
https://github.com/apache/incubator-gluten/issues/5879#issuecomment-2151549794

   @PHILO-HE I have tried the bug fix, and there is another core dump error. The 
core stack shows:
   
   `#0  __GI_raise (sig=sig@entry=6) at 
/root/work/deck/devel/toolchain/glibc-2.33/signal/raise.c:49
   #1  0x7f6143cfa527 in __GI_abort () at abort.c:79
   #2  0x7f61435ec089 in os::abort(bool) () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #3  0x7f61437b5363 in VMError::report_and_die() () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #4  0x7f61435f64e5 in JVM_handle_linux_signal () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #5  0x7f61435e8f48 in signalHandler(int, siginfo*, void*) () from 
/home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so
   #6  <signal handler called>
   #7  0x7f5ffc54e52a in 
facebook::velox::dwio::common::FileSink::writeWithLogging(std::vector, std::allocator > 
>&) ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #8  0x7f5ffc54f102 in 
facebook::velox::dwio::common::FileSink::write(facebook::velox::dwio::common::DataBuffer) ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #9  0x7f5ffc3c9306 in 
facebook::velox::parquet::ArrowDataBufferSink::Close() ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #10 0x7f5ffc3c525a in facebook::velox::parquet::Writer::close() ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so
   #11 0x7f5ffe1ce775 in 
Java_org_apache_gluten_datasource_DatasourceJniWrapper_close ()
  from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libgluten.so
   #12 0x7f612d018767 in ?? ()
   #13 0x7f5ff178fca8 in ?? ()
   #14 0x7f612d0184c7 in ?? ()
   #15 0x7f612d018482 in ?? ()
   #16 0x7f5ff178fc68 in ?? ()
   #17 0x7f5fe36470e8 in ?? ()
   #18 0x7f5ff178fcd8 in ?? ()
   #19 0x7f5fe3647298 in ?? ()
   #20 0x in ?? ()`


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]

2024-06-06 Thread via GitHub


github-actions[bot] commented on PR #6001:
URL: 
https://github.com/apache/incubator-gluten/pull/6001#issuecomment-2151538528

   Run Gluten Clickhouse CI


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Handle try_subtract, try_multiply, try_divide [incubator-gluten]

2024-06-06 Thread via GitHub


GlutenPerfBot commented on PR #5985:
URL: 
https://github.com/apache/incubator-gluten/pull/5985#issuecomment-2151520949

   ===== Performance report for TPCH SF2000 with Velox backend, for reference 
only =====
   
   | query | log/native_5985_time.csv | log/native_master_06_05_2024_54aeb010d_time.csv | difference | percentage |
   |-------|--------------------------|-------------------------------------------------|------------|------------|
   | q1    | 35.32                    | 34.06                                           | -1.266     | 96.42%     |
   | q2    | 23.73                    | 23.10                                           | -0.627     | 97.36%     |
   | q3    | 37.62                    | 36.59                                           | -1.033     | 97.25%     |
   | q4    | 31.56                    | 34.70                                           | 3.138      | 109.94%    |
   | q5    | 69.91                    | 69.24                                           | -0.676     | 99.03%     |
   | q6    | 7.53                     | 7.84                                            | 0.307      | 104.08%    |
   | q7    | 82.50                    | 81.42                                           | -1.082     | 98.69%     |
   | q8    | 87.04                    | 87.21                                           | 0.163      | 100.19%    |
   | q9    | 124.78                   | 119.35                                          | -5.429     | 95.65%     |
   | q10   | 45.74                    | 45.23                                           | -0.509     | 98.89%     |
   | q11   | 20.38                    | 22.65                                           | 2.267      | 111.12%    |
   | q12   | 25.56                    | 25.04                                           | -0.520     | 97.97%     |
   | q13   | 40.29                    | 36.70                                           | -3.595     | 91.08%     |
   | q14   | 17.81                    | 17.35                                           | -0.464     | 97.40%     |
   | q15   | 31.24                    | 29.97                                           | -1.270     | 95.94%     |
   | q16   | 14.42                    | 15.12                                           | 0.692      | 104.80%    |
   | q17   | 104.54                   | 104.42                                          | -0.114     | 99.89%     |
   | q18   | 145.45                   | 144.82                                          | -0.629     | 99.57%     |
   | q19   | 13.48                    | 14.62                                           | 1.143      | 108.48%    |
   
   
 

Re: [PR] [Gluten-229] Support Spark SortShuffleManager for ClickHouse Backend [incubator-gluten]

2024-06-06 Thread via GitHub


zzcclp commented on code in PR #230:
URL: https://github.com/apache/incubator-gluten/pull/230#discussion_r1628850760


##
jvm/src/main/scala/io/glutenproject/extension/StrategyOverrides.scala:
##
@@ -76,8 +69,26 @@ object JoinSelectionOverrides extends Strategy with 
JoinSelectionHelper with SQL
   if (GlutenConfig.getSessionConf.forceShuffledHashJoin) {
 // Force use of ShuffledHashJoin in preference to SortMergeJoin. With 
no respect to
 // conf setting "spark.sql.join.preferSortMergeJoin".
-val leftBuildable = canBuildShuffledHashJoinLeft(joinType)
-val rightBuildable = canBuildShuffledHashJoinRight(joinType)
+val (leftBuildable, rightBuildable) = if 
(GlutenConfig.getConf.isClickHouseBackend) {
+  // Currently, ClickHouse backend can not support AQE, so it needs to 
use join hint
+  // to decide the build side, after supporting AQE, will remove this.

Review Comment:
   remove these comments please, it's not correct, now it already supports AQE.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] [VL] Unsupported spark function list [please leave a comment if you plan to pick some] [incubator-gluten]

2024-06-05 Thread via GitHub


PHILO-HE commented on issue #4039:
URL: 
https://github.com/apache/incubator-gluten/issues/4039#issuecomment-2151468471

   > > ubase64: #4482
   > 
   > I see you've mapped from_base64 to unbase64, and correspondingly I find that 
base64 is almost the same as to_base64. So is that mapping simply missing, or is 
there some other consideration?
   
   @Donvi, seems there are a few semantic differences between Spark's 
`unbase64` & Velox's `from_base64`. So the simple mapping has not been accepted 
by the community. See discussion: 
https://github.com/apache/incubator-gluten/pull/5242#discussion_r1548887962. I 
guess similarly `to_base64` cannot be mapped due to some unknown differences.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] [CH] Not found column when the mixed join condition involves right keys [incubator-gluten]

2024-06-05 Thread via GitHub


baibaichen closed issue #5996: [CH] Not found column when the mixed join 
condition involves right keys
URL: https://github.com/apache/incubator-gluten/issues/5996


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [GLUTEN-5996][CH] Fixed missing columns in join with mixed conditions [incubator-gluten]

2024-06-05 Thread via GitHub


baibaichen merged PR #5997:
URL: https://github.com/apache/incubator-gluten/pull/5997


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[I] Static build issue when GCS is enabled [incubator-gluten]

2024-06-05 Thread via GitHub


PHILO-HE opened a new issue, #6003:
URL: https://github.com/apache/incubator-gluten/issues/6003

   ### Problem description
   
   Here is the error log from building absl (the vcpkg-installed version is not 
the required one), which is required by google-cloud-cpp. It looks like 
raw_logging.h (from an old version of absl) installed by vcpkg is being included 
during compilation.
   ```
   2024-06-04T03:02:17.6948676Z [6/982] Building CXX object 
_deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o
   2024-06-04T03:02:17.6951327Z FAILED: 
_deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o
 
   2024-06-04T03:02:17.6961341Z /opt/rh/devtoolset-9/root/usr/bin/c++ 
-DFOLLY_HAVE_INT128_T=1 -DVELOX_ENABLE_ABFS -DVELOX_ENABLE_HDFS3 
-DVELOX_ENABLE_PARQUET -DVELOX_ENABLE_S3 
-I/__w/incubator-gluten/incubator-gluten/dev/vcpkg/vcpkg_installed/x64-linux-avx/include
 -isystem 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src
 -Wno-missing-field-initializers -march=native -std=c++17 -mno-avx512f -mbmi2 
-D USE_VELOX_COMMON_BASE -D HAS_UNCAUGHT_EXCEPTIONS -Wall -Wextra -Wno-unused   
 -Wno-unused-parameter-Wno-sign-compare
-Wno-ignored-qualifiers-Wno-implicit-fallthrough  
-Wno-class-memaccess  -Wno-comment  -Wno-int-in-bool-context
  -Wno-redundant-move  -Wno-array-bounds  
-Wno-maybe-uninitialized  -Wno-unused-result  
-Wno-format-overflow  -Wno-strict-aliasing -O3 -DNDEBUG -std=gnu++17 
-fPIC -fdiagnostics-color=always -Wall -Wextra -Wcast-qual -Wc
 onversion-null -Wformat-security -Wmissing-declarations -Woverlength-strings 
-Wpointer-arith -Wundef -Wunused-local-typedefs -Wunused-result -Wvarargs -Wvla 
-Wwrite-strings -DNOMINMAX -MD -MT 
_deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o
 -MF 
_deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o.d
 -o 
_deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o
 -c 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc
   2024-06-04T03:02:17.6977549Z 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc:
 In function 'void 
absl::lts_20230125::raw_log_internal::{anonymous}::RawLogVA(absl::lts_20230125::LogSeverity,
 const char*, int, const char*, __va_list_tag*)':
   2024-06-04T03:02:17.6981062Z 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc:178:5:
 error: 'AsyncSignalSafeWriteError' was not declared in this scope; did you 
mean 'AsyncSignalSafeWriteToStderr'?
   2024-06-04T03:02:17.6982922Z   178 | AsyncSignalSafeWriteError(buffer, 
strlen(buffer));
   2024-06-04T03:02:17.6983923Z   | ^
   2024-06-04T03:02:17.6984493Z   | AsyncSignalSafeWriteToStderr
   2024-06-04T03:02:17.6986524Z 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc:
 At global scope:
   2024-06-04T03:02:17.6992358Z 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc:206:6:
 warning: no previous declaration for 'void 
absl::lts_20230125::raw_log_internal::AsyncSignalSafeWriteError(const char*, 
size_t)' [-Wmissing-declarations]
   2024-06-04T03:02:17.6995703Z   206 | void AsyncSignalSafeWriteError(const 
char* s, size_t len) {
   ```
   
   ### System information
   
   N/A
   
   ### CMake log
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [I] [VL] Unsupported spark function list [please leave a comment if you plan to pick some] [incubator-gluten]

2024-06-05 Thread via GitHub


Donvi commented on issue #4039:
URL: 
https://github.com/apache/incubator-gluten/issues/4039#issuecomment-2151449323

   > ubase64: #4482
   
   I see you've mapped from_base64 to unbase64, and correspondingly I find that 
base64 is almost the same as to_base64. So is that mapping simply missing, or is 
there some other consideration?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Handle try_subtract, try_multiply, try_divide [incubator-gluten]

2024-06-05 Thread via GitHub


zhli1142015 merged PR #5985:
URL: https://github.com/apache/incubator-gluten/pull/5985


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



Re: [PR] [VL] Make ColumnarBatch::getRowBytes leak-safe [incubator-gluten]

2024-06-05 Thread via GitHub


zhztheplayer commented on PR #6002:
URL: 
https://github.com/apache/incubator-gluten/pull/6002#issuecomment-2151428336

   cc @JkSelf


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



  1   2   3   4   5   6   7   8   9   10   >