Re: [PR] [CI] Add CMake format check [incubator-gluten]
liuneng1994 commented on code in PR #5941: URL: https://github.com/apache/incubator-gluten/pull/5941#discussion_r1630740681 ## cpp-ch/CMakeLists.txt: ## @@ -27,93 +29,93 @@ message("CH_COMMIT=${CH_COMMIT}") project(libch LANGUAGES C CXX ASM) file(GLOB clickhouse_files "${CH_SOURCE_DIR}/*") -if ("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse") -if (NOT clickhouse_files) -execute_process(COMMAND git clone -b ${CH_BRANCH} --depth 3 https://github.com/${CH_ORG}/ClickHouse.git ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -execute_process(COMMAND git reset --hard ${CH_COMMIT} WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -execute_process(COMMAND git submodule update --init --force --depth 1 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -else() -execute_process(COMMAND git fetch origin ${CH_BRANCH} --depth 3 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -execute_process(COMMAND git checkout ${CH_BRANCH} WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -execute_process(COMMAND git reset --hard ${CH_COMMIT} WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -execute_process(COMMAND git submodule update --init --recursive --force --depth 1 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) -endif() +if("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse") + if(NOT clickhouse_files) +execute_process( + COMMAND +git clone -b ${CH_BRANCH} --depth 3 +https://github.com/${CH_ORG}/ClickHouse.git ${CH_SOURCE_DIR} +COMMAND_ERROR_IS_FATAL ANY) +execute_process( + COMMAND git reset --hard ${CH_COMMIT} + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) +execute_process( + COMMAND git submodule update --init --force --depth 1 + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + else() +execute_process( + COMMAND git fetch origin ${CH_BRANCH} --depth 3 + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) +execute_process( + COMMAND git 
checkout ${CH_BRANCH} + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) +execute_process( + COMMAND git reset --hard ${CH_COMMIT} + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) +execute_process( + COMMAND git submodule update --init --recursive --force --depth 1 + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + endif() else() -if (NOT clickhouse_files) -# Checking out *all* submodules takes > 5 min. Therefore, the smoke build ("FastTest") in CI initializes only the set of -# submodules minimally needed for a build and we cannot assume here that all submodules are populated. -message(ERROR "clickhouse ${CH_SOURCE_DIR} is missing or empty. to fix try run:") -message(STATUS "git clone --recursive --depth 1 https://github.com/Kyligence/ClickHouse.git ${CMAKE_SOURCE_DIR}") -endif() + if(NOT clickhouse_files) +# Checking out *all* submodules takes > 5 min. Therefore, the smoke build +# ("FastTest") in CI initializes only the set of submodules minimally needed +# for a build and we cannot assume here that all submodules are populated. +message(ERROR +"clickhouse ${CH_SOURCE_DIR} is missing or empty. 
to fix try run:") +message( + STATUS +"git clone --recursive --depth 1 https://github.com/Kyligence/ClickHouse.git ${CMAKE_SOURCE_DIR}" +) + endif() endif() -if (EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine") -execute_process(COMMAND rm -rf ${CH_SOURCE_DIR}/utils/extern-local-engine) -endif () -execute_process(COMMAND ln -s ${CMAKE_CURRENT_SOURCE_DIR}/local-engine ${CH_SOURCE_DIR}/utils/extern-local-engine COMMAND_ERROR_IS_FATAL ANY) +if(EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine") + execute_process(COMMAND rm -rf ${CH_SOURCE_DIR}/utils/extern-local-engine) +endif() +execute_process( + COMMAND ln -s ${CMAKE_CURRENT_SOURCE_DIR}/local-engine + ${CH_SOURCE_DIR}/utils/extern-local-engine COMMAND_ERROR_IS_FATAL ANY) -# execute_process(COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/local-engine -regex '.*\.\(c\|cpp\|h\)' -exec clang-format-15 --verbose -i --style=file -i {} \;) +# execute_process(COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/local-engine -regex +# '.*\.\(c\|cpp\|h\)' -exec clang-format-15 --verbose -i --style=file -i {} \;) set(CH_BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/build") option(ENABLE_CPP_TEST "Build CPP Unit test" OFF) -if (ENABLE_CPP_TEST) -add_custom_command( -USES_TERMINAL -COMMAND -bash -c -\"cmake -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} --DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} --DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} --DENABLE_PROTOBUF=ON --DENABLE_TESTS=ON -
Re: [PR] [CI] Add CMake format check [incubator-gluten]
liuneng1994 commented on PR #5941: URL: https://github.com/apache/incubator-gluten/pull/5941#issuecomment-2154215349 > @liuneng1994, do you have any comment? There is generally no problem, but subjectively, cmake-format seems to make some places messier. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] Diff when overflow happens while executing cast big decimal to int [incubator-gluten]
taiyang-li commented on issue #6016: URL: https://github.com/apache/incubator-gluten/issues/6016#issuecomment-2154203226 It was found in d_926_0.sql -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[I] Diff when overflow happens while executing cast big decimal to int [incubator-gluten]
taiyang-li opened a new issue, #6016: URL: https://github.com/apache/incubator-gluten/issues/6016 ### Backend CH (ClickHouse) ### Bug description In CH ``` select cast(300582 as Decimal(29, 2)) as x, x::Int32 Received exception from server (version 24.4.1): Code: 407. DB::Exception: Received from localhost:9001. DB::Exception: Convert overflow: In scope SELECT CAST(300582, 'Decimal(29, 2)') AS x, CAST(x, 'Int32'). Stack trace: 0. Poco::Exception::Exception(String const&, int) @ 0x115f495d 1. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x09c1dbff 2. DB::Exception::Exception<>(int, FormatStringHelperImpl<>) @ 0x04ef4cca 3. void DB::DecimalUtils::convertToImpl>, void>(DB::Decimal> const&, unsigned int, int&) @ 0x0d928acc 4. _ZN2DB18convertFromDecimalINS_15DataTypeDecimalINS_7DecimalIN4wide7integerILm128EiEENS_14DataTypeNumberIiEEQaa17IsDataTypeDecimalIT_E15is_arithmetic_vINT0_9FieldTypeSC_RKNSA_9FieldTypeEj @ 0x0d92b531 5. bool DB::callOnIndexAndDataType, DB::(anonymous namespace)::FunctionConvert, DB::(anonymous namespace)::NameToInt32, DB::(anonymous namespace)::ToNumberMonotonicity>::executeInternal(std::vector> const&, std::shared_ptr const&, unsigned long) const::'lambda'(auto const&, DB::(anonymous namespace)::BehaviourOnErrorFromString)&, DB::(anonymous namespace)::BehaviourOnErrorFromString>(DB::TypeIndex, DB::(anonymous namespace)::FunctionConvert, DB::(anonymous namespace)::NameToInt32, DB::(anonymous namespace)::ToNumberMonotonicity>::executeInternal(std::vector> const&, std::shared_ptr const&, unsigned long) const::'lambda'(auto const&, DB::(anonymous namespace)::BehaviourOnErrorFromString)&, DB::(anonymous namespace)::Behaviou rOnErrorFromString&&) @ 0x05034f7f 6. DB::(anonymous namespace)::FunctionConvert, DB::(anonymous namespace)::NameToInt32, DB::(anonymous namespace)::ToNumberMonotonicity>::executeImpl(std::vector> const&, std::shared_ptr const&, unsigned long) const @ 0x05030aae 7. 
DB::FunctionToExecutableFunctionAdaptor::executeImpl(std::vector> const&, std::shared_ptr const&, unsigned long) const @ 0x0520bc73 8. DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32b4c6 9. DB::IExecutableFunction::executeWithoutSparseColumns(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32be2d 10. DB::IExecutableFunction::execute(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32d08c 11. DB::IFunctionBase::execute(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x04f0418b 12. COW::immutable_ptr std::__function::__policy_invoker::immutable_ptr (std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned long)>::__call_impl, std::shared_ptr const&)::'lambda'(std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned long), COW::immutable_ptr (std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned long)>>(std::__function::__policy_storage const*, std::vector>&, std::shared_ptr const&, DB::ColumnNullable const*, unsigned long) @ 0x04fc67bb 13. DB::(anonymous namespace)::ExecutableFunctionCast::executeImpl(std::vector> const&, std::shared_ptr const&, unsigned long) const @ 0x04fb0e53 14. DB::IExecutableFunction::executeDryRunImpl(std::vector> const&, std::shared_ptr const&, unsigned long) const @ 0x04f0488f 15. DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32b4ae 16. DB::IExecutableFunction::defaultImplementationForConstantArguments(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32b070 17. DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32b455 18. 
DB::IExecutableFunction::executeWithoutSparseColumns(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32be99 19. DB::IExecutableFunction::execute(std::vector> const&, std::shared_ptr const&, unsigned long, bool) const @ 0x0d32d08c 20. DB::(anonymous namespace)::QueryAnalyzer::resolveFunction(std::shared_ptr&, DB::(anonymous namespace)::IdentifierResolveScope&) @ 0x0e25f868 21. DB::(anonymous namespace)::QueryAnalyzer::resolveExpressionNode(std::shared_ptr&, DB::(anonymous namespace)::IdentifierResolveScope&, bool, bool) @ 0x0e24c046 22. DB::(anonymous namespace)::QueryAnalyzer::resolveExpressionNodeList(std::shared_ptr&, DB::(anonymous namespace)::IdentifierResolveScope&, bool, bool) @ 0x0e24b494
Re: [PR] [CI] Add CMake format check [incubator-gluten]
PHILO-HE commented on PR #5941: URL: https://github.com/apache/incubator-gluten/pull/5941#issuecomment-2154199654 @liuneng1994, do you have any comment? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]
github-actions[bot] commented on PR #6015: URL: https://github.com/apache/incubator-gluten/pull/6015#issuecomment-2154181375 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]
github-actions[bot] commented on PR #6015: URL: https://github.com/apache/incubator-gluten/pull/6015#issuecomment-2154179607 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]
github-actions[bot] commented on PR #6015: URL: https://github.com/apache/incubator-gluten/pull/6015#issuecomment-2154179881 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] [GLUTEN-5248][VL] Directly pass legacySizeOfNull to native size function [incubator-gluten]
PHILO-HE opened a new pull request, #6014: URL: https://github.com/apache/incubator-gluten/pull/6014 ## What changes were proposed in this pull request? Spark Size function's legacySizeOfNull is specified either by other functions like `ArraySize` or by configuration. So we need to directly pass this value of instantiated `Size` to native function. Depends on a fix in velox: https://github.com/facebookincubator/velox/pull/10100 ## How was this patch tested? Added test. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] [CH] Disable automatic switching of sort shuffle [incubator-gluten]
liuneng1994 opened a new pull request, #6015: URL: https://github.com/apache/incubator-gluten/pull/6015 ## What changes were proposed in this pull request? Disable automatic switching of sort shuffle ## How was this patch tested? unit tests (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5248][VL] Directly pass legacySizeOfNull to native size function [incubator-gluten]
github-actions[bot] commented on PR #6014: URL: https://github.com/apache/incubator-gluten/pull/6014#issuecomment-2154179127 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5248][VL] Directly pass legacySizeOfNull to native size function [incubator-gluten]
github-actions[bot] commented on PR #6014: URL: https://github.com/apache/incubator-gluten/pull/6014#issuecomment-2154178827 https://github.com/apache/incubator-gluten/issues/5248 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
github-actions[bot] commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2154172502 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
github-actions[bot] commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2154153081 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-4836][VL]Add support for WindowGroupLimitExec in gluten [incubator-gluten]
EpsilonPrime commented on code in PR #5398: URL: https://github.com/apache/incubator-gluten/pull/5398#discussion_r1630694098 ## gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto: ## @@ -495,6 +504,7 @@ message Rel { GenerateRel generate = 17; WriteRel write = 18; TopNRel top_n = 19; +WindowGroupLimitRel windowGroupLimit = 20; Review Comment: Right now this copy differs in about a half dozen ways from the original project. A protobuf saved using this version will be loaded incorrectly since DdlRel has field number 20. What should happen is that the differences introduced here get applied back to the main project. I've started the process of the CSV text format there. The Substrait project introduced ConsistentPartitionWindowRel a while back (as field number 17) which may actually do what you want here. As long as you only talk to other consumers using this version of Substrait your code will work. But you're missing out on the other tools. For instance, the Substrait Validator is great for checking that you've constructed a conforming plan. I run all of the plans generated by my end-to-end tests through it and it catches issues all the time. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]
github-actions[bot] commented on PR #6001: URL: https://github.com/apache/incubator-gluten/pull/6001#issuecomment-2154139427 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]
lgbo-ustc commented on code in PR #6001: URL: https://github.com/apache/incubator-gluten/pull/6001#discussion_r1630684215 ## backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala: ## @@ -66,6 +66,11 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite { // TODO: support default ANSI policy .set("spark.sql.storeAssignmentPolicy", "legacy") .set("spark.sql.warehouse.dir", warehouse) + .set( Review Comment: This suite is for testing functions, so disabling the unfolding of constant functions should be OK -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-4836][VL]Add support for WindowGroupLimitExec in gluten [incubator-gluten]
acvictor commented on code in PR #5398: URL: https://github.com/apache/incubator-gluten/pull/5398#discussion_r1630683249 ## gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto: ## @@ -495,6 +504,7 @@ message Rel { GenerateRel generate = 17; WriteRel write = 18; TopNRel top_n = 19; +WindowGroupLimitRel windowGroupLimit = 20; Review Comment: @EpsilonPrime can you explain what you mean and if it's still an issue? Does any change to algebra.proto need to be updated in the Substrait project too? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Update to_utc_timestamp and from_utc_timestamp tests [incubator-gluten]
acvictor commented on PR #5358: URL: https://github.com/apache/incubator-gluten/pull/5358#issuecomment-2154129584 @PHILO-HE can you please review? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Daily Update Velox Version (2024_06_06) [incubator-gluten]
zhztheplayer commented on PR #6005: URL: https://github.com/apache/incubator-gluten/pull/6005#issuecomment-2154116778 Thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Daily Update Velox Version (2024_06_07) [incubator-gluten]
PHILO-HE commented on PR #6007: URL: https://github.com/apache/incubator-gluten/pull/6007#issuecomment-2154090046 /Benchmark Velox TPCDS -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]
zml1206 commented on issue #6011: URL: https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2154018895 > If it's about the golden check of RAS, you can just put whatever the suite generates as new golden file in PR and ping me with a comment. I would take care of that part later on. OK, thank you. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CI] Add CMake format check [incubator-gluten]
github-actions[bot] commented on PR #5941: URL: https://github.com/apache/incubator-gluten/pull/5941#issuecomment-2154008750 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]
zhztheplayer commented on issue #6011: URL: https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2153972125 If it's about the golden check of RAS, you can just put whatever the suite generates as new golden file in PR and ping me with a comment. I would take care of that part later on. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]
zhztheplayer commented on issue #6011: URL: https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2153959366 This is a known issue. Based on the current development status of RAS, one of the best solutions is to have an individual rule to match on `Sort + Sort Agg` to remove that sort. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[I] Enhancement - provide option for gluten to pass s3 credentials in to velox using the configured spark.hadoop.fs.s3a.aws.credentials.provider [incubator-gluten]
xushichangdesmond opened a new issue, #6012: URL: https://github.com/apache/incubator-gluten/issues/6012 ### Description Velox backend does not use the configured spark.hadoop.fs.s3a.aws.credentials.provider as it's not using the Java AWS SDK, but rather the C++ one. One helpful option would be for Gluten to eagerly resolve the credentials first, using the configured spark.hadoop.fs.s3a.aws.credentials.provider, and use them to init the Velox backend. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
github-actions[bot] commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153921466 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Optimize the performance of hash based shuffle by accumulating batches [incubator-gluten]
zhztheplayer commented on PR #5951: URL: https://github.com/apache/incubator-gluten/pull/5951#issuecomment-2153866781 The change makes sense to me. I think it's workable to merge this and use #6009 as a follow-up, which adds an individual Spark operator controlling this behavior so that it can be reused for other operators in the future (say, joins or aggs) by some kind of strategy. Let me know if you have any thoughts @XinShuoWang @FelixYBW @marin-ma -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]
zml1206 commented on issue #6011: URL: https://github.com/apache/incubator-gluten/issues/6011#issuecomment-2153854680 Is this a known issue? @zhztheplayer -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[I] Can't drop SortExecTransformer in convert SortAggregateExec to HashAggregateExecBaseTransformer when ras enabled [incubator-gluten]
zml1206 opened a new issue, #6011: URL: https://github.com/apache/incubator-gluten/issues/6011 ### Backend VL (Velox) ### Bug description ``` withSQLConf( ("spark.sql.adaptive.enabled", "false"), ("spark.sql.test.forceApplySortAggregate", "true"), ("spark.gluten.sql.columnar.forceShuffledHashJoin", "true")) { createTPCHNotNullTables() spark.sql("select l_partkey,count(1) from lineitem group by l_partkey").explain } ``` == Physical Plan == ``` VeloxColumnarToRowExec +- ^(2) HashAggregateTransformer(keys=[l_partkey#77L], functions=[count(1)], output=[l_partkey#77L, count(1)#123L]) +- ^(2) SortExecTransformer [l_partkey#77L ASC NULLS FIRST], false, 0 +- ^(2) InputIteratorTransformer[l_partkey#77L, count#127L] +- ^(2) InputAdapter +- ^(2) RowToVeloxColumnar +- ^(2) Exchange hashpartitioning(l_partkey#77L, 5), ENSURE_REQUIREMENTS, [plan_id=466] +- ^(2) VeloxColumnarToRowExec +- ^(1) FlushableHashAggregateTransformer(keys=[l_partkey#77L], functions=[partial_count(1)], output=[l_partkey#77L, count#127L]) +- ^(1) SortExecTransformer [l_partkey#77L ASC NULLS FIRST], false, 0 +- ^(1) NativeFileScan parquet [l_partkey#77L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/Users/zml/Desktop/git_hub/incubator-gluten/backends-velox/target..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` ### Spark version None ### Spark configurations _No response_ ### System information _No response_ ### Relevant logs _No response_ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Optimize the performance of hash based shuffle by accumulating batches [incubator-gluten]
zhztheplayer commented on PR #5951: URL: https://github.com/apache/incubator-gluten/pull/5951#issuecomment-2153794304 Oops. I missed this PR before opening [this](https://github.com/apache/incubator-gluten/pull/6009) for similar purpose... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
marin-ma commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153790260 cc: @WangGuangxin @FelixYBW -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Add gluten iceberg jar to bundle package [incubator-gluten]
yma11 commented on PR #6008: URL: https://github.com/apache/incubator-gluten/pull/6008#issuecomment-2153776532 @leoluan2009 Thanks for contribution. For data lake support like iceberg and delta lake, we don't include them in Gluten jar as it's not used by all customers and will make the package fatter. Any reasons for your changes here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Add gluten iceberg jar to bundle package [incubator-gluten]
zhouyuan commented on PR #6008: URL: https://github.com/apache/incubator-gluten/pull/6008#issuecomment-2153773219 CC @yma11 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
zhztheplayer commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153767337 /Benchmark Velox TPCDS -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
zhztheplayer commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153766943 /Benchmark Velox -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
github-actions[bot] commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153764783 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]
baibaichen commented on PR #6010: URL: https://github.com/apache/incubator-gluten/pull/6010#issuecomment-2153764148 We need to merge this PR because the rebase failed with https://github.com/ClickHouse/ClickHouse/pull/64423 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]
github-actions[bot] commented on PR #6010: URL: https://github.com/apache/incubator-gluten/pull/6010#issuecomment-2153761222 https://github.com/apache/incubator-gluten/issues/1632 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]
github-actions[bot] commented on PR #6010: URL: https://github.com/apache/incubator-gluten/pull/6010#issuecomment-2153761381 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240607) [incubator-gluten]
kyligence-git opened a new pull request, #6010: URL: https://github.com/apache/incubator-gluten/pull/6010 Auto commit by gluten daily build, please check the build status and merge it if it's green. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
zhztheplayer commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153754440 @marin-ma There might be some batch-wise overhead around shuffle split processing. We may want to figure it out later to avoid doing such batch coalesce operations that introduce extra copies. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
github-actions[bot] commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153752582 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
github-actions[bot] commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153750521 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
github-actions[bot] commented on PR #6009: URL: https://github.com/apache/incubator-gluten/pull/6009#issuecomment-2153750359 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] [VL] Provide options to combine small batches before sending to shuffle [incubator-gluten]
zhztheplayer opened a new pull request, #6009: URL: https://github.com/apache/incubator-gluten/pull/6009 It's observed that Velox hash-based shuffle is slowed down by small input batches. The patch: 1. Adds two options: - `spark.gluten.sql.columnar.backend.velox.coalesceBatchesBeforeShuffle` (Default: false) Set to true to combine small batches with minimal batch size determined by `spark.gluten.sql.columnar.maxBatchSize`. (Note the misnaming of `maxBatchSize` in Gluten, it might tend to be `minBatchSize`) - `spark.gluten.sql.columnar.backend.velox.minBatchSizeForShuffle` (Optional) Set to override the minimal batch used by `coalesceBatchesBeforeShuffle`. 2. Does essential code refactors and cleanups. ### Comparisons (by setting spark.gluten.sql.columnar.backend.velox.coalesceBatchesBeforeShuffle=true): Q31 total time, before and after: ![image](https://github.com/apache/incubator-gluten/assets/11284395/718583ad-15a2-473b-86b5-94189c2c5c9b) Closer look at exchange, before and after: ![image](https://github.com/apache/incubator-gluten/assets/11284395/3db6bad1-0a04-4879-a923-4ec2b0ee4fce) ![image](https://github.com/apache/incubator-gluten/assets/11284395/93d55924-d8ef-43f5-b994-c8333fa3d8ab) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] Add gluten iceberg jar to bundle package [incubator-gluten]
github-actions[bot] commented on PR #6008: URL: https://github.com/apache/incubator-gluten/pull/6008#issuecomment-2153720813 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]
zzcclp commented on code in PR #6001: URL: https://github.com/apache/incubator-gluten/pull/6001#discussion_r1630524718 ## backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala: ## @@ -66,6 +66,11 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite { // TODO: support default ANSI policy .set("spark.sql.storeAssignmentPolicy", "legacy") .set("spark.sql.warehouse.dir", warehouse) + .set( Review Comment: set this config in the following ut case ? otherwise it can impact the other cases. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] [VL] Support file cache spill in Gluten [incubator-gluten]
yma11 commented on issue #5884: URL: https://github.com/apache/incubator-gluten/issues/5884#issuecomment-2153715084 @zhli1142015 @FelixYBW @zhouyuan @zhztheplayer The code changes are available in the following PRs: [Spark](https://github.com/yma11/spark/pull/4/files), [Gluten](https://github.com/yma11/gluten/pull/2), [Velox](https://github.com/yma11/velox/pull/1), please take a look. As a next step I will test it in E2E and add some docs for it. Here are some explanations about the code changes: 1) New files in `shims/common`: Existing memory allocator listeners such as `ManagedAllocationListener` are under package `gluten-data` and native JNIs are under `backends-velox`, but because I need to call these classes/APIs in the injects, I put them in `shims/common`. 2) Late initialization of file cache: We use `GlutenMemStoreInjects` to get the conf of the cache and then do the initialization after the Velox backend is initialized, which ensures the native libs are loaded. 3) Cache size setting: we need to pass a cache size when calling `setAsyncDataCache`; using the default `int64_t max` will cause a `std::bad_alloc`. But the size is sensitive since in Velox, the data cache will use this value to control the memory allocation. If it is too small, allocation failure will happen at the native side even if Spark doesn't report it at the Java side. As we leverage the Spark memory manager to control the memory logic, we'd resolve this conflict by giving a large fake size for AsyncDataCache, maybe the same as the offheap size. 4) SSD cache can't work well in my test as the file cache entry is easily larger than `8M` and will cause a check failure. An [Issue](https://github.com/facebookincubator/velox/issues/10098) is reported for tracking. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] [VL] Daily Update Velox Version (2024_06_07) [incubator-gluten]
GlutenPerfBot opened a new pull request, #6007: URL: https://github.com/apache/incubator-gluten/pull/6007 Upstream Velox's New Commits: ```txt 35fbc2ea1 by Masha Basmanova, Add from_iso8601_timestamp Presto function (10062) 7164f92ae by Jimmy Lu, Optimize VectorHasher::makeValueIdsDecoded to not cache hash values when it is not beneficial (10084) 79943a76e by Pedro Eugenio Rocha Pedreira, Fix MergeJoinTest.dictionaryOutput flakyness (10087) 1235441b4 by Wei He, Add benchmark of casting string to double (10068) c1fefbe47 by Jimmy Lu, Fix lazy evaluation causing incorrect results in remaining filter (10072) c600d09aa by Kevin Wilfong, Memory corruption in PartitionedOutput when keys are not a prefix of input (10075) 179b1082b by Masha Basmanova, Add UUID Presto type (10078) 7fe89b4f7 by Andrii Rosa, Fix HashStringAllocator::clear (10053) efe648f60 by Masha Basmanova, Allow trailing spaces in cast(varchar as date) (10077) 7637d567f by Ke, Add bucket verification in TableWriter Fuzzer (10039) 4845e903a by xiaoxmeng, Parallelize the spill processing inside spiller by partitioning (9938) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] Update CPP Formatting Script [incubator-gluten]
GlutenPerfBot commented on PR #6006: URL: https://github.com/apache/incubator-gluten/pull/6006#issuecomment-2153694756 = Performance report for TPCH SF2000 with Velox backend, for reference only query log/native_6006_time.csv log/native_master_06_05_2024_54aeb010d_time.csv difference percentage q1 33.46 34.06 0.593 101.77% q2 23.91 23.10 -0.813 96.60% q3 36.66 36.59 -0.070 99.81% q4 30.69 34.70 4.010 113.07% q5 69.29 69.24 -0.056 99.92% q6 5.86 7.84 1.980 133.81% q7 80.59 81.42 0.832 101.03% q8 84.06 87.21 3.143 103.74% q9 120.65 119.35 -1.295 98.93% q10 44.71 45.23 0.516 101.15% q11 20.61 22.65 2.045 109.92% q12 25.91 25.04 -0.875 96.62% q13 37.41 36.70 -0.715 98.09% q14 20.74 17.35 -3.393 83.64% q15 34.46 29.97 -4.490 86.97% q16 13.88 15.12 1.241 108.94% q17 104.36 104.42 0.060 100.06% q18 145.87 144.82 -1.050 99.28% q19 14.54 14.62 0.082 100.56%
Re: [PR] [VL] Daily Update Velox Version (2024_06_07) [incubator-gluten]
github-actions[bot] commented on PR #6007: URL: https://github.com/apache/incubator-gluten/pull/6007#issuecomment-2153695357 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5910] [CH] add custom type to ASTLiteral [incubator-gluten]
github-actions[bot] commented on PR #5911: URL: https://github.com/apache/incubator-gluten/pull/5911#issuecomment-2153688739 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Simplify ExecutionCtx + Handle JNI code pattern [incubator-gluten]
INBreezefall commented on PR #3239: URL: https://github.com/apache/incubator-gluten/pull/3239#issuecomment-2153660309 > A clean-up to JNI code by pulling up a common pattern `ExecutionCtx` + `Handle`. Hi, I am a Gluten newbie. I have a simple question, hope you can help me. I can see that the ExecutionCtxAware interface is implemented in many JNI classes, but its handle method is not called in the Gluten project. Can this interface be deleted from JNI, such as ColumnarBatchJniWrapper? What is the purpose of this interface? Will the handle method be called? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] Update CPP Formatting Script [incubator-gluten]
FelixYBW merged PR #6006: URL: https://github.com/apache/incubator-gluten/pull/6006 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
acvictor commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152985875 > Can you comment a bit how these info are used by Velox? Is it aligned with Spark's behavior? In Spark, properties are present in the split and obtained during listing. Earlier Velox would make one additional call to remote storage to fetch file length per openFileForRead call on a path even though this information is already present upstream while constructing the split. Velox now allows these values to be passed from the caller. By making this change we can eliminate one additional call to remote storage per path (RTT is on the order of tens of milliseconds). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Update to_utc_timestamp and from_utc_timestamp tests [incubator-gluten]
github-actions[bot] commented on PR #5358: URL: https://github.com/apache/incubator-gluten/pull/5358#issuecomment-2152980133 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
github-actions[bot] commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152967097 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]
github-actions[bot] commented on PR #6004: URL: https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2152910863 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
github-actions[bot] commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152880931 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] Update CPP Formatting Script [incubator-gluten]
acvictor commented on PR #6006: URL: https://github.com/apache/incubator-gluten/pull/6006#issuecomment-2152770803 @zhli1142015 @PHILO-HE can you please review? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] Update CPP Formatting Script [incubator-gluten]
acvictor opened a new pull request, #6006: URL: https://github.com/apache/incubator-gluten/pull/6006 ## What changes were proposed in this pull request? Update formatcppcode.sh to install clang 15 if not present. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] Update CPP Formatting Script [incubator-gluten]
github-actions[bot] commented on PR #6006: URL: https://github.com/apache/incubator-gluten/pull/6006#issuecomment-2152722473 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
github-actions[bot] commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152714716 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
github-actions[bot] commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152656543 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Pass file size and modification time to split [incubator-gluten]
github-actions[bot] commented on PR #5632: URL: https://github.com/apache/incubator-gluten/pull/5632#issuecomment-2152640173 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]
github-actions[bot] commented on PR #6004: URL: https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2152480747 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Daily Update Velox Version (2024_06_05) [incubator-gluten]
GlutenPerfBot commented on PR #5998: URL: https://github.com/apache/incubator-gluten/pull/5998#issuecomment-2152440618 = Performance report for TPCH SF2000 with Velox backend, for reference only query log/native_5998_time.csv log/native_master_06_05_2024_54aeb010d_time.csv difference percentage q1 32.75 34.06 1.306 103.99% q2 23.69 23.10 -0.595 97.49% q3 36.26 36.59 0.333 100.92% q4 32.54 34.70 2.156 106.62% q5 68.62 69.24 0.619 100.90% q6 7.79 7.84 0.046 100.59% q7 81.10 81.42 0.316 100.39% q8 85.52 87.21 1.692 101.98% q9 119.13 119.35 0.223 100.19% q10 46.19 45.23 -0.964 97.91% q11 20.33 22.65 2.318 111.40% q12 29.60 25.04 -4.566 84.58% q13 37.70 36.70 -0.998 97.35% q14 22.04 17.35 -4.687 78.73% q15 29.68 29.97 0.289 100.97% q16 14.01 15.12 1.107 107.90% q17 102.29 104.42 2.132 102.08% q18 145.89 144.82 -1.074 99.26% q19 13.53 14.62 1.087 108.03%
[PR] [VL] Daily Update Velox Version (2024_06_06) [incubator-gluten]
GlutenPerfBot opened a new pull request, #6005: URL: https://github.com/apache/incubator-gluten/pull/6005 Upstream Velox's New Commits: ```txt 7637d567f by Ke, Add bucket verification in TableWriter Fuzzer (10039) 4845e903a by xiaoxmeng, Parallelize the spill processing inside spiller by partitioning (9938) 82cad2766 by Masha Basmanova, Rename prestosql/SIMDJsonXxx structs to JsonXxx (10070) 72f470b24 by Jialiang Tan, Order top memory users by reserved memory (10067) 4e8476f53 by Kevin Wilfong, Filters on LazyVectors in Joins can lead to incorrect results (10045) 20e48078b by wypb, Use ByteOutputStream::size() in RowContainer::storeComplexType (10055) 3832dfc88 by Athmaja N, Implement TPCH Query 11 in TpchQueryBuilder (9824) 93846c5c4 by Kevin Wilfong, MergeJoin may hang if right side throws an exception () d1f8cdff2 by Masha Basmanova, Add more items to monthly update for May 2024 (10066) 80b04bf30 by Jacob Wujciak-Jens, Use correct label for 8-core-ubuntu larger runner (10035) d7d104d63 by gaoyangxiaozhu, Fix row index wrong value issue when only project with const data column (10012) c9d36c28f by duanmeng, Add links to new docs for Memory Arbitration, RowNumber and TableWriter Fuzzers (10060) 77b9f9b42 by Jialiang Tan, Fix flaky test MultiFragmentTest.taskTerminateWithProblematicRemainingRemoteSplits (10046) 6e26aa764 by Ke, Fix data verification for Table Writer Fuzzer (10049) bb38f138f by Bikramjeet Vig, Fix min and max aggregates for floating points (9931) 278a893cf by Masha Basmanova, Add monthly update for May 2024 (10052) 79413f241 by rui-mo, Add arg generator for Presto mod decimal function (10037) a6bb4d81f by xiaoxmeng, Fix memory pool lock order reversion detected by Meta internal tsan test (10051) 3a7f8a88a by Masha Basmanova, Make 'Duplicate key' a user error in split_to_map (10054) 3aebeb660 by wypb, Refactor AggregateCompanionAdapter to remove duplicate code (9920) ``` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Daily Update Velox Version (2024_06_06) [incubator-gluten]
github-actions[bot] commented on PR #6005: URL: https://github.com/apache/incubator-gluten/pull/6005#issuecomment-2152360386 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]
github-actions[bot] commented on PR #6004: URL: https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2152276703 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Daily Update Velox Version (2024_06_05) [incubator-gluten]
PHILO-HE merged PR #5998: URL: https://github.com/apache/incubator-gluten/pull/5998 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5827][CH]support utc timestamp transfrom [incubator-gluten]
github-actions[bot] commented on PR #5828: URL: https://github.com/apache/incubator-gluten/pull/5828#issuecomment-2152039410 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]
github-actions[bot] commented on PR #6004: URL: https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2151949609 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]
github-actions[bot] commented on PR #6004: URL: https://github.com/apache/incubator-gluten/pull/6004#issuecomment-2151949208 Thanks for opening a pull request! Could you open an issue for this pull request on Github Issues? https://github.com/apache/incubator-gluten/issues Then could you also rename ***commit message*** and ***pull request title*** in the following format? [GLUTEN-${ISSUES_ID}][COMPONENT]feat/fix: ${detailed message} See also: * [Other pull requests](https://github.com/apache/incubator-gluten/pulls/) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[PR] [CORE] Move JoinSelectionOverrides logic to ColumnarOverrides [incubator-gluten]
zml1206 opened a new pull request, #6004: URL: https://github.com/apache/incubator-gluten/pull/6004 ## What changes were proposed in this pull request? 1. Drop the custom strategy and move the `JoinSelectionOverrides` logic to `ColumnarOverrides`. 2. The ClickHouse backend already supports AQE, so delete the relevant code. ## How was this patch tested? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [Gluten-229] Support Spark SortShuffleManager for ClickHouse Backend [incubator-gluten]
zml1206 commented on code in PR #230: URL: https://github.com/apache/incubator-gluten/pull/230#discussion_r1629075647 ## jvm/src/main/scala/io/glutenproject/extension/StrategyOverrides.scala: ## @@ -76,8 +69,26 @@ object JoinSelectionOverrides extends Strategy with JoinSelectionHelper with SQL if (GlutenConfig.getSessionConf.forceShuffledHashJoin) { // Force use of ShuffledHashJoin in preference to SortMergeJoin. With no respect to // conf setting "spark.sql.join.preferSortMergeJoin". -val leftBuildable = canBuildShuffledHashJoinLeft(joinType) -val rightBuildable = canBuildShuffledHashJoinRight(joinType) +val (leftBuildable, rightBuildable) = if (GlutenConfig.getConf.isClickHouseBackend) { + // Currently, ClickHouse backend can not support AQE, so it needs to use join hint + // to decide the build side, after supporting AQE, will remove this. Review Comment: OK. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Make ColumnarBatch::getRowBytes leak-safe [incubator-gluten]
GlutenPerfBot commented on PR #6002: URL: https://github.com/apache/incubator-gluten/pull/6002#issuecomment-2151751915 = Performance report for TPCH SF2000 with Velox backend, for reference only query log/native_6002_time.csv log/native_master_06_05_2024_54aeb010d_time.csv difference percentage q1 33.59 34.06 0.466 101.39% q2 25.58 23.10 -2.483 90.29% q3 36.47 36.59 0.116 100.32% q4 35.01 34.70 -0.315 99.10% q5 68.00 69.24 1.238 101.82% q6 5.90 7.84 1.934 132.77% q7 82.99 81.42 -1.573 98.10% q8 86.42 87.21 0.790 100.91% q9 118.95 119.35 0.405 100.34% q10 43.19 45.23 2.041 104.73% q11 20.90 22.65 1.756 108.40% q12 25.60 25.04 -0.564 97.80% q13 36.91 36.70 -0.213 99.42% q14 19.27 17.35 -1.924 90.02% q15 30.06 29.97 -0.088 99.71% q16 14.22 15.12 0.894 106.29% q17 100.49 104.42 3.929 103.91% q18 146.89 144.82 -2.077 98.59% q19 16.43 14.62 -1.811 88.98%
Re: [PR] [DNM][GLUTEN-5548][VL] test dynamic setting [incubator-gluten]
zhli1142015 closed pull request #5506: [DNM][GLUTEN-5548][VL] test dynamic setting URL: https://github.com/apache/incubator-gluten/pull/5506 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [DNM] test min [incubator-gluten]
zhli1142015 closed pull request #5964: [DNM] test min URL: https://github.com/apache/incubator-gluten/pull/5964 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5910] [CH] add custom type to ASTLiteral [incubator-gluten]
github-actions[bot] commented on PR #5911: URL: https://github.com/apache/incubator-gluten/pull/5911#issuecomment-2151681224 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] HiveFileFormat has incompatible class error when running TPC-H q18 [incubator-gluten]
lgbo-ustc commented on issue #3351: URL: https://github.com/apache/incubator-gluten/issues/3351#issuecomment-2151622431 > How is this solved? @JkSelf @kelvin-qin 。 We also meet this problem > > ```shell > NATIVE_WRITER_CONF="--conf spark.gluten.sql.native.writer.enabled=true > --conf spark.files=$SPARK_HOME/jars/gluten.jar > --conf spark.driver.extraClassPath=$SPARK_HOME/gluten.jar > --conf spark.executor.extraClassPath=./gluten.jar > --conf spark.driver.userClassPathFirst=true > --conf spark.executor.userClassPathFirst=true > " > ``` Found the problem. The path configuration spark.driver.extraClassPath is wrong -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] HiveFileFormat has incompatible class error when running TPC-H q18 [incubator-gluten]
lgbo-ustc commented on issue #3351: URL: https://github.com/apache/incubator-gluten/issues/3351#issuecomment-2151621681 > ```shell > HiveFileFormat > ``` Found the problem. The path configuration `spark.driver.extraClassPath` is wrong -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240606) [incubator-gluten]
baibaichen merged PR #5999: URL: https://github.com/apache/incubator-gluten/pull/5999 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Make ColumnarBatch::getRowBytes leak-safe [incubator-gluten]
zhztheplayer merged PR #6002: URL: https://github.com/apache/incubator-gluten/pull/6002 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5827][CH]support utc timestamp transfrom [incubator-gluten]
github-actions[bot] commented on PR #5828: URL: https://github.com/apache/incubator-gluten/pull/5828#issuecomment-2151564344 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] HiveFileFormat has incompatible class error when running TPC-H q18 [incubator-gluten]
lgbo-ustc commented on issue #3351: URL: https://github.com/apache/incubator-gluten/issues/3351#issuecomment-2151558970 How is this solved? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5827][CH]support utc timestamp transfrom [incubator-gluten]
github-actions[bot] commented on PR #5828: URL: https://github.com/apache/incubator-gluten/pull/5828#issuecomment-2151554568 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] [VL] can not write hive table on HDFS [incubator-gluten]
RaoZhiRou-Z commented on issue #5879: URL: https://github.com/apache/incubator-gluten/issues/5879#issuecomment-2151552175 @PHILO-HE I have tried the bug-fix,and there is another core error,the core stack shows that: **#0 __GI_raise (sig=sig@entry=6) at /root/work/deck/devel/toolchain/glibc-2.33/signal/raise.c:49 #1 0x7f6143cfa527 in __GI_abort () at abort.c:79 #2 0x7f61435ec089 in os::abort(bool) () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #3 0x7f61437b5363 in VMError::report_and_die() () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #4 0x7f61435f64e5 in JVM_handle_linux_signal () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #5 0x7f61435e8f48 in signalHandler(int, siginfo*, void*) () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #6 #7 0x7f5ffc54e52a in facebook::velox::dwio::common::FileSink::writeWithLogging(std::vector, std::allocator > >&) () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #8 0x7f5ffc54f102 in facebook::velox::dwio::common::FileSink::write(facebook::velox::dwio::common::DataBuffer) () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #9 0x7f5ffc3c9306 in facebook::velox::parquet::ArrowDataBufferSink::Close() () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #10 0x7f5ffc3c525a in facebook::velox::parquet::Writer::close() () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #11 0x7f5ffe1ce775 in Java_org_apache_gluten_datasource_DatasourceJniWrapper_close () from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libgluten.so #12 0x7f612d018767 in ?? () #13 0x7f5ff178fca8 in ?? () #14 0x7f612d0184c7 in ?? () #15 0x7f612d018482 in ?? () #16 0x7f5ff178fc68 in ?? () #17 0x7f5fe36470e8 in ?? () #18 0x7f5ff178fcd8 in ?? () #19 0x7f5fe3647298 in ?? () #20 0x in ?? ()** -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] [VL] can not write hive table on HDFS [incubator-gluten]
RaoZhiRou-Z commented on issue #5879: URL: https://github.com/apache/incubator-gluten/issues/5879#issuecomment-2151549794 @PHILO-HE I have tried the bug-fix,and there is another core error,the core stack shows that: `#0 __GI_raise (sig=sig@entry=6) at /root/work/deck/devel/toolchain/glibc-2.33/signal/raise.c:49 #1 0x7f6143cfa527 in __GI_abort () at abort.c:79 #2 0x7f61435ec089 in os::abort(bool) () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #3 0x7f61437b5363 in VMError::report_and_die() () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #4 0x7f61435f64e5 in JVM_handle_linux_signal () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #5 0x7f61435e8f48 in signalHandler(int, siginfo*, void*) () from /home/work/zjh/env/jdk/jdk1.8.0_391/jre/lib/amd64/server/libjvm.so #6 #7 0x7f5ffc54e52a in facebook::velox::dwio::common::FileSink::writeWithLogging(std::vector, std::allocator > >&) () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #8 0x7f5ffc54f102 in facebook::velox::dwio::common::FileSink::write(facebook::velox::dwio::common::DataBuffer) () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #9 0x7f5ffc3c9306 in facebook::velox::parquet::ArrowDataBufferSink::Close() () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #10 0x7f5ffc3c525a in facebook::velox::parquet::Writer::close() () from /home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libvelox.so #11 0x7f5ffe1ce775 in Java_org_apache_gluten_datasource_DatasourceJniWrapper_close () from 
/home/work/zjh/bbs/spark_tmp/gluten-7bb8d2ed-1d73-4568-b5ff-cb63bdf7b59c/jni/ebcca964-d36c-4429-a13c-b5ca68e850ed/gluten-5234449826142957659/libgluten.so #12 0x7f612d018767 in ?? () #13 0x7f5ff178fca8 in ?? () #14 0x7f612d0184c7 in ?? () #15 0x7f612d018482 in ?? () #16 0x7f5ff178fc68 in ?? () #17 0x7f5fe36470e8 in ?? () #18 0x7f5ff178fcd8 in ?? () #19 0x7f5fe3647298 in ?? () #20 0x in ?? ()` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5981][CH] Make the result be null when the queried field is `null` [incubator-gluten]
github-actions[bot] commented on PR #6001: URL: https://github.com/apache/incubator-gluten/pull/6001#issuecomment-2151538528 Run Gluten Clickhouse CI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Handle try_subtract, try_multiply, try_divide [incubator-gluten]
GlutenPerfBot commented on PR #5985: URL: https://github.com/apache/incubator-gluten/pull/5985#issuecomment-2151520949 = Performance report for TPCH SF2000 with Velox backend, for reference only query log/native_5985_time.csv log/native_master_06_05_2024_54aeb010d_time.csv difference percentage q1 35.32 34.06 -1.266 96.42% q2 23.73 23.10 -0.627 97.36% q3 37.62 36.59 -1.033 97.25% q4 31.56 34.70 3.138 109.94% q5 69.91 69.24 -0.676 99.03% q6 7.53 7.84 0.307 104.08% q7 82.50 81.42 -1.082 98.69% q8 87.04 87.21 0.163 100.19% q9 124.78 119.35 -5.429 95.65% q10 45.74 45.23 -0.509 98.89% q11 20.38 22.65 2.267 111.12% q12 25.56 25.04 -0.520 97.97% q13 40.29 36.70 -3.595 91.08% q14 17.81 17.35 -0.464 97.40% q15 31.24 29.97 -1.270 95.94% q16 14.42 15.12 0.692 104.80% q17 104.54 104.42 -0.114 99.89% q18 145.45 144.82 -0.629 99.57% q19 13.48 14.62 1.143 108.48%
Re: [PR] [Gluten-229] Support Spark SortShuffleManager for ClickHouse Backend [incubator-gluten]
zzcclp commented on code in PR #230: URL: https://github.com/apache/incubator-gluten/pull/230#discussion_r1628850760 ## jvm/src/main/scala/io/glutenproject/extension/StrategyOverrides.scala: ## @@ -76,8 +69,26 @@ object JoinSelectionOverrides extends Strategy with JoinSelectionHelper with SQL if (GlutenConfig.getSessionConf.forceShuffledHashJoin) { // Force use of ShuffledHashJoin in preference to SortMergeJoin. With no respect to // conf setting "spark.sql.join.preferSortMergeJoin". -val leftBuildable = canBuildShuffledHashJoinLeft(joinType) -val rightBuildable = canBuildShuffledHashJoinRight(joinType) +val (leftBuildable, rightBuildable) = if (GlutenConfig.getConf.isClickHouseBackend) { + // Currently, ClickHouse backend can not support AQE, so it needs to use join hint + // to decide the build side, after supporting AQE, will remove this. Review Comment: remove these comments please, it's not correct, now it already supports AQE. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] [VL] Unsupported spark function list [please leave a comment if you plan to pick some] [incubator-gluten]
PHILO-HE commented on issue #4039: URL: https://github.com/apache/incubator-gluten/issues/4039#issuecomment-2151468471 > > ubase64: #4482 > > I see you've map the from_base64 to unbase64, and respectively I find the base64 is almost the same as to_base64, so it's just a missing or is there any other consideration? @Donvi, seems there are a few semantic differences between Spark's `unbase64` & Velox's `from_base64`. So the simple mapping has not been accepted by the community. See discussion: https://github.com/apache/incubator-gluten/pull/5242#discussion_r1548887962. I guess similarly `to_base64` cannot be mapped due to some unknown differences. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] [CH] Not found column when the mixed join condition involves right keys [incubator-gluten]
baibaichen closed issue #5996: [CH] Not found column when the mixed join condition involves right keys URL: https://github.com/apache/incubator-gluten/issues/5996 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [GLUTEN-5996][CH] Fixed missing columns in join with mixed conditions [incubator-gluten]
baibaichen merged PR #5997: URL: https://github.com/apache/incubator-gluten/pull/5997 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
[I] Static build issue when GCS is enabled [incubator-gluten]
PHILO-HE opened a new issue, #6003: URL: https://github.com/apache/incubator-gluten/issues/6003 ### Problem description Here is the error log from building absl (the vcpkg-installed version is not the required one), which is required by google-cloud-cpp. It looks like raw_logging.h (from an old version of absl) installed in vcpkg is being included when compiling. ``` 2024-06-04T03:02:17.6948676Z [6/982] Building CXX object _deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o 2024-06-04T03:02:17.6951327Z FAILED: _deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o 2024-06-04T03:02:17.6961341Z /opt/rh/devtoolset-9/root/usr/bin/c++ -DFOLLY_HAVE_INT128_T=1 -DVELOX_ENABLE_ABFS -DVELOX_ENABLE_HDFS3 -DVELOX_ENABLE_PARQUET -DVELOX_ENABLE_S3 -I/__w/incubator-gluten/incubator-gluten/dev/vcpkg/vcpkg_installed/x64-linux-avx/include -isystem /__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src -Wno-missing-field-initializers -march=native -std=c++17 -mno-avx512f -mbmi2 -D USE_VELOX_COMMON_BASE -D HAS_UNCAUGHT_EXCEPTIONS -Wall -Wextra -Wno-unused -Wno-unused-parameter-Wno-sign-compare -Wno-ignored-qualifiers-Wno-implicit-fallthrough -Wno-class-memaccess -Wno-comment -Wno-int-in-bool-context -Wno-redundant-move -Wno-array-bounds -Wno-maybe-uninitialized -Wno-unused-result -Wno-format-overflow -Wno-strict-aliasing -O3 -DNDEBUG -std=gnu++17 -fPIC -fdiagnostics-color=always -Wall -Wextra -Wcast-qual -Wconversion-null -Wformat-security -Wmissing-declarations -Woverlength-strings -Wpointer-arith -Wundef -Wunused-local-typedefs -Wunused-result -Wvarargs -Wvla -Wwrite-strings -DNOMINMAX -MD -MT _deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o -MF _deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o.d -o _deps/absl-build/absl/base/CMakeFiles/raw_logging_internal.dir/internal/raw_logging.cc.o -c 
/__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc 2024-06-04T03:02:17.6977549Z /__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc: In function 'void absl::lts_20230125::raw_log_internal::{anonymous}::RawLogVA(absl::lts_20230125::LogSeverity, const char*, int, const char*, __va_list_tag*)': 2024-06-04T03:02:17.6981062Z /__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc:178:5: error: 'AsyncSignalSafeWriteError' was not declared in this scope; did you mean 'AsyncSignalSafeWriteToStderr'? 2024-06-04T03:02:17.6982922Z 178 | AsyncSignalSafeWriteError(buffer, strlen(buffer)); 2024-06-04T03:02:17.6983923Z | ^ 2024-06-04T03:02:17.6984493Z | AsyncSignalSafeWriteToStderr 2024-06-04T03:02:17.6986524Z /__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc: At global scope: 2024-06-04T03:02:17.6992358Z /__w/incubator-gluten/incubator-gluten/ep/build-velox/build/velox_ep/_build/release/_deps/absl-src/absl/base/internal/raw_logging.cc:206:6: warning: no previous declaration for 'void absl::lts_20230125::raw_log_internal::AsyncSignalSafeWriteError(const char*, size_t)' [-Wmissing-declarations] 2024-06-04T03:02:17.6995703Z 206 | void AsyncSignalSafeWriteError(const char* s, size_t len) { ``` ### System information N/A ### CMake log _No response_ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [I] [VL] Unsupported spark function list [please leave a comment if you plan to pick some] [incubator-gluten]
Donvi commented on issue #4039: URL: https://github.com/apache/incubator-gluten/issues/4039#issuecomment-2151449323 > ubase64: #4482 I see you've mapped from_base64 to unbase64. Correspondingly, I find that base64 is almost the same as to_base64, so is that mapping simply missing, or is there some other consideration? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Handle try_subtract, try_multiply, try_divide [incubator-gluten]
zhli1142015 merged PR #5985: URL: https://github.com/apache/incubator-gluten/pull/5985 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
Re: [PR] [VL] Make ColumnarBatch::getRowBytes leak-safe [incubator-gluten]
zhztheplayer commented on PR #6002: URL: https://github.com/apache/incubator-gluten/pull/6002#issuecomment-2151428336 cc @JkSelf -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org