This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 24eece3e6 ORC-2102: [C++] Remove HDFS support
24eece3e6 is described below

commit 24eece3e6f3f1bc9903952327136b2dcedfe3abd
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Feb 22 19:03:45 2026 -0800

    ORC-2102: [C++] Remove HDFS support
    
    ### What changes were proposed in this pull request?
    
    This PR aims to remove HDFS support code from Apache ORC repository.
    
    ### Why are the changes needed?
    
    Apache ORC 2.0.1 deprecated HDFS Support to address the following issues.
    - https://github.com/apache/orc/pull/1885
      - #1857
      - https://github.com/apache/orc/issues/2134
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: `Gemini 3.1 Pro (High)` on `Antigravity`
    
    This closes #2136.
    
    Closes #2544 from dongjoon-hyun/ORC-2102.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .github/workflows/build_and_test.yml    |   1 -
 CMakeLists.txt                          |   3 -
 c++/include/orc/OrcFile.hh              |   8 --
 c++/libs/libhdfspp/imported_timestamp   |  10 --
 c++/libs/libhdfspp/libhdfspp.tar.gz     | Bin 948949 -> 0 bytes
 c++/libs/libhdfspp/pull_hdfs.sh         |  32 ------
 c++/src/CMakeLists.txt                  |   8 --
 c++/src/OrcFile.cc                      |  10 +-
 c++/src/OrcHdfsFile.cc                  | 178 --------------------------------
 cmake_modules/ThirdpartyToolchain.cmake |  68 +-----------
 conan/all/conanfile.py                  |   1 -
 11 files changed, 2 insertions(+), 317 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index c0884f51e..d5e7f7685 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -155,7 +155,6 @@ jobs:
         cd build
         cmake .. -G "Visual Studio 17 2022" \
           -DCMAKE_BUILD_TYPE=RELEASE \
-          -DBUILD_LIBHDFSPP=OFF \
           -DBUILD_TOOLS=OFF \
           -DBUILD_JAVA=OFF \
           -DANALYZE_JAVA=OFF \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ae0f9b44f..3a23e1258 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,9 +41,6 @@ option (ANALYZE_JAVA
     "Run static analysis of the Java"
      OFF)
 
-option (BUILD_LIBHDFSPP
-    "Include LIBHDFSPP library in the build process"
-     OFF)
 
 option (BUILD_SPARSEHASH
     "Include sparsehash library in the build process"
diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh
index ea71567c5..1b5a61139 100644
--- a/c++/include/orc/OrcFile.hh
+++ b/c++/include/orc/OrcFile.hh
@@ -135,14 +135,6 @@ namespace orc {
   std::unique_ptr<InputStream> readLocalFile(const std::string& path,
                                              ReaderMetrics* metrics = nullptr);
 
-  /**
-   * Create a stream to an HDFS file.
-   * @param path the uri of the file in HDFS
-   * @param metrics the metrics of the reader
-   */
-  [[deprecated("readHdfsFile is deprecated in 2.0.1")]] 
std::unique_ptr<InputStream> readHdfsFile(
-      const std::string& path, ReaderMetrics* metrics = nullptr);
-
   /**
    * Create a reader to read the ORC file.
    * @param stream the stream to read
diff --git a/c++/libs/libhdfspp/imported_timestamp 
b/c++/libs/libhdfspp/imported_timestamp
deleted file mode 100644
index 84965ce4a..000000000
--- a/c++/libs/libhdfspp/imported_timestamp
+++ /dev/null
@@ -1,10 +0,0 @@
-Wed Aug 30 10:56:51 EDT 2017
-HDFS-10787
-commit 9587bb04a818a2661e264f619b09c15ce10ff38e
-Author: Anatoli Shein <[email protected]>
-Date:   Wed Aug 30 10:49:42 2017 -0400
-
-    fixed warnings3
-diffs: --------------
-       --------------
-Wed Aug 30 10:56:51 EDT 2017
diff --git a/c++/libs/libhdfspp/libhdfspp.tar.gz 
b/c++/libs/libhdfspp/libhdfspp.tar.gz
deleted file mode 100644
index 35c4d6127..000000000
Binary files a/c++/libs/libhdfspp/libhdfspp.tar.gz and /dev/null differ
diff --git a/c++/libs/libhdfspp/pull_hdfs.sh b/c++/libs/libhdfspp/pull_hdfs.sh
deleted file mode 100755
index a207a93f8..000000000
--- a/c++/libs/libhdfspp/pull_hdfs.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-if [ -z "$1" ]; then
-    echo "Usage: pull_hdfs [path_to_hdfs_git_root]"
-    exit 1;
-fi
-if [ ! -d "$1" ]; then
-    echo "$1 is not a directory"
-fi
-if [ ! -d "$1/hadoop-hdfs-project" ]; then
-    echo "$1 is not the root of a hadoop git checkout"
-fi
-
-HADOOP_ROOT=$1
-echo HADOOP_ROOT=$HADOOP_ROOT
-OUT=$(readlink -m `dirname $0`)
-echo OUT=$OUT
-TS=$OUT/imported_timestamp
-
-    cd $HADOOP_ROOT &&
-    mvn -pl :hadoop-hdfs-native-client -Pnative compile 
-Dnative_make_args="copy_hadoop_files"
-    (date > $TS; git rev-parse --abbrev-ref HEAD >> $TS; git log -n 1 >> $TS;  
\
-        echo "diffs: --------------" >> $TS; git diff HEAD >> $TS; \
-        echo "       --------------" >> $TS)
-    cd $OUT &&
-    #Delete everything except for pull_hdfs.sh and imported_timestamp
-    find . ! -name 'pull_hdfs.sh' ! -name 'imported_timestamp' ! -name '.' ! 
-name '..' -exec rm -rf {} + &&
-    cp -R 
$HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp
 . &&
-    cp -R 
$HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/libhdfspp/extern
 libhdfspp/ &&
-    cd libhdfspp &&
-       tar -czf ../libhdfspp.tar.gz * &&
-       cd .. &&
-       rm -rf libhdfspp &&
-       date >> $TS
\ No newline at end of file
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index a1fd549ce..efddae23e 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -191,9 +191,6 @@ set(SOURCE_FILES
   Vector.cc
   Writer.cc)
 
-if(BUILD_LIBHDFSPP)
-  set(SOURCE_FILES ${SOURCE_FILES} OrcHdfsFile.cc)
-endif(BUILD_LIBHDFSPP)
 
 if(BUILD_ENABLE_AVX512)
   set(SOURCE_FILES
@@ -212,7 +209,6 @@ target_link_libraries (orc
     $<BUILD_INTERFACE:orc::Snappy>
     $<BUILD_INTERFACE:orc::lz4>
     $<BUILD_INTERFACE:orc::zstd>
-    $<BUILD_INTERFACE:${LIBHDFSPP_LIBRARIES}>
     $<BUILD_INTERFACE:$<TARGET_NAME_IF_EXISTS:orc::sparsehash>>
   )
 
@@ -227,16 +223,12 @@ target_include_directories (orc
   PRIVATE
     ${CMAKE_CURRENT_BINARY_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}
-    ${LIBHDFSPP_INCLUDE_DIR}
 )
 
 if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL 
"GNU")
   target_compile_options(orc PRIVATE -Wall -Wextra 
$<$<BOOL:${STOP_BUILD_ON_WARNING}>:-Werror>)
 endif ()
 
-if (BUILD_LIBHDFSPP)
-  target_compile_definitions(orc PUBLIC -DBUILD_LIBHDFSPP)
-endif (BUILD_LIBHDFSPP)
 
 if (BUILD_SPARSEHASH)
   target_compile_definitions(orc PUBLIC -DBUILD_SPARSEHASH)
diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc
index be8672432..470adfc1f 100644
--- a/c++/src/OrcFile.cc
+++ b/c++/src/OrcFile.cc
@@ -103,15 +103,7 @@ namespace orc {
   }
 
   std::unique_ptr<InputStream> readFile(const std::string& path, 
ReaderMetrics* metrics) {
-#ifdef BUILD_LIBHDFSPP
-    if (strncmp(path.c_str(), "hdfs://", 7) == 0) {
-      return orc::readHdfsFile(std::string(path), metrics);
-    } else {
-#endif
-      return orc::readLocalFile(std::string(path), metrics);
-#ifdef BUILD_LIBHDFSPP
-    }
-#endif
+    return orc::readLocalFile(std::string(path), metrics);
   }
 
   DIAGNOSTIC_POP
diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc
deleted file mode 100644
index d878e276c..000000000
--- a/c++/src/OrcHdfsFile.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "orc/OrcFile.hh"
-
-#include "Adaptor.hh"
-#include "Utils.hh"
-#include "orc/Exceptions.hh"
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "hdfspp/hdfspp.h"
-
-namespace orc {
-
-  DIAGNOSTIC_PUSH
-
-#ifdef __clang__
-  DIAGNOSTIC_IGNORE("-Wunused-private-field")
-#endif
-
-  class HdfsFileInputStream : public InputStream {
-   private:
-    std::string filename_;
-    std::unique_ptr<hdfs::FileHandle> file_;
-    std::unique_ptr<hdfs::FileSystem> fileSystem_;
-    uint64_t totalLength_;
-    const uint64_t readSize_ = 1024 * 1024;  // 1 MB
-    ReaderMetrics* metrics_;
-
-   public:
-    HdfsFileInputStream(std::string filename, ReaderMetrics* metrics) : 
metrics_(metrics) {
-      filename_ = filename;
-
-      // Building a URI object from the given uri_path
-      hdfs::URI uri;
-      try {
-        uri = hdfs::URI::parse_from_string(filename_);
-      } catch (const hdfs::uri_parse_error&) {
-        throw ParseError("Malformed URI: " + filename_);
-      }
-
-      // This sets conf path to default "$HADOOP_CONF_DIR" or 
"/etc/hadoop/conf"
-      // and loads configs core-site.xml and hdfs-site.xml from the conf path
-      hdfs::ConfigParser parser;
-      if (!parser.LoadDefaultResources()) {
-        throw ParseError("Could not load default resources. ");
-      }
-      auto stats = parser.ValidateResources();
-      // validating core-site.xml
-      if (!stats[0].second.ok()) {
-        throw ParseError(stats[0].first + " is invalid: " + 
stats[0].second.ToString());
-      }
-      // validating hdfs-site.xml
-      if (!stats[1].second.ok()) {
-        throw ParseError(stats[1].first + " is invalid: " + 
stats[1].second.ToString());
-      }
-      hdfs::Options options;
-      if (!parser.get_options(options)) {
-        throw ParseError("Could not load Options object. ");
-      }
-      hdfs::IoService* io_service = hdfs::IoService::New();
-      // Wrapping file_system into a unique pointer to guarantee deletion
-      fileSystem_ =
-          std::unique_ptr<hdfs::FileSystem>(hdfs::FileSystem::New(io_service, 
"", options));
-      if (fileSystem_.get() == nullptr) {
-        throw ParseError("Can't create FileSystem object. ");
-      }
-      hdfs::Status status;
-      // Checking if the user supplied the host
-      if (!uri.get_host().empty()) {
-        // Using port if supplied, otherwise using "" to look up port in 
configs
-        std::string port = uri.has_port() ? std::to_string(uri.get_port()) : 
"";
-        status = fileSystem_->Connect(uri.get_host(), port);
-        if (!status.ok()) {
-          throw ParseError("Can't connect to " + uri.get_host() + ":" + port + 
". " +
-                           status.ToString());
-        }
-      } else {
-        status = fileSystem_->ConnectToDefaultFs();
-        if (!status.ok()) {
-          if (!options.defaultFS.get_host().empty()) {
-            throw ParseError("Error connecting to " + options.defaultFS.str() 
+ ". " +
-                             status.ToString());
-          } else {
-            throw ParseError("Error connecting to the cluster: defaultFS is 
empty. " +
-                             status.ToString());
-          }
-        }
-      }
-
-      if (fileSystem_.get() == nullptr) {
-        throw ParseError("Can't connect the file system. ");
-      }
-
-      hdfs::FileHandle* file_raw = nullptr;
-      status = fileSystem_->Open(uri.get_path(true), &file_raw);
-      if (!status.ok()) {
-        throw ParseError("Can't open " + uri.get_path(true) + ". " + 
status.ToString());
-      }
-      // Wrapping file_raw into a unique pointer to guarantee deletion
-      file_.reset(file_raw);
-
-      hdfs::StatInfo stat_info;
-      status = fileSystem_->GetFileInfo(uri.get_path(true), stat_info);
-      if (!status.ok()) {
-        throw ParseError("Can't stat " + uri.get_path(true) + ". " + 
status.ToString());
-      }
-      totalLength_ = stat_info.length;
-    }
-
-    uint64_t getLength() const override {
-      return totalLength_;
-    }
-
-    uint64_t getNaturalReadSize() const override {
-      return readSize_;
-    }
-
-    void read(void* buf, uint64_t length, uint64_t offset) override {
-      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount);
-      if (!buf) {
-        throw ParseError("Buffer is null");
-      }
-
-      char* buf_ptr = reinterpret_cast<char*>(buf);
-      hdfs::Status status;
-      size_t total_bytes_read = 0;
-      size_t last_bytes_read = 0;
-
-      do {
-        status =
-            file_->PositionRead(buf_ptr, static_cast<size_t>(length) - 
total_bytes_read,
-                                static_cast<off_t>(offset + total_bytes_read), 
&last_bytes_read);
-        if (!status.ok()) {
-          throw ParseError("Error reading the file: " + status.ToString());
-        }
-        total_bytes_read += last_bytes_read;
-        buf_ptr += last_bytes_read;
-      } while (total_bytes_read < length);
-    }
-
-    const std::string& getName() const override {
-      return filename_;
-    }
-
-    ~HdfsFileInputStream() override;
-  };
-
-  DIAGNOSTIC_POP
-
-  HdfsFileInputStream::~HdfsFileInputStream() {}
-
-  std::unique_ptr<InputStream> readHdfsFile(const std::string& path, 
ReaderMetrics* metrics) {
-    return std::make_unique<HdfsFileInputStream>(path, metrics);
-  }
-}  // namespace orc
diff --git a/cmake_modules/ThirdpartyToolchain.cmake 
b/cmake_modules/ThirdpartyToolchain.cmake
index c03afa8f2..a130b7dfe 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -702,70 +702,4 @@ if(BUILD_SPARSEHASH)
   endblock()
 endif()
 
-# ----------------------------------------------------------------------
-# LIBHDFSPP
-if(BUILD_LIBHDFSPP)
-  set (BUILD_LIBHDFSPP FALSE)
-  if(ORC_CXX_HAS_THREAD_LOCAL)
-    find_package(CyrusSASL)
-    find_package(OpenSSL)
-    find_package(Threads)
-    if (CYRUS_SASL_SHARED_LIB AND OPENSSL_LIBRARIES)
-      set (BUILD_LIBHDFSPP TRUE)
-      set (LIBHDFSPP_PREFIX "${THIRDPARTY_DIR}/libhdfspp_ep-install")
-      set (LIBHDFSPP_INCLUDE_DIR "${LIBHDFSPP_PREFIX}/include")
-      set (LIBHDFSPP_STATIC_LIB_NAME hdfspp_static)
-      set (LIBHDFSPP_STATIC_LIB 
"${LIBHDFSPP_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${LIBHDFSPP_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
-      set (LIBHDFSPP_SRC_URL 
"${PROJECT_SOURCE_DIR}/c++/libs/libhdfspp/libhdfspp.tar.gz")
-      set (LIBHDFSPP_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-                                -DCMAKE_INSTALL_PREFIX=${LIBHDFSPP_PREFIX}
-                                -DPROTOBUF_INCLUDE_DIR=${PROTOBUF_INCLUDE_DIR}
-                                -DPROTOBUF_LIBRARY=${PROTOBUF_STATIC_LIB}
-                                -DPROTOBUF_PROTOC_LIBRARY=${PROTOC_STATIC_LIB}
-                                
-DPROTOBUF_PROTOC_EXECUTABLE=${PROTOBUF_EXECUTABLE}
-                                -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR}
-                                -DCMAKE_C_FLAGS=${EP_C_FLAGS}
-                                -DBUILD_SHARED_LIBS=OFF
-                                -DHDFSPP_LIBRARY_ONLY=TRUE
-                                -DBUILD_SHARED_HDFSPP=FALSE)
-
-      if (BUILD_POSITION_INDEPENDENT_LIB)
-        set(LIBHDFSPP_CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS} 
-DCMAKE_POSITION_INDEPENDENT_CODE=ON)
-      endif ()
-
-      ExternalProject_Add (libhdfspp_ep
-        DEPENDS orc::protobuf
-        URL ${LIBHDFSPP_SRC_URL}
-        LOG_DOWNLOAD 0
-        LOG_CONFIGURE 0
-        LOG_BUILD 0
-        LOG_INSTALL 0
-        BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}"
-        CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS})
-
-      orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} 
${LIBHDFSPP_INCLUDE_DIR})
-
-      set (LIBHDFSPP_LIBRARIES
-           libhdfspp
-           ${CYRUS_SASL_SHARED_LIB}
-           ${OPENSSL_LIBRARIES}
-           ${CMAKE_THREAD_LIBS_INIT})
-
-    elseif(CYRUS_SASL_SHARED_LIB)
-      message(STATUS
-      "WARNING: Libhdfs++ library was not built because the required OpenSSL 
library was not found")
-    elseif(OPENSSL_LIBRARIES)
-      message(STATUS
-      "WARNING: Libhdfs++ library was not built because the required CyrusSASL 
library was not found")
-    else ()
-      message(STATUS
-      "WARNING: Libhdfs++ library was not built because the required CyrusSASL 
and OpenSSL libraries were not found")
-    endif(CYRUS_SASL_SHARED_LIB AND OPENSSL_LIBRARIES)
-  else(ORC_CXX_HAS_THREAD_LOCAL)
-    message(STATUS
-    "WARNING: Libhdfs++ library was not built because the required feature
-    thread_local storage is not supported by your compiler. Known compilers 
that
-    support this feature: GCC, Visual Studio, Clang (community version),
-    Clang (version for iOS 9 and later), Clang (version for Xcode 8 and 
later)")
-  endif(ORC_CXX_HAS_THREAD_LOCAL)
-endif(BUILD_LIBHDFSPP)
+
diff --git a/conan/all/conanfile.py b/conan/all/conanfile.py
index cc79f5b0d..56c7b3407 100644
--- a/conan/all/conanfile.py
+++ b/conan/all/conanfile.py
@@ -119,7 +119,6 @@ class OrcRecipe(ConanFile):
         tc.variables["BUILD_JAVA"] = False
         tc.variables["BUILD_CPP_TESTS"] = False
         tc.variables["BUILD_TOOLS"] = self.options.build_tools
-        tc.variables["BUILD_LIBHDFSPP"] = False
         tc.variables["BUILD_POSITION_INDEPENDENT_LIB"] = 
bool(self.options.get_safe("fPIC", True))
         tc.variables["INSTALL_VENDORED_LIBS"] = False
         # AVX512 support is determined by ORC_USER_SIMD_LEVEL env var at 
runtime, defaults to off

Reply via email to