rekado pushed a commit to branch wip-python-team
in repository guix.

commit 67112f4024a4c20c327e5a00b047de33577ba2e7
Author: Ricardo Wurmus <rek...@elephly.net>
AuthorDate: Tue Apr 30 18:36:14 2024 +0200

    gnu: Add apache-orc.
    
    * gnu/packages/databases.scm (apache-orc): New variable.
    
    Change-Id: I9e7df4a03a5d2f258ff44d9705f539f9fc925a99
---
 gnu/packages/databases.scm | 84 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index 238ba34053..8ad73e400a 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -4510,6 +4510,90 @@ transforms idiomatic python function calls to 
well-formed SQL queries.")
 the SQL language using a syntax that reflects the resulting query.")
     (license license:asl2.0)))
 
+(define-public apache-orc
+  (package
+    (name "apache-orc")
+    (version "2.0.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/apache/orc";)
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "1fi6d045wakks0x8clplyxgal342kljqjql7vq5gbd6a2qnaz6m2"))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      #:build-type "Release"
+      #:configure-flags
+      #~(list "-DBUILD_JAVA=OFF"
+              "-DINSTALL_VENDORED_LIBS=OFF"
+              "-DCMAKE_CXX_FLAGS=-fPIC"
+              (string-append "-DGTEST_HOME=" #$(this-package-native-input 
"googletest"))
+              (string-append "-DZSTD_HOME=" (assoc-ref %build-inputs 
"zstd:lib"))
+              (string-append "-DZLIB_HOME=" #$(this-package-input "zlib"))
+              (string-append "-DPROTOBUF_HOME=" #$(this-package-input 
"protobuf"))
+              (string-append "-DLZ4_HOME=" #$(this-package-input "lz4"))
+              (string-append "-DSNAPPY_HOME=" #$(this-package-input "snappy")))
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'disable-bad-test
+            (lambda _
+              ;; This one test fails with an obscure error:
+              ;;
+              ;; Expected: (std::string::npos) != (error.find(error_msg)),
+              ;; actual: 18446744073709551615 vs 18446744073709551615
+              (substitute* "tools/test/TestFileScan.cc"
+                (("findProgram\\(\"tools/src/orc-scan\"\\);" m)
+                 (string-append m "return;")))))
+          (add-after 'unpack 'do-not-download-orc-format
+            (lambda _
+              (substitute* "cmake_modules/ThirdpartyToolchain.cmake"
+                (("URL \"https://archive.apache.org/dist/orc/orc-format.*";)
+                 (string-append "URL \"file://"
+                                #$(this-package-native-input "orc-format")
+                                "\"\n")))))
+          (add-after 'unpack 'timezone-fallback
+            (lambda _
+              ;; In the build container we don't have /etc/localtime
+              (substitute* "c++/src/Timezone.cc"
+                (("return getTimezoneByFilename\\(LOCAL_TIMEZONE\\);")
+                 "if 
(!std::filesystem::exists(std::filesystem::path(LOCAL_TIMEZONE))) {
+  return getTimezoneByName(\"UTC\");
+}
+return getTimezoneByFilename(LOCAL_TIMEZONE);"))))
+          (add-before 'check 'pre-check
+            (lambda* (#:key inputs #:allow-other-keys)
+              (setenv "TZDIR" (search-input-directory inputs
+                                                      "share/zoneinfo")))))))
+    (inputs
+     `(("lz4" ,lz4)
+       ("protobuf" ,protobuf)
+       ("snappy" ,snappy)
+       ("zlib" ,zlib "static")
+       ("zstd" ,zstd)
+       ("zstd:lib" ,zstd "lib")))
+    (native-inputs
+     `(("googletest" ,googletest)
+       ("orc-format" ,(origin
+                        (method url-fetch)
+                        (uri 
"https://archive.apache.org/dist/orc/orc-format-1.0.0/\
+orc-format-1.0.0.tar.gz")
+                        (sha256
+                         (base32
+                          
"1mccbna3mqhhlqs4pw0fa4pgjnq4c41jhxrh84mq27sbz5gsx7vk"))))
+       ("pkg-config" ,pkg-config)
+       ("tzdata" ,tzdata-for-tests)))
+    (home-page "https://orc.apache.org/";)
+    (synopsis "Columnar storage for Hadoop workloads")
+    (description "ORC is a self-describing type-aware columnar file format
+designed for Hadoop workloads.  It is optimized for large streaming reads, but
+with integrated support for finding required rows quickly.")
+    (license license:asl2.0)))
+
 ;; There are many wrappers for this in other languages. When touching, please
 ;; be sure to ensure all dependencies continue to build.
 (define-public apache-arrow

Reply via email to