This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8deffb5 ARROW-3355: [R] Support for factors
8deffb5 is described below
commit 8deffb523dd0f42c4f268a79ada264fbfaa754d4
Author: Romain Francois <[email protected]>
AuthorDate: Mon Oct 8 04:18:38 2018 -0400
ARROW-3355: [R] Support for factors
With the slight discomfort that Arrow dictionary indices are 0-based while R
factor indices are 1-based, so we need to copy the data in both directions.
Author: Romain Francois <[email protected]>
Closes #2711 from romainfrancois/feature/3355-factors and squashes the
following commits:
a59b79514 <Romain Francois> using GetValuesSafely
9207ded6f <Romain Francois> support for dictionaries with indices of types
int8, int16, int32.
04431a59e <Romain Francois> handle R <-> arrow time differences.
12013bb45 <Romain Francois> custom DictionaryArrayInt32Indices_to_Vector
function
b44a1a189 <Romain Francois> custom MakeFactorArray function
6a44a87a2 <Romain Francois> ARROW-3340: support for POSIXct vectors
bdc8aa0be <Romain Francois> array supports Date (either from int or double
as R is lazy about it). (ARROW-3340)
9c0aedcca <Romain Francois> - static_ptr related things.
f6c955d24 <Romain Francois> Test empty arrays and arrays with only nulls
4f52335a7 <Romain Francois> using BitUtil::BytesForBits(n)
:heavy_check_mark:
38a3832b9 <Romain Francois> test dictionary column in record batch
007a1cf06 <Romain Francois> Move dictionary to its own file
f745a0d9e <Romain Francois> test DictionaryArray<string, int32> -> factor
adf6edf83 <Romain Francois> test factor -> DictionaryArray
3f82558c6 <Romain Francois> Convert factor to DictionaryArray
34d8e1186 <Romain Francois> minimal support for arrow::DictionaryType
---
r/DESCRIPTION | 13 +-
r/NAMESPACE | 2 +-
r/R/R6.R | 2 +-
r/R/RcppExports.R | 180 ++++++-----
r/R/array.R | 18 +-
r/R/dictionary.R | 44 +++
r/R/enums.R | 37 ++-
r/man/dictionary.Rd | 18 ++
r/src/ChunkedArray.cpp | 9 +-
r/src/DataType.cpp | 29 ++
r/src/RcppExports.cpp | 592 +++++++++++++++++++++---------------
r/src/array.cpp | 388 ++++++++++++++++++++---
r/src/arrow_types.h | 32 +-
r/tests/testthat/test-Array.R | 146 +++++++++
r/tests/testthat/test-DataType.R | 12 +
r/tests/testthat/test-RecordBatch.R | 52 +++-
16 files changed, 1164 insertions(+), 410 deletions(-)
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index bf76b72..40253a8 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -12,24 +12,26 @@ Encoding: UTF-8
LazyData: true
SystemRequirements: C++11
LinkingTo:
- Rcpp (>= 0.12.18)
+ Rcpp (>= 0.12.18.2)
Imports:
- Rcpp (>= 0.12.18),
+ Rcpp (>= 0.12.18.2),
rlang,
purrr,
assertthat,
glue,
R6,
- vctrs,
+ vctrs (>= 0.0.0.9000),
fs,
tibble,
crayon
Remotes:
- r-lib/vctrs
+ r-lib/vctrs,
+ RcppCore/Rcpp
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.1.0.9000
Suggests:
- testthat
+ testthat,
+ lubridate
Collate:
'enums.R'
'R6.R'
@@ -44,6 +46,7 @@ Collate:
'Struct.R'
'Table.R'
'array.R'
+ 'dictionary.R'
'memory_pool.R'
'reexports-tibble.R'
'zzz.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index d8dfecd..cf5f226 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -1,7 +1,6 @@
# Generated by roxygen2: do not edit by hand
S3method("!=","arrow::Object")
-S3method("$","arrow-enum")
S3method("==","arrow::Array")
S3method("==","arrow::DataType")
S3method("==","arrow::Field")
@@ -22,6 +21,7 @@ export(chunked_array)
export(date32)
export(date64)
export(decimal)
+export(dictionary)
export(float16)
export(float32)
export(float64)
diff --git a/r/R/R6.R b/r/R/R6.R
index 80bdf8e..734ddc0 100644
--- a/r/R/R6.R
+++ b/r/R/R6.R
@@ -96,7 +96,7 @@
LIST = `arrow::ListType`$new(self$pointer()),
STRUCT = `arrow::StructType`$new(self$pointer()),
UNION = stop("Type UNION not implemented yet"),
- DICTIONARY = stop("Type DICTIONARY not implemented yet"),
+ DICTIONARY = `arrow::DictionaryType`$new(self$pointer()),
MAP = stop("Type MAP not implemented yet")
)
}
diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R
index c70a515..0d0299f 100644
--- a/r/R/RcppExports.R
+++ b/r/R/RcppExports.R
@@ -1,74 +1,6 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-Array__from_vector <- function(x) {
- .Call(`_arrow_Array__from_vector`, x)
-}
-
-Array__as_vector <- function(array) {
- .Call(`_arrow_Array__as_vector`, array)
-}
-
-Array__Slice1 <- function(array, offset) {
- .Call(`_arrow_Array__Slice1`, array, offset)
-}
-
-Array__Slice2 <- function(array, offset, length) {
- .Call(`_arrow_Array__Slice2`, array, offset, length)
-}
-
-Array__IsNull <- function(x, i) {
- .Call(`_arrow_Array__IsNull`, x, i)
-}
-
-Array__IsValid <- function(x, i) {
- .Call(`_arrow_Array__IsValid`, x, i)
-}
-
-Array__length <- function(x) {
- .Call(`_arrow_Array__length`, x)
-}
-
-Array__offset <- function(x) {
- .Call(`_arrow_Array__offset`, x)
-}
-
-Array__null_count <- function(x) {
- .Call(`_arrow_Array__null_count`, x)
-}
-
-Array__type <- function(x) {
- .Call(`_arrow_Array__type`, x)
-}
-
-Array__ToString <- function(x) {
- .Call(`_arrow_Array__ToString`, x)
-}
-
-Array__type_id <- function(x) {
- .Call(`_arrow_Array__type_id`, x)
-}
-
-Array__Equals <- function(lhs, rhs) {
- .Call(`_arrow_Array__Equals`, lhs, rhs)
-}
-
-Array__ApproxEquals <- function(lhs, rhs) {
- .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
-}
-
-Array__data <- function(array) {
- .Call(`_arrow_Array__data`, array)
-}
-
-Array__RangeEquals <- function(self, other, start_idx, end_idx,
other_start_idx) {
- .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx,
other_start_idx)
-}
-
-Array__Mask <- function(array) {
- .Call(`_arrow_Array__Mask`, array)
-}
-
ArrayData__get_type <- function(x) {
.Call(`_arrow_ArrayData__get_type`, x)
}
@@ -305,20 +237,24 @@ Object__pointer_address <- function(obj) {
.Call(`_arrow_Object__pointer_address`, obj)
}
-Field__initialize <- function(name, type, nullable = TRUE) {
- .Call(`_arrow_Field__initialize`, name, type, nullable)
+DictionaryType__initialize <- function(type, array, ordered) {
+ .Call(`_arrow_DictionaryType__initialize`, type, array, ordered)
}
-Field__ToString <- function(type) {
- .Call(`_arrow_Field__ToString`, type)
+DictionaryType__index_type <- function(type) {
+ .Call(`_arrow_DictionaryType__index_type`, type)
}
-Field__name <- function(type) {
- .Call(`_arrow_Field__name`, type)
+DictionaryType__name <- function(type) {
+ .Call(`_arrow_DictionaryType__name`, type)
}
-Field__nullable <- function(type) {
- .Call(`_arrow_Field__nullable`, type)
+DictionaryType__dictionary <- function(type) {
+ .Call(`_arrow_DictionaryType__dictionary`, type)
+}
+
+DictionaryType__ordered <- function(type) {
+ .Call(`_arrow_DictionaryType__ordered`, type)
}
MemoryPool__default <- function() {
@@ -421,3 +357,95 @@ Table__column <- function(table, i) {
.Call(`_arrow_Table__column`, table, i)
}
+Array__from_vector <- function(x) {
+ .Call(`_arrow_Array__from_vector`, x)
+}
+
+Array__as_vector <- function(array) {
+ .Call(`_arrow_Array__as_vector`, array)
+}
+
+Array__Slice1 <- function(array, offset) {
+ .Call(`_arrow_Array__Slice1`, array, offset)
+}
+
+Array__Slice2 <- function(array, offset, length) {
+ .Call(`_arrow_Array__Slice2`, array, offset, length)
+}
+
+Array__IsNull <- function(x, i) {
+ .Call(`_arrow_Array__IsNull`, x, i)
+}
+
+Array__IsValid <- function(x, i) {
+ .Call(`_arrow_Array__IsValid`, x, i)
+}
+
+Array__length <- function(x) {
+ .Call(`_arrow_Array__length`, x)
+}
+
+Array__offset <- function(x) {
+ .Call(`_arrow_Array__offset`, x)
+}
+
+Array__null_count <- function(x) {
+ .Call(`_arrow_Array__null_count`, x)
+}
+
+Array__type <- function(x) {
+ .Call(`_arrow_Array__type`, x)
+}
+
+Array__ToString <- function(x) {
+ .Call(`_arrow_Array__ToString`, x)
+}
+
+Array__type_id <- function(x) {
+ .Call(`_arrow_Array__type_id`, x)
+}
+
+Array__Equals <- function(lhs, rhs) {
+ .Call(`_arrow_Array__Equals`, lhs, rhs)
+}
+
+Array__ApproxEquals <- function(lhs, rhs) {
+ .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
+}
+
+Array__data <- function(array) {
+ .Call(`_arrow_Array__data`, array)
+}
+
+Array__RangeEquals <- function(self, other, start_idx, end_idx,
other_start_idx) {
+ .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx,
other_start_idx)
+}
+
+Array__Mask <- function(array) {
+ .Call(`_arrow_Array__Mask`, array)
+}
+
+DictionaryArray__indices <- function(array) {
+ .Call(`_arrow_DictionaryArray__indices`, array)
+}
+
+DictionaryArray__dictionary <- function(array) {
+ .Call(`_arrow_DictionaryArray__dictionary`, array)
+}
+
+Field__initialize <- function(name, type, nullable = TRUE) {
+ .Call(`_arrow_Field__initialize`, name, type, nullable)
+}
+
+Field__ToString <- function(type) {
+ .Call(`_arrow_Field__ToString`, type)
+}
+
+Field__name <- function(type) {
+ .Call(`_arrow_Field__name`, type)
+}
+
+Field__nullable <- function(type) {
+ .Call(`_arrow_Field__nullable`, type)
+}
+
diff --git a/r/R/array.R b/r/R/array.R
index 6e90c7d..7e64daf 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -46,6 +46,14 @@
)
)
+`arrow::Array`$dispatch <- function(xp){
+ a <- `arrow::Array`$new(xp)
+ if(a$type_id() == Type$DICTIONARY){
+ a <- `arrow::DictionaryArray`$new(xp)
+ }
+ a
+}
+
#' @export
`length.arrow::Array` <- function(x) x$length()
@@ -58,5 +66,13 @@
#'
#' @export
array <- function(...){
- `arrow::Array`$new(Array__from_vector(vctrs::vec_c(...)))
+ `arrow::Array`$dispatch(Array__from_vector(vctrs::vec_c(...)))
}
+
+`arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit =
`arrow::Array`,
+ public = list(
+ indices = function()
`arrow::Array`$dispatch(DictionaryArray__indices(self)),
+ dictionary = function()
`arrow::Array`$dispatch(DictionaryArray__dictionary(self))
+ )
+)
+
diff --git a/r/R/dictionary.R b/r/R/dictionary.R
new file mode 100644
index 0000000..b70e70a
--- /dev/null
+++ b/r/R/dictionary.R
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include R6.R
+
+`arrow::DictionaryType` <- R6Class("arrow::DictionaryType",
+ inherit = `arrow::FixedWidthType`,
+ public = list(
+ index_type = function()
`arrow::DataType`$dispatch(DictionaryType__index_type(self)),
+ name = function() DictionaryType__name(self),
+ dictionary = function()
`arrow::Array`$new(DictionaryType__dictionary(self)),
+ ordered = function() DictionaryType__ordered(self)
+ )
+
+)
+
+#' dictionary type factory
+#'
+#' @param type indices type, e.g. [int32()]
+#' @param values values array, typically an arrow array of strings
+#' @param ordered Is this an ordered dictionary
+#'
+#' @export
+dictionary <- function(type, values, ordered = FALSE) {
+ assert_that(
+ inherits(type, "arrow::DataType"),
+ inherits(values, "arrow::Array")
+ )
+ `arrow::DictionaryType`$new(DictionaryType__initialize(type, values,
ordered))
+}
diff --git a/r/R/enums.R b/r/R/enums.R
index 3649d4e..a491d52 100644
--- a/r/R/enums.R
+++ b/r/R/enums.R
@@ -16,48 +16,47 @@
# under the License.
#' @export
-`$.arrow-enum` <- function(x, y){
- structure(unclass(x)[[y]], class = class(x))
-}
-
-#' @export
`print.arrow-enum` <- function(x, ...){
NextMethod()
}
#' @importFrom rlang seq2 quo_name set_names
#' @importFrom purrr map_chr
-enum <- function(class, ...){
- names <- purrr::map_chr(rlang::quos(...), rlang::quo_name)
- names[is.na(names)] <- "NA"
-
+enum <- function(class, ..., .list = list(...)){
structure(
- rlang::set_names(rlang::seq2(0L, length(names)-1), names),
+ .list,
class = c(class, "arrow-enum")
)
}
#' @rdname DataType
#' @export
-TimeUnit <- enum("arrow::TimeUnit::type", SECOND, MILLI, MICRO, NANO)
+TimeUnit <- enum("arrow::TimeUnit::type",
+ SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L
+)
#' @rdname DataType
#' @export
-DateUnit <- enum("arrow::DateUnit", DAY, MILLI)
+DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L)
#' @rdname DataType
#' @export
Type <- enum("arrow::Type::type",
- NA, BOOL, UINT8, INT8, UINT16, INT16, UINT32, INT32, UINT64, INT64,
- HALF_FLOAT, FLOAT, DOUBLE, STRING, BINARY, DATE32, DATE64, TIMESTAMP,
- INTERVAL, DECIMAL, LIST, STRUCT, UNION, DICTIONARY, MAP
+ "NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L,
+ UINT32 = 6L, INT32 = 7L, UINT64 = 8L, INT64 = 9L,
+ HALF_FLOAT = 10L, FLOAT = 11L, DOUBLE = 12L, STRING = 13L,
+ BINARY = 14L, FIXED_SIZE_BINARY = 15L, DATE32 = 16L, DATE64 = 17L, TIMESTAMP
= 18L,
+ TIME32 = 19L, TIME64 = 20L, INTERVAL = 21L, DECIMAL = 22L, LIST = 23L,
STRUCT = 24L,
+ UNION = 25L, DICTIONARY = 26L, MAP = 27L
)
#' @rdname DataType
#' @export
StatusCode <- enum("arrow::StatusCode",
- OK, OutOfMemory, KeyError, TypeError, Invalid, IOError,
- CapacityError, UnknownError, NotImplemented, SerializationError,
- PythonError, PlasmaObjectExists, PlasmaObjectNonexistent, PlasmaStoreFull,
- PlasmaObjectAlreadySealed
+ OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L,
+ Invalid = 4L, IOError = 5L, CapacityError = 6L,
+ UnknownError = 9L, NotImplemented = 10L, SerializationError = 11L,
+ PythonError = 12L, RError = 13L,
+ PlasmaObjectExists = 20L, PlasmaObjectNonexistent = 21L,
+ PlasmaStoreFull = 22L, PlasmaObjectAlreadySealed = 23L
)
diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd
new file mode 100644
index 0000000..22d35f6
--- /dev/null
+++ b/r/man/dictionary.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dictionary.R
+\name{dictionary}
+\alias{dictionary}
+\title{dictionary type factory}
+\usage{
+dictionary(type, values, ordered = FALSE)
+}
+\arguments{
+\item{type}{indices type, e.g. \code{\link[=int32]{int32()}}}
+
+\item{values}{values array, typically an arrow array of strings}
+
+\item{ordered}{Is this an ordered dictionary}
+}
+\description{
+dictionary type factory
+}
diff --git a/r/src/ChunkedArray.cpp b/r/src/ChunkedArray.cpp
index 59f21f5..aa348d9 100644
--- a/r/src/ChunkedArray.cpp
+++ b/r/src/ChunkedArray.cpp
@@ -23,7 +23,7 @@ using namespace arrow;
template <int RTYPE>
inline SEXP simple_ChunkedArray_to_Vector(
const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
- using stored_type = typename Rcpp::Vector<RTYPE>::stored_type;
+ using value_type = typename Rcpp::Vector<RTYPE>::stored_type;
Rcpp::Vector<RTYPE> out = no_init(chunked_array->length());
auto p = out.begin();
@@ -34,10 +34,9 @@ inline SEXP simple_ChunkedArray_to_Vector(
// copy the data
auto q = p;
- p = std::copy_n(
- reinterpret_cast<const stored_type*>(chunk->data()->buffers[1]->data()
+
- chunk->offset() *
sizeof(stored_type)),
- n, p);
+ auto p_chunk =
+ arrow::r::GetValuesSafely<value_type>(chunk->data(), 1,
chunk->offset());
+ p = std::copy_n(p_chunk, n, p);
// set NA using the bitmap
auto bitmap_data = chunk->null_bitmap();
diff --git a/r/src/DataType.cpp b/r/src/DataType.cpp
index 00e12eb..bd0b4b9 100644
--- a/r/src/DataType.cpp
+++ b/r/src/DataType.cpp
@@ -214,3 +214,32 @@ arrow::TimeUnit::type TimestampType__unit(
std::string Object__pointer_address(SEXP obj) {
return tfm::format("%p", EXTPTR_PTR(obj));
}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::DataType> DictionaryType__initialize(
+ const std::shared_ptr<arrow::DataType>& type,
+ const std::shared_ptr<arrow::Array>& array, bool ordered) {
+ return arrow::dictionary(type, array, ordered);
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::DataType> DictionaryType__index_type(
+ const std::shared_ptr<arrow::DictionaryType>& type) {
+ return type->index_type();
+}
+
+// [[Rcpp::export]]
+std::string DictionaryType__name(const std::shared_ptr<arrow::DictionaryType>&
type) {
+ return type->name();
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Array> DictionaryType__dictionary(
+ const std::shared_ptr<arrow::DictionaryType>& type) {
+ return type->dictionary();
+}
+
+// [[Rcpp::export]]
+bool DictionaryType__ordered(const std::shared_ptr<arrow::DictionaryType>&
type) {
+ return type->ordered();
+}
diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp
index 200071b..dcf005a 100644
--- a/r/src/RcppExports.cpp
+++ b/r/src/RcppExports.cpp
@@ -6,204 +6,6 @@
using namespace Rcpp;
-// Array__from_vector
-std::shared_ptr<arrow::Array> Array__from_vector(SEXP x);
-RcppExport SEXP _arrow_Array__from_vector(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< SEXP >::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__from_vector(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__as_vector
-SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array);
-RcppExport SEXP _arrow_Array__as_vector(SEXP arraySEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
- rcpp_result_gen = Rcpp::wrap(Array__as_vector(array));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__Slice1
-std::shared_ptr<arrow::Array> Array__Slice1(const
std::shared_ptr<arrow::Array>& array, int offset);
-RcppExport SEXP _arrow_Array__Slice1(SEXP arraySEXP, SEXP offsetSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
- Rcpp::traits::input_parameter< int >::type offset(offsetSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__Slice1(array, offset));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__Slice2
-std::shared_ptr<arrow::Array> Array__Slice2(const
std::shared_ptr<arrow::Array>& array, int offset, int length);
-RcppExport SEXP _arrow_Array__Slice2(SEXP arraySEXP, SEXP offsetSEXP, SEXP
lengthSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
- Rcpp::traits::input_parameter< int >::type offset(offsetSEXP);
- Rcpp::traits::input_parameter< int >::type length(lengthSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__Slice2(array, offset, length));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__IsNull
-bool Array__IsNull(const std::shared_ptr<arrow::Array>& x, int i);
-RcppExport SEXP _arrow_Array__IsNull(SEXP xSEXP, SEXP iSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- Rcpp::traits::input_parameter< int >::type i(iSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__IsNull(x, i));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__IsValid
-bool Array__IsValid(const std::shared_ptr<arrow::Array>& x, int i);
-RcppExport SEXP _arrow_Array__IsValid(SEXP xSEXP, SEXP iSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- Rcpp::traits::input_parameter< int >::type i(iSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__IsValid(x, i));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__length
-int Array__length(const std::shared_ptr<arrow::Array>& x);
-RcppExport SEXP _arrow_Array__length(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__length(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__offset
-int Array__offset(const std::shared_ptr<arrow::Array>& x);
-RcppExport SEXP _arrow_Array__offset(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__offset(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__null_count
-int Array__null_count(const std::shared_ptr<arrow::Array>& x);
-RcppExport SEXP _arrow_Array__null_count(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__null_count(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__type
-std::shared_ptr<arrow::DataType> Array__type(const
std::shared_ptr<arrow::Array>& x);
-RcppExport SEXP _arrow_Array__type(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__type(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__ToString
-std::string Array__ToString(const std::shared_ptr<arrow::Array>& x);
-RcppExport SEXP _arrow_Array__ToString(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__ToString(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__type_id
-arrow::Type::type Array__type_id(const std::shared_ptr<arrow::Array>& x);
-RcppExport SEXP _arrow_Array__type_id(SEXP xSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__type_id(x));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__Equals
-bool Array__Equals(const std::shared_ptr<arrow::Array>& lhs, const
std::shared_ptr<arrow::Array>& rhs);
-RcppExport SEXP _arrow_Array__Equals(SEXP lhsSEXP, SEXP rhsSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type lhs(lhsSEXP);
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type rhs(rhsSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__Equals(lhs, rhs));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__ApproxEquals
-bool Array__ApproxEquals(const std::shared_ptr<arrow::Array>& lhs, const
std::shared_ptr<arrow::Array>& rhs);
-RcppExport SEXP _arrow_Array__ApproxEquals(SEXP lhsSEXP, SEXP rhsSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type lhs(lhsSEXP);
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type rhs(rhsSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__ApproxEquals(lhs, rhs));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__data
-std::shared_ptr<arrow::ArrayData> Array__data(const
std::shared_ptr<arrow::Array>& array);
-RcppExport SEXP _arrow_Array__data(SEXP arraySEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
- rcpp_result_gen = Rcpp::wrap(Array__data(array));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__RangeEquals
-bool Array__RangeEquals(const std::shared_ptr<arrow::Array>& self, const
std::shared_ptr<arrow::Array>& other, int start_idx, int end_idx, int
other_start_idx);
-RcppExport SEXP _arrow_Array__RangeEquals(SEXP selfSEXP, SEXP otherSEXP, SEXP
start_idxSEXP, SEXP end_idxSEXP, SEXP other_start_idxSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type self(selfSEXP);
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type other(otherSEXP);
- Rcpp::traits::input_parameter< int >::type start_idx(start_idxSEXP);
- Rcpp::traits::input_parameter< int >::type end_idx(end_idxSEXP);
- Rcpp::traits::input_parameter< int >::type
other_start_idx(other_start_idxSEXP);
- rcpp_result_gen = Rcpp::wrap(Array__RangeEquals(self, other, start_idx,
end_idx, other_start_idx));
- return rcpp_result_gen;
-END_RCPP
-}
-// Array__Mask
-LogicalVector Array__Mask(const std::shared_ptr<arrow::Array>& array);
-RcppExport SEXP _arrow_Array__Mask(SEXP arraySEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
- rcpp_result_gen = Rcpp::wrap(Array__Mask(array));
- return rcpp_result_gen;
-END_RCPP
-}
// ArrayData__get_type
std::shared_ptr<arrow::DataType> ArrayData__get_type(const
std::shared_ptr<arrow::ArrayData>& x);
RcppExport SEXP _arrow_ArrayData__get_type(SEXP xSEXP) {
@@ -844,49 +646,60 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
-// Field__initialize
-std::shared_ptr<arrow::Field> Field__initialize(const std::string& name, const
std::shared_ptr<arrow::DataType>& type, bool nullable);
-RcppExport SEXP _arrow_Field__initialize(SEXP nameSEXP, SEXP typeSEXP, SEXP
nullableSEXP) {
+// DictionaryType__initialize
+std::shared_ptr<arrow::DataType> DictionaryType__initialize(const
std::shared_ptr<arrow::DataType>& type, const std::shared_ptr<arrow::Array>&
array, bool ordered);
+RcppExport SEXP _arrow_DictionaryType__initialize(SEXP typeSEXP, SEXP
arraySEXP, SEXP orderedSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP);
Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DataType>&
>::type type(typeSEXP);
- Rcpp::traits::input_parameter< bool >::type nullable(nullableSEXP);
- rcpp_result_gen = Rcpp::wrap(Field__initialize(name, type, nullable));
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
+ Rcpp::traits::input_parameter< bool >::type ordered(orderedSEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryType__initialize(type, array,
ordered));
return rcpp_result_gen;
END_RCPP
}
-// Field__ToString
-std::string Field__ToString(const std::shared_ptr<arrow::Field>& type);
-RcppExport SEXP _arrow_Field__ToString(SEXP typeSEXP) {
+// DictionaryType__index_type
+std::shared_ptr<arrow::DataType> DictionaryType__index_type(const
std::shared_ptr<arrow::DictionaryType>& type);
+RcppExport SEXP _arrow_DictionaryType__index_type(SEXP typeSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Field>&
>::type type(typeSEXP);
- rcpp_result_gen = Rcpp::wrap(Field__ToString(type));
+ Rcpp::traits::input_parameter< const
std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryType__index_type(type));
return rcpp_result_gen;
END_RCPP
}
-// Field__name
-std::string Field__name(std::shared_ptr<arrow::Field> type);
-RcppExport SEXP _arrow_Field__name(SEXP typeSEXP) {
+// DictionaryType__name
+std::string DictionaryType__name(const std::shared_ptr<arrow::DictionaryType>&
type);
+RcppExport SEXP _arrow_DictionaryType__name(SEXP typeSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type
type(typeSEXP);
- rcpp_result_gen = Rcpp::wrap(Field__name(type));
+ Rcpp::traits::input_parameter< const
std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryType__name(type));
return rcpp_result_gen;
END_RCPP
}
-// Field__nullable
-bool Field__nullable(std::shared_ptr<arrow::Field> type);
-RcppExport SEXP _arrow_Field__nullable(SEXP typeSEXP) {
+// DictionaryType__dictionary
+std::shared_ptr<arrow::Array> DictionaryType__dictionary(const
std::shared_ptr<arrow::DictionaryType>& type);
+RcppExport SEXP _arrow_DictionaryType__dictionary(SEXP typeSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type
type(typeSEXP);
- rcpp_result_gen = Rcpp::wrap(Field__nullable(type));
+ Rcpp::traits::input_parameter< const
std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryType__dictionary(type));
+ return rcpp_result_gen;
+END_RCPP
+}
+// DictionaryType__ordered
+bool DictionaryType__ordered(const std::shared_ptr<arrow::DictionaryType>&
type);
+RcppExport SEXP _arrow_DictionaryType__ordered(SEXP typeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const
std::shared_ptr<arrow::DictionaryType>& >::type type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryType__ordered(type));
return rcpp_result_gen;
END_RCPP
}
@@ -1174,36 +987,285 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
-
-static const R_CallMethodDef CallEntries[] = {
- {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1},
- {"_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1},
- {"_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2},
- {"_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3},
- {"_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2},
- {"_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2},
- {"_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1},
- {"_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1},
- {"_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1},
- {"_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1},
- {"_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1},
- {"_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1},
- {"_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2},
- {"_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2},
- {"_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1},
- {"_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5},
- {"_arrow_Array__Mask", (DL_FUNC) &_arrow_Array__Mask, 1},
- {"_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1},
- {"_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length,
1},
- {"_arrow_ArrayData__get_null_count", (DL_FUNC)
&_arrow_ArrayData__get_null_count, 1},
- {"_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset,
1},
- {"_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1},
- {"_arrow_ChunkedArray__null_count", (DL_FUNC)
&_arrow_ChunkedArray__null_count, 1},
- {"_arrow_ChunkedArray__num_chunks", (DL_FUNC)
&_arrow_ChunkedArray__num_chunks, 1},
- {"_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2},
- {"_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1},
- {"_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1},
- {"_arrow_ChunkedArray__as_vector", (DL_FUNC)
&_arrow_ChunkedArray__as_vector, 1},
+// Array__from_vector
+std::shared_ptr<arrow::Array> Array__from_vector(SEXP x);
+RcppExport SEXP _arrow_Array__from_vector(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< SEXP >::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__from_vector(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__as_vector
+SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array);
+RcppExport SEXP _arrow_Array__as_vector(SEXP arraySEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__as_vector(array));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__Slice1
+std::shared_ptr<arrow::Array> Array__Slice1(const
std::shared_ptr<arrow::Array>& array, int offset);
+RcppExport SEXP _arrow_Array__Slice1(SEXP arraySEXP, SEXP offsetSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
+ Rcpp::traits::input_parameter< int >::type offset(offsetSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__Slice1(array, offset));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__Slice2
+std::shared_ptr<arrow::Array> Array__Slice2(const
std::shared_ptr<arrow::Array>& array, int offset, int length);
+RcppExport SEXP _arrow_Array__Slice2(SEXP arraySEXP, SEXP offsetSEXP, SEXP
lengthSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
+ Rcpp::traits::input_parameter< int >::type offset(offsetSEXP);
+ Rcpp::traits::input_parameter< int >::type length(lengthSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__Slice2(array, offset, length));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__IsNull
+bool Array__IsNull(const std::shared_ptr<arrow::Array>& x, int i);
+RcppExport SEXP _arrow_Array__IsNull(SEXP xSEXP, SEXP iSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ Rcpp::traits::input_parameter< int >::type i(iSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__IsNull(x, i));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__IsValid
+bool Array__IsValid(const std::shared_ptr<arrow::Array>& x, int i);
+RcppExport SEXP _arrow_Array__IsValid(SEXP xSEXP, SEXP iSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ Rcpp::traits::input_parameter< int >::type i(iSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__IsValid(x, i));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__length
+int Array__length(const std::shared_ptr<arrow::Array>& x);
+RcppExport SEXP _arrow_Array__length(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__length(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__offset
+int Array__offset(const std::shared_ptr<arrow::Array>& x);
+RcppExport SEXP _arrow_Array__offset(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__offset(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__null_count
+int Array__null_count(const std::shared_ptr<arrow::Array>& x);
+RcppExport SEXP _arrow_Array__null_count(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__null_count(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__type
+std::shared_ptr<arrow::DataType> Array__type(const
std::shared_ptr<arrow::Array>& x);
+RcppExport SEXP _arrow_Array__type(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__type(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__ToString
+std::string Array__ToString(const std::shared_ptr<arrow::Array>& x);
+RcppExport SEXP _arrow_Array__ToString(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__ToString(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__type_id
+arrow::Type::type Array__type_id(const std::shared_ptr<arrow::Array>& x);
+RcppExport SEXP _arrow_Array__type_id(SEXP xSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type x(xSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__type_id(x));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__Equals
+bool Array__Equals(const std::shared_ptr<arrow::Array>& lhs, const
std::shared_ptr<arrow::Array>& rhs);
+RcppExport SEXP _arrow_Array__Equals(SEXP lhsSEXP, SEXP rhsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type lhs(lhsSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type rhs(rhsSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__Equals(lhs, rhs));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__ApproxEquals
+bool Array__ApproxEquals(const std::shared_ptr<arrow::Array>& lhs, const
std::shared_ptr<arrow::Array>& rhs);
+RcppExport SEXP _arrow_Array__ApproxEquals(SEXP lhsSEXP, SEXP rhsSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type lhs(lhsSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type rhs(rhsSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__ApproxEquals(lhs, rhs));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__data
+std::shared_ptr<arrow::ArrayData> Array__data(const
std::shared_ptr<arrow::Array>& array);
+RcppExport SEXP _arrow_Array__data(SEXP arraySEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__data(array));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__RangeEquals
+bool Array__RangeEquals(const std::shared_ptr<arrow::Array>& self, const
std::shared_ptr<arrow::Array>& other, int start_idx, int end_idx, int
other_start_idx);
+RcppExport SEXP _arrow_Array__RangeEquals(SEXP selfSEXP, SEXP otherSEXP, SEXP
start_idxSEXP, SEXP end_idxSEXP, SEXP other_start_idxSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type self(selfSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type other(otherSEXP);
+ Rcpp::traits::input_parameter< int >::type start_idx(start_idxSEXP);
+ Rcpp::traits::input_parameter< int >::type end_idx(end_idxSEXP);
+ Rcpp::traits::input_parameter< int >::type
other_start_idx(other_start_idxSEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__RangeEquals(self, other, start_idx,
end_idx, other_start_idx));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Array__Mask
+LogicalVector Array__Mask(const std::shared_ptr<arrow::Array>& array);
+RcppExport SEXP _arrow_Array__Mask(SEXP arraySEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Array>&
>::type array(arraySEXP);
+ rcpp_result_gen = Rcpp::wrap(Array__Mask(array));
+ return rcpp_result_gen;
+END_RCPP
+}
+// DictionaryArray__indices
+std::shared_ptr<arrow::Array> DictionaryArray__indices(const
std::shared_ptr<arrow::DictionaryArray>& array);
+RcppExport SEXP _arrow_DictionaryArray__indices(SEXP arraySEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const
std::shared_ptr<arrow::DictionaryArray>& >::type array(arraySEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryArray__indices(array));
+ return rcpp_result_gen;
+END_RCPP
+}
+// DictionaryArray__dictionary
+std::shared_ptr<arrow::Array> DictionaryArray__dictionary(const
std::shared_ptr<arrow::DictionaryArray>& array);
+RcppExport SEXP _arrow_DictionaryArray__dictionary(SEXP arraySEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const
std::shared_ptr<arrow::DictionaryArray>& >::type array(arraySEXP);
+ rcpp_result_gen = Rcpp::wrap(DictionaryArray__dictionary(array));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Field__initialize
+std::shared_ptr<arrow::Field> Field__initialize(const std::string& name, const
std::shared_ptr<arrow::DataType>& type, bool nullable);
+RcppExport SEXP _arrow_Field__initialize(SEXP nameSEXP, SEXP typeSEXP, SEXP
nullableSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP);
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::DataType>&
>::type type(typeSEXP);
+ Rcpp::traits::input_parameter< bool >::type nullable(nullableSEXP);
+ rcpp_result_gen = Rcpp::wrap(Field__initialize(name, type, nullable));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Field__ToString
+std::string Field__ToString(const std::shared_ptr<arrow::Field>& type);
+RcppExport SEXP _arrow_Field__ToString(SEXP typeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< const std::shared_ptr<arrow::Field>&
>::type type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(Field__ToString(type));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Field__name
+std::string Field__name(std::shared_ptr<arrow::Field> type);
+RcppExport SEXP _arrow_Field__name(SEXP typeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type
type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(Field__name(type));
+ return rcpp_result_gen;
+END_RCPP
+}
+// Field__nullable
+bool Field__nullable(std::shared_ptr<arrow::Field> type);
+RcppExport SEXP _arrow_Field__nullable(SEXP typeSEXP) {
+BEGIN_RCPP
+ Rcpp::RObject rcpp_result_gen;
+ Rcpp::RNGScope rcpp_rngScope_gen;
+ Rcpp::traits::input_parameter< std::shared_ptr<arrow::Field> >::type
type(typeSEXP);
+ rcpp_result_gen = Rcpp::wrap(Field__nullable(type));
+ return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {
+ {"_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1},
+ {"_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length,
1},
+ {"_arrow_ArrayData__get_null_count", (DL_FUNC)
&_arrow_ArrayData__get_null_count, 1},
+ {"_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset,
1},
+ {"_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1},
+ {"_arrow_ChunkedArray__null_count", (DL_FUNC)
&_arrow_ChunkedArray__null_count, 1},
+ {"_arrow_ChunkedArray__num_chunks", (DL_FUNC)
&_arrow_ChunkedArray__num_chunks, 1},
+ {"_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2},
+ {"_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1},
+ {"_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1},
+ {"_arrow_ChunkedArray__as_vector", (DL_FUNC)
&_arrow_ChunkedArray__as_vector, 1},
{"_arrow_ChunkArray__Slice1", (DL_FUNC) &_arrow_ChunkArray__Slice1, 2},
{"_arrow_ChunkArray__Slice2", (DL_FUNC) &_arrow_ChunkArray__Slice2, 3},
{"_arrow_ChunkedArray__from_list", (DL_FUNC)
&_arrow_ChunkedArray__from_list, 1},
@@ -1252,10 +1314,11 @@ static const R_CallMethodDef CallEntries[] = {
{"_arrow_TimestampType__timezone", (DL_FUNC)
&_arrow_TimestampType__timezone, 1},
{"_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1},
{"_arrow_Object__pointer_address", (DL_FUNC)
&_arrow_Object__pointer_address, 1},
- {"_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3},
- {"_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1},
- {"_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1},
- {"_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1},
+ {"_arrow_DictionaryType__initialize", (DL_FUNC)
&_arrow_DictionaryType__initialize, 3},
+ {"_arrow_DictionaryType__index_type", (DL_FUNC)
&_arrow_DictionaryType__index_type, 1},
+ {"_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1},
+ {"_arrow_DictionaryType__dictionary", (DL_FUNC)
&_arrow_DictionaryType__dictionary, 1},
+ {"_arrow_DictionaryType__ordered", (DL_FUNC)
&_arrow_DictionaryType__ordered, 1},
{"_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0},
{"_arrow_MemoryPool__bytes_allocated", (DL_FUNC)
&_arrow_MemoryPool__bytes_allocated, 1},
{"_arrow_MemoryPool__max_memory", (DL_FUNC)
&_arrow_MemoryPool__max_memory, 1},
@@ -1281,6 +1344,29 @@ static const R_CallMethodDef CallEntries[] = {
{"_arrow_read_table_", (DL_FUNC) &_arrow_read_table_, 1},
{"_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 1},
{"_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2},
+ {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1},
+ {"_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1},
+ {"_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2},
+ {"_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3},
+ {"_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2},
+ {"_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2},
+ {"_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1},
+ {"_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1},
+ {"_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1},
+ {"_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1},
+ {"_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1},
+ {"_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1},
+ {"_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2},
+ {"_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2},
+ {"_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1},
+ {"_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5},
+ {"_arrow_Array__Mask", (DL_FUNC) &_arrow_Array__Mask, 1},
+ {"_arrow_DictionaryArray__indices", (DL_FUNC)
&_arrow_DictionaryArray__indices, 1},
+ {"_arrow_DictionaryArray__dictionary", (DL_FUNC)
&_arrow_DictionaryArray__dictionary, 1},
+ {"_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3},
+ {"_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1},
+ {"_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1},
+ {"_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1},
{NULL, NULL, 0}
};
diff --git a/r/src/array.cpp b/r/src/array.cpp
index e11e1f7..0f6c18a 100644
--- a/r/src/array.cpp
+++ b/r/src/array.cpp
@@ -36,6 +36,8 @@ class SimpleRBuffer : public Buffer {
Vec vec_;
};
+// ---------------------------- R vector -> Array
+
template <int RTYPE, typename Type>
std::shared_ptr<Array> SimpleArray(SEXP x) {
Rcpp::Vector<RTYPE> vec(x);
@@ -49,7 +51,7 @@ std::shared_ptr<Array> SimpleArray(SEXP x) {
auto first_na = std::find_if(vec.begin(), vec.end(),
Rcpp::Vector<RTYPE>::is_na);
if (first_na < vec.end()) {
- R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &null_bitmap));
+ R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_bitmap));
internal::FirstTimeBitmapWriter
bitmap_writer(null_bitmap->mutable_data(), 0, n);
// first loop to clear all the bits before the first NA
@@ -87,7 +89,7 @@ std::shared_ptr<arrow::Array> MakeBooleanArray(LogicalVector_
vec) {
// allocate a buffer for the data
std::shared_ptr<Buffer> data_bitmap;
- R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &data_bitmap));
+ R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &data_bitmap));
auto data_bitmap_data = data_bitmap->mutable_data();
internal::FirstTimeBitmapWriter bitmap_writer(data_bitmap_data, 0, n);
R_xlen_t null_count = 0;
@@ -108,7 +110,7 @@ std::shared_ptr<arrow::Array>
MakeBooleanArray(LogicalVector_ vec) {
if (i < n) {
// there has been a null before the end, so we need
// to collect that information in a null bitmap
- R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &null_bitmap));
+ R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_bitmap));
auto null_bitmap_data = null_bitmap->mutable_data();
internal::FirstTimeBitmapWriter null_bitmap_writer(null_bitmap_data, 0, n);
@@ -166,7 +168,7 @@ std::shared_ptr<Array> MakeStringArray(StringVector_ vec) {
}
if (i < n) {
- R_ERROR_NOT_OK(AllocateBuffer(ceil((double)n / 8), &null_buffer));
+ R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_buffer));
internal::FirstTimeBitmapWriter
null_bitmap_writer(null_buffer->mutable_data(), 0, n);
// catch up
@@ -210,6 +212,139 @@ std::shared_ptr<Array> MakeStringArray(StringVector_ vec)
{
return MakeArray(data);
}
+// Builds a DictionaryArray from an R factor, storing the codes in an index array
+// of arrow type `Type` and the factor levels as the (string) dictionary.
+//
+// R factor codes are 1-based whereas arrow dictionary indices are 0-based, so each
+// code is shifted by one while it is copied. NA codes become nulls; the validity
+// bitmap is only allocated once a first NA has actually been found.
+template <typename Type>
+std::shared_ptr<Array> MakeFactorArrayImpl(Rcpp::IntegerVector_ factor) {
+  using value_type = typename arrow::TypeTraits<Type>::ArrayType::value_type;
+
+  // the levels of the factor are the values of the dictionary
+  auto dict_values = MakeStringArray(Rf_getAttrib(factor, R_LevelsSymbol));
+  auto index_type = std::make_shared<Type>();
+  auto dict_type = dictionary(index_type, dict_values, Rf_inherits(factor, "ordered"));
+
+  auto n = factor.size();
+
+  std::shared_ptr<Buffer> indices_buffer;
+  R_ERROR_NOT_OK(AllocateBuffer(n * sizeof(value_type), &indices_buffer));
+
+  // buffers[0] is the validity bitmap (absent unless an NA shows up),
+  // buffers[1] holds the indices
+  std::vector<std::shared_ptr<Buffer>> buffers{nullptr, indices_buffer};
+
+  int64_t null_count = 0;
+  R_xlen_t i = 0;
+  auto p_factor = factor.begin();
+  auto p_indices = reinterpret_cast<value_type*>(indices_buffer->mutable_data());
+
+  // fast path: copy codes until the first NA (if any)
+  for (; i < n; i++, ++p_indices, ++p_factor) {
+    if (*p_factor == NA_INTEGER) break;
+    *p_indices = static_cast<value_type>(*p_factor - 1);
+  }
+
+  if (i < n) {
+    // there are NA's so we need a null buffer
+    std::shared_ptr<Buffer> null_buffer;
+    R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_buffer));
+    internal::FirstTimeBitmapWriter null_bitmap_writer(null_buffer->mutable_data(), 0,
+                                                       n);
+
+    // catch up: everything before the first NA is valid
+    for (R_xlen_t j = 0; j < i; j++, null_bitmap_writer.Next()) {
+      null_bitmap_writer.Set();
+    }
+
+    // resume index filling, now tracking validity as well
+    for (; i < n; i++, ++p_indices, ++p_factor, null_bitmap_writer.Next()) {
+      if (*p_factor == NA_INTEGER) {
+        null_bitmap_writer.Clear();
+        null_count++;
+        // AllocateBuffer does not zero memory: give the slot a defined value so
+        // that no uninitialized bytes end up in the index buffer
+        *p_indices = 0;
+      } else {
+        null_bitmap_writer.Set();
+        *p_indices = static_cast<value_type>(*p_factor - 1);
+      }
+    }
+
+    null_bitmap_writer.Finish();
+    buffers[0] = std::move(null_buffer);
+  }
+
+  auto array_indices_data =
+      ArrayData::Make(index_type, n, std::move(buffers), null_count, 0);
+  auto array_indices = MakeArray(array_indices_data);
+
+  std::shared_ptr<Array> out;
+  R_ERROR_NOT_OK(DictionaryArray::FromArrays(dict_type, array_indices, &out));
+  return out;
+}
+
+// Converts an R factor to a DictionaryArray, choosing the index width from the
+// number of levels: int8 below 128 levels, int16 below 32768, int32 otherwise.
+std::shared_ptr<Array> MakeFactorArray(Rcpp::IntegerVector_ factor) {
+  SEXP factor_levels = factor.attr("levels");
+  const int n_levels = Rf_length(factor_levels);
+
+  if (n_levels >= 32768) {
+    return MakeFactorArrayImpl<arrow::Int32Type>(factor);
+  }
+  if (n_levels >= 128) {
+    return MakeFactorArrayImpl<arrow::Int16Type>(factor);
+  }
+  return MakeFactorArrayImpl<arrow::Int8Type>(factor);
+}
+
+// Scales one element of a POSIXct vector by 1000 to get the int64 value stored in
+// the Date64 array (presumably seconds -> milliseconds). Only the `int` and
+// `double` specializations below are defined.
+template <typename T>
+int64_t time_cast(T value);
+
+template <>
+inline int64_t time_cast<int>(int value) {
+  // widen before multiplying so the product cannot overflow `int`
+  return static_cast<int64_t>(value) * 1000;
+}
+
+template <>
+inline int64_t time_cast<double>(double value) {
+  // anything below one unit of the scaled value is truncated by the cast
+  return static_cast<int64_t>(value * 1000);
+}
+
+// Builds a Date64Array from an R POSIXct vector stored as integer (INTSXP) or
+// double (REALSXP). NA elements become nulls; the validity bitmap is only
+// allocated once a first NA has actually been found.
+template <int RTYPE>
+std::shared_ptr<Array> Date64Array_From_POSIXct(SEXP x) {
+  Rcpp::Vector<RTYPE> vec(x);
+  auto p_vec = vec.begin();
+  auto n = vec.size();
+
+  std::shared_ptr<Buffer> values_buffer;
+  R_ERROR_NOT_OK(AllocateBuffer(n * sizeof(int64_t), &values_buffer));
+  auto p_values = reinterpret_cast<int64_t*>(values_buffer->mutable_data());
+
+  // buffers[0] is the validity bitmap (absent unless an NA shows up),
+  // buffers[1] holds the values
+  std::vector<std::shared_ptr<Buffer>> buffers{nullptr, values_buffer};
+
+  int64_t null_count = 0;
+  R_xlen_t i = 0;
+
+  // fast path: convert values until the first NA (if any)
+  for (; i < n; i++, ++p_vec, ++p_values) {
+    if (Rcpp::Vector<RTYPE>::is_na(*p_vec)) break;
+    *p_values = time_cast(*p_vec);
+  }
+
+  if (i < n) {
+    std::shared_ptr<Buffer> null_buffer;
+    R_ERROR_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(n), &null_buffer));
+    internal::FirstTimeBitmapWriter bitmap_writer(null_buffer->mutable_data(), 0, n);
+
+    // catch up: everything before the first NA is valid
+    for (R_xlen_t j = 0; j < i; j++, bitmap_writer.Next()) {
+      bitmap_writer.Set();
+    }
+
+    // finish, now tracking validity as well
+    for (; i < n; i++, ++p_vec, ++p_values, bitmap_writer.Next()) {
+      if (Rcpp::Vector<RTYPE>::is_na(*p_vec)) {
+        bitmap_writer.Clear();
+        null_count++;
+        // AllocateBuffer does not zero memory: give the slot a defined value so
+        // that no uninitialized bytes end up in the values buffer
+        *p_values = 0;
+      } else {
+        bitmap_writer.Set();
+        *p_values = time_cast(*p_vec);
+      }
+    }
+
+    bitmap_writer.Finish();
+    buffers[0] = std::move(null_buffer);
+  }
+
+  auto data = ArrayData::Make(std::make_shared<Date64Type>(), n, std::move(buffers),
+                              null_count, 0);
+
+  return std::make_shared<Date64Array>(data);
+}
+
} // namespace r
} // namespace arrow
@@ -220,11 +355,22 @@ std::shared_ptr<arrow::Array> Array__from_vector(SEXP x) {
return arrow::r::MakeBooleanArray(x);
case INTSXP:
if (Rf_isFactor(x)) {
- break;
+ return arrow::r::MakeFactorArray(x);
+ }
+ if (Rf_inherits(x, "Date")) {
+ return arrow::r::SimpleArray<INTSXP, arrow::Date32Type>(x);
+ }
+ if (Rf_inherits(x, "POSIXct")) {
+ return arrow::r::Date64Array_From_POSIXct<INTSXP>(x);
}
return arrow::r::SimpleArray<INTSXP, arrow::Int32Type>(x);
case REALSXP:
- // TODO: Dates, ...
+ if (Rf_inherits(x, "Date")) {
+ return arrow::r::SimpleArray<INTSXP, arrow::Date32Type>(x);
+ }
+ if (Rf_inherits(x, "POSIXct")) {
+ return arrow::r::Date64Array_From_POSIXct<REALSXP>(x);
+ }
return arrow::r::SimpleArray<REALSXP, arrow::DoubleType>(x);
case RAWSXP:
return arrow::r::SimpleArray<RAWSXP, arrow::Int8Type>(x);
@@ -238,14 +384,28 @@ std::shared_ptr<arrow::Array> Array__from_vector(SEXP x) {
return nullptr;
}
+// ---------------------------- Array -> R vector
+
+namespace arrow {
+namespace r {
+
template <int RTYPE>
inline SEXP simple_Array_to_Vector(const std::shared_ptr<arrow::Array>& array)
{
- using stored_type = typename Rcpp::Vector<RTYPE>::stored_type;
- auto start = reinterpret_cast<const stored_type*>(
- array->data()->buffers[1]->data() + array->offset() *
sizeof(stored_type));
+ using value_type = typename Rcpp::Vector<RTYPE>::stored_type;
+ auto n = array->length();
+ auto null_count = array->null_count();
- size_t n = array->length();
- Rcpp::Vector<RTYPE> vec(start, start + n);
+ // special cases
+ if (n == 0) return Rcpp::Vector<RTYPE>(0);
+ if (n == null_count) {
+ return Rcpp::Vector<RTYPE>(n, default_value<RTYPE>());
+ }
+
+ // first copy all the data
+ auto p_values = GetValuesSafely<value_type>(array->data(), 1,
array->offset());
+ Rcpp::Vector<RTYPE> vec(p_values, p_values + n);
+
+ // then set the sentinel NA
if (array->null_count() && RTYPE != RAWSXP) {
// TODO: not sure what to do with RAWSXP since
// R raw vector do not have a concept of missing data
@@ -262,41 +422,25 @@ inline SEXP simple_Array_to_Vector(const
std::shared_ptr<arrow::Array>& array) {
return vec;
}
-inline SEXP BooleanArray_to_Vector(const std::shared_ptr<arrow::Array>& array)
{
- size_t n = array->length();
- LogicalVector vec(n);
+inline SEXP StringArray_to_Vector(const std::shared_ptr<arrow::Array>& array) {
+ auto n = array->length();
+ auto null_count = array->null_count();
- // process the data
- arrow::internal::BitmapReader data_reader(array->data()->buffers[1]->data(),
- array->offset(), n);
- for (size_t i = 0; i < n; i++, data_reader.Next()) {
- vec[i] = data_reader.IsSet();
- }
+ // special cases
+ if (n == 0) return Rcpp::CharacterVector_(0);
- // then the null bitmap if needed
- if (array->null_count()) {
- arrow::internal::BitmapReader null_reader(array->null_bitmap()->data(),
- array->offset(), n);
- for (size_t i = 0; i < n; i++, null_reader.Next()) {
- if (null_reader.IsNotSet()) {
- vec[i] = LogicalVector::get_na();
- }
- }
+ // only NA
+ if (null_count == n) {
+ return StringVector_(n, NA_STRING);
}
- return vec;
-}
-
-inline SEXP StringArray_to_Vector(const std::shared_ptr<arrow::Array>& array) {
- auto n = array->length();
- Rcpp::CharacterVector res(n);
-
+ Rcpp::CharacterVector res(no_init(n));
const auto& buffers = array->data()->buffers;
- auto p_offset = reinterpret_cast<const int32_t*>(buffers[1]->data()) +
array->offset();
- auto p_data = reinterpret_cast<const char*>(buffers[2]->data()) + *p_offset;
+ auto p_offset = GetValuesSafely<int32_t>(array->data(), 1, array->offset());
+ auto p_data = GetValuesSafely<char>(array->data(), 2, *p_offset);
- if (array->null_count()) {
+ if (null_count) {
// need to watch for nulls
arrow::internal::BitmapReader null_reader(array->null_bitmap_data(),
array->offset(),
n);
@@ -323,8 +467,154 @@ inline SEXP StringArray_to_Vector(const
std::shared_ptr<arrow::Array>& array) {
return res;
}
+// Converts a boolean Array into an R logical vector: value bits are unpacked
+// first, then slots flagged as null in the validity bitmap are overwritten with NA.
+inline SEXP BooleanArray_to_Vector(const std::shared_ptr<arrow::Array>& array) {
+  const auto length = array->length();
+  const auto nulls = array->null_count();
+
+  // trivial shapes: nothing has to be read from the buffers
+  if (length == 0) return LogicalVector(0);
+  if (length == nulls) return LogicalVector(length, NA_LOGICAL);
+
+  LogicalVector out = no_init(length);
+
+  // pass 1: unpack the value bits; the array offset is applied by the reader
+  auto value_bits = GetValuesSafely<uint8_t>(array->data(), 1, 0);
+  arrow::internal::BitmapReader values(value_bits, array->offset(), length);
+  for (auto it = out.begin(); it != out.end(); ++it, values.Next()) {
+    *it = values.IsSet();
+  }
+
+  // pass 2: only needed when some slots are null
+  if (nulls) {
+    arrow::internal::BitmapReader validity(array->null_bitmap()->data(),
+                                           array->offset(), length);
+    for (auto it = out.begin(); it != out.end(); ++it, validity.Next()) {
+      if (validity.IsNotSet()) {
+        *it = LogicalVector::get_na();
+      }
+    }
+  }
+
+  return out;
+}
+
+// Builds an R factor from the indices of a dictionary array.
+//
+// `array` holds the indices (of arrow type `Type`), `dict` the string dictionary
+// that becomes the factor levels, and `ordered` selects an ordered factor. The
+// 0-based arrow indices are shifted by one to give 1-based R codes; null indices
+// become NA.
+template <typename Type>
+inline SEXP DictionaryArrayInt32Indices_to_Vector(
+    const std::shared_ptr<arrow::Array>& array,
+    const std::shared_ptr<arrow::Array>& dict, bool ordered) {
+  using value_type = typename arrow::TypeTraits<Type>::ArrayType::value_type;
+
+  size_t n = array->length();
+
+  // set up the factor attributes first: they are needed on every return path
+  IntegerVector codes(no_init(n));
+  codes.attr("levels") = StringArray_to_Vector(dict);
+  if (!ordered) {
+    codes.attr("class") = "factor";
+  } else {
+    codes.attr("class") = CharacterVector::create("ordered", "factor");
+  }
+
+  if (n == 0) return codes;
+
+  auto null_count = array->null_count();
+  if (n == null_count) {
+    // nothing but nulls: no need to look at the index buffer
+    std::fill(codes.begin(), codes.end(), NA_INTEGER);
+    return codes;
+  }
+
+  auto p_index = GetValuesSafely<value_type>(array->data(), 1, array->offset());
+  auto p_code = codes.begin();
+
+  if (null_count) {
+    // consult the validity bitmap for every slot
+    arrow::internal::BitmapReader validity(array->null_bitmap()->data(),
+                                           array->offset(), n);
+    for (; p_code != codes.end(); ++p_code, ++p_index, validity.Next()) {
+      if (validity.IsNotSet()) {
+        *p_code = NA_INTEGER;
+      } else {
+        *p_code = static_cast<int>(*p_index) + 1;
+      }
+    }
+  } else {
+    // no nulls: plain shifted copy
+    for (; p_code != codes.end(); ++p_code, ++p_index) {
+      *p_code = static_cast<int>(*p_index) + 1;
+    }
+  }
+
+  return codes;
+}
+
+// Converts a DictionaryArray to an R factor. Only string dictionaries are
+// supported, with indices of type uint8, int8, uint16, int16 or int32; anything
+// else raises an R error.
+SEXP DictionaryArray_to_Vector(arrow::DictionaryArray* dict_array) {
+  auto levels = dict_array->dictionary();
+  auto codes = dict_array->indices();
+
+  // factor levels are character vectors, so the dictionary must hold strings
+  if (levels->type_id() != Type::STRING) {
+    stop("Cannot convert Dictionary Array of type `%s` to R",
+         dict_array->type()->ToString());
+  }
+
+  const bool is_ordered = dict_array->dict_type()->ordered();
+  const auto index_kind = codes->type_id();
+
+  // dispatch on the physical type of the indices
+  if (index_kind == Type::UINT8) {
+    return DictionaryArrayInt32Indices_to_Vector<arrow::UInt8Type>(codes, levels,
+                                                                   is_ordered);
+  }
+  if (index_kind == Type::INT8) {
+    return DictionaryArrayInt32Indices_to_Vector<arrow::Int8Type>(codes, levels,
+                                                                  is_ordered);
+  }
+  if (index_kind == Type::UINT16) {
+    return DictionaryArrayInt32Indices_to_Vector<arrow::UInt16Type>(codes, levels,
+                                                                    is_ordered);
+  }
+  if (index_kind == Type::INT16) {
+    return DictionaryArrayInt32Indices_to_Vector<arrow::Int16Type>(codes, levels,
+                                                                   is_ordered);
+  }
+  if (index_kind == Type::INT32) {
+    return DictionaryArrayInt32Indices_to_Vector<arrow::Int32Type>(codes, levels,
+                                                                   is_ordered);
+  }
+
+  // unsupported index type
+  stop("Cannot convert Dictionary Array of type `%s` to R",
+       dict_array->type()->ToString());
+  return R_NilValue;
+}
+
+SEXP Date32Array_to_Vector(const std::shared_ptr<arrow::Array>& array) {
+ IntegerVector out(simple_Array_to_Vector<INTSXP>(array));  // values converted as a plain integer vector by the shared helper
+ out.attr("class") = "Date";  // tag the integers with class "Date" so R treats them as dates
+ return out;
+}
+
+// Converts a Date64 array into an R POSIXct vector (a double vector with class
+// c("POSIXct", "POSIXt")). Stored int64 values are divided by 1000, mirroring
+// the `* 1000` applied by `time_cast` when the array is built from R; nulls
+// become NA.
+SEXP Date64Array_to_Vector(const std::shared_ptr<arrow::Array> array) {
+  auto n = array->length();
+  NumericVector vec(no_init(n));
+  vec.attr("class") = CharacterVector::create("POSIXct", "POSIXt");
+  if (n == 0) {
+    return vec;
+  }
+
+  auto null_count = array->null_count();
+  if (null_count == n) {
+    // nothing but nulls: no need to look at the values buffer
+    std::fill(vec.begin(), vec.end(), NA_REAL);
+    return vec;
+  }
+
+  auto p_values = GetValuesSafely<int64_t>(array->data(), 1, array->offset());
+  auto p_vec = vec.begin();
+
+  // Convert to double *before* dividing: dividing the int64 first would truncate
+  // and silently drop the fractional part of the result.
+  if (null_count) {
+    arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(),
+                                                array->offset(), n);
+    for (int64_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_vec, ++p_values) {
+      *p_vec = bitmap_reader.IsSet() ? static_cast<double>(*p_values) / 1000 : NA_REAL;
+    }
+  } else {
+    std::transform(p_values, p_values + n, vec.begin(),
+                   [](int64_t value) { return static_cast<double>(value) / 1000; });
+  }
+
+  return vec;
+}
+
+} // namespace r
+} // namespace arrow
+
// [[Rcpp::export]]
SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {
+ using namespace arrow::r;
+
switch (array->type_id()) {
case Type::BOOL:
return BooleanArray_to_Vector(array);
@@ -336,11 +626,17 @@ SEXP Array__as_vector(const
std::shared_ptr<arrow::Array>& array) {
return simple_Array_to_Vector<REALSXP>(array);
case Type::STRING:
return StringArray_to_Vector(array);
+ case Type::DICTIONARY:
+ return
DictionaryArray_to_Vector(static_cast<arrow::DictionaryArray*>(array.get()));
+ case Type::DATE32:
+ return Date32Array_to_Vector(array);
+ case Type::DATE64:
+ return Date64Array_to_Vector(array);
default:
break;
}
- stop(tfm::format("cannot handle Array of type %d", array->type_id()));
+ stop(tfm::format("cannot handle Array of type %s", array->type()->name()));
return R_NilValue;
}
@@ -428,3 +724,15 @@ LogicalVector Array__Mask(const
std::shared_ptr<arrow::Array>& array) {
}
return res;
}
+
+// Exported accessor: returns the result of DictionaryArray::indices() for
+// the given dictionary array.
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Array> DictionaryArray__indices(
+ const std::shared_ptr<arrow::DictionaryArray>& array) {
+ return array->indices();
+}
+
+// Exported accessor: returns the result of DictionaryArray::dictionary() for
+// the given dictionary array.
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Array> DictionaryArray__dictionary(
+ const std::shared_ptr<arrow::DictionaryArray>& array) {
+ return array->dictionary();
+}
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index e208d0e..879f59a 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -37,7 +37,6 @@ namespace Rcpp {
namespace traits {
struct wrap_type_shared_ptr_tag {};
-struct wrap_type_static_ptr_tag {};
template <typename T>
struct wrap_type_traits<std::shared_ptr<T>> {
@@ -53,9 +52,6 @@ namespace internal {
template <typename T>
inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_shared_ptr_tag);
-template <typename T>
-inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_static_ptr_tag);
-
} // namespace internal
} // namespace Rcpp
@@ -101,11 +97,39 @@ inline SEXP wrap_dispatch(const T& x,
Rcpp::traits::wrap_type_shared_ptr_tag) {
} // namespace Rcpp
namespace Rcpp {
+using IntegerVector_ = Rcpp::Vector<INTSXP, Rcpp::NoProtectStorage>;
using LogicalVector_ = Rcpp::Vector<LGLSXP, Rcpp::NoProtectStorage>;
using StringVector_ = Rcpp::Vector<STRSXP, Rcpp::NoProtectStorage>;
+using CharacterVector_ = StringVector_;
+
+// Default fill value for an R vector of type RTYPE: the NA value reported by
+// Rcpp::Vector<RTYPE>::get_na().
+template <int RTYPE>
+inline typename Rcpp::Vector<RTYPE>::stored_type default_value() {
+ return Rcpp::Vector<RTYPE>::get_na();
+}
+// Specialization for raw vectors: returns 0 instead of get_na().
+// NOTE(review): presumably because raw vectors have no NA - confirm.
+template <>
+inline Rbyte default_value<RAWSXP>() {
+ return 0;
+}
+
} // namespace Rcpp
SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>&
chunked_array);
SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array);
std::shared_ptr<arrow::Array> Array__from_vector(SEXP x);
std::shared_ptr<arrow::RecordBatch>
RecordBatch__from_dataframe(Rcpp::DataFrame tbl);
+
+namespace arrow {
+namespace r {
+
+// Return a pointer to the values of buffer `i` of `data`, reinterpreted as
+// `const T*` and advanced by `offset` elements.
+//
+// Raises an R error (Rcpp::stop) when `i` does not designate a buffer of
+// `data` or when that buffer is null.
+template <typename T>
+inline const T* GetValuesSafely(const std::shared_ptr<ArrayData>& data, int i,
+                                int64_t offset) {
+  // Validate the index before subscripting: operator[] on the buffers vector
+  // does no range checking.
+  if (i < 0 || static_cast<size_t>(i) >= data->buffers.size()) {
+    Rcpp::stop(tfm::format("invalid data in buffer %d", i));
+  }
+  // Bind by reference: no need to copy the shared_ptr just to read from it.
+  const auto& buffer = data->buffers[i];
+  if (!buffer) {
+    Rcpp::stop(tfm::format("invalid data in buffer %d", i));
+  }
+  return reinterpret_cast<const T*>(buffer->data()) + offset;
+}
+
+} // namespace r
+} // namespace arrow
diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index c063a07..d562435 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -96,3 +96,149 @@ test_that("Array supports character vectors (ARROW-3339)", {
expect_equal(arr_chr$length(), 3L)
expect_identical(arr_chr$as_vector(), x)
})
+
+test_that("empty arrays are supported", {
+  # Every zero-length vector must survive the R -> arrow -> R round trip.
+  empty_inputs <- list(
+    character(),
+    integer(),
+    numeric(),
+    factor(character()),
+    logical()
+  )
+
+  for (x in empty_inputs) {
+    expect_equal(array(x)$as_vector(), x)
+  }
+})
+
+test_that("array with all nulls are supported", {
+  nas <- c(NA, NA)
+
+  # Coerce the all-NA vector to each type in turn and check the round trip.
+  converters <- list(
+    as.logical,
+    as.integer,
+    as.numeric,
+    as.character,
+    as.factor
+  )
+
+  for (convert in converters) {
+    x <- convert(nas)
+    expect_equal(array(x)$as_vector(), x)
+  }
+})
+
+test_that("Array supports unordered factors (ARROW-3355)", {
+  # without NA
+  f <- factor(c("itsy", "bitsy", "spider", "spider"))
+  arr_fac <- array(f)
+  expect_equal(arr_fac$length(), 4L)
+  expect_equal(arr_fac$type()$index_type(), int8())
+  expect_identical(arr_fac$as_vector(), f)
+  expect_true(arr_fac$IsValid(0))
+  expect_true(arr_fac$IsValid(1))
+  expect_true(arr_fac$IsValid(2))
+  expect_true(arr_fac$IsValid(3))
+
+  # The slice itself must keep the dictionary index type, so check `sl`
+  # rather than re-checking the parent array.
+  sl <- arr_fac$Slice(1)
+  expect_equal(sl$length(), 3L)
+  expect_equal(sl$type()$index_type(), int8())
+  expect_equal(sl$as_vector(), f[2:4])
+
+  # with NA
+  f <- factor(c("itsy", "bitsy", NA, "spider", "spider"))
+  # TODO: rm the suppressWarnings when
+  # https://github.com/r-lib/vctrs/issues/109
+  arr_fac <- suppressWarnings(array(f))
+  expect_equal(arr_fac$length(), 5L)
+  expect_equal(arr_fac$type()$index_type(), int8())
+  expect_identical(arr_fac$as_vector(), f)
+  expect_true(arr_fac$IsValid(0))
+  expect_true(arr_fac$IsValid(1))
+  expect_true(arr_fac$IsNull(2))
+  expect_true(arr_fac$IsValid(3))
+  expect_true(arr_fac$IsValid(4))
+
+  sl <- arr_fac$Slice(1)
+  expect_equal(sl$length(), 4L)
+  expect_equal(sl$type()$index_type(), int8())
+  expect_equal(sl$as_vector(), f[2:5])
+})
+
+test_that("Array supports ordered factors (ARROW-3355)", {
+  # without NA
+  f <- ordered(c("itsy", "bitsy", "spider", "spider"))
+  arr_fac <- array(f)
+  expect_equal(arr_fac$length(), 4L)
+  expect_equal(arr_fac$type()$index_type(), int8())
+  expect_identical(arr_fac$as_vector(), f)
+  expect_true(arr_fac$IsValid(0))
+  expect_true(arr_fac$IsValid(1))
+  expect_true(arr_fac$IsValid(2))
+  expect_true(arr_fac$IsValid(3))
+
+  # The slice itself must keep the dictionary index type, so check `sl`
+  # rather than re-checking the parent array.
+  sl <- arr_fac$Slice(1)
+  expect_equal(sl$length(), 3L)
+  expect_equal(sl$type()$index_type(), int8())
+  expect_equal(sl$as_vector(), f[2:4])
+
+  # with NA
+  f <- ordered(c("itsy", "bitsy", NA, "spider", "spider"))
+  # TODO: rm the suppressWarnings when
+  # https://github.com/r-lib/vctrs/issues/109
+  arr_fac <- suppressWarnings(array(f))
+  expect_equal(arr_fac$length(), 5L)
+  expect_equal(arr_fac$type()$index_type(), int8())
+  expect_identical(arr_fac$as_vector(), f)
+  expect_true(arr_fac$IsValid(0))
+  expect_true(arr_fac$IsValid(1))
+  expect_true(arr_fac$IsNull(2))
+  expect_true(arr_fac$IsValid(3))
+  expect_true(arr_fac$IsValid(4))
+
+  sl <- arr_fac$Slice(1)
+  expect_equal(sl$length(), 4L)
+  expect_equal(sl$type()$index_type(), int8())
+  expect_equal(sl$as_vector(), f[2:5])
+})
+
+test_that("array supports Date (ARROW-3340)", {
+  # Build an arrow array from `input`, run the checks shared by every case
+  # and hand the array back for case-specific assertions.
+  check_date32 <- function(input, expected) {
+    arr <- array(input)
+    expect_equal(arr$type(), date32())
+    expect_equal(arr$length(), 10L)
+    expect_equal(arr$as_vector(), expected)
+    arr
+  }
+
+  # integer-valued dates
+  d <- Sys.Date() + 1:10
+  check_date32(d, d)
+
+  # with a missing value
+  d[5] <- NA
+  with_na <- check_date32(d, d)
+  expect_true(with_na$IsNull(4))
+
+  # fractional dates are expected to come back as the whole-day dates
+  d2 <- d + .5
+  fractional <- check_date32(d2, d)
+  expect_true(fractional$IsNull(4))
+})
+
+test_that("array supports POSIXct (ARROW-3340)", {
+  times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10
+  a <- array(times)
+  expect_equal(a$type(), date64())
+  expect_equal(a$length(), 10L)
+  expect_equal(as.numeric(a$as_vector()), as.numeric(times))
+
+  # Introducing an NA must not change the arrow type: POSIXct still maps to
+  # date64, exactly as in the NA-free case above.
+  times[5] <- NA
+  a <- array(times)
+  expect_equal(a$type(), date64())
+  expect_equal(a$length(), 10L)
+  expect_equal(as.numeric(a$as_vector()), as.numeric(times))
+  expect_true(a$IsNull(4))
+})
+
diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-DataType.R
index e87175c..b479e5a 100644
--- a/r/tests/testthat/test-DataType.R
+++ b/r/tests/testthat/test-DataType.R
@@ -312,3 +312,15 @@ test_that("struct type works as expected", {
list(field("x", int32()), field("y", boolean()))
)
})
+
+test_that("DictionaryType works as expected (ARROW-3355)", {
+  d <- dictionary(int32(), array(c("foo", "bar", "baz")))
+  expect_equal(d, d)
+  expect_true(d == d)
+  expect_false(d == int32())
+  expect_equal(d$id(), Type$DICTIONARY)
+  expect_equal(d$bit_width(), 32L)
+  # The expected string is a single line; it is only split here to stay
+  # within the line-length limit, without embedding a newline.
+  expect_equal(
+    d$ToString(),
+    paste0("dictionary<values=string, indices=int32, ", "ordered=0>")
+  )
+  expect_equal(d$index_type(), int32())
+  expect_equal(d$dictionary(), array(c("foo", "bar", "baz")))
+})
diff --git a/r/tests/testthat/test-RecordBatch.R
b/r/tests/testthat/test-RecordBatch.R
index 75c59aa..e3557f8 100644
--- a/r/tests/testthat/test-RecordBatch.R
+++ b/r/tests/testthat/test-RecordBatch.R
@@ -21,22 +21,28 @@ test_that("RecordBatch", {
tbl <- tibble::tibble(
int = 1:10, dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
- chr = letters[1:10]
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
)
batch <- record_batch(tbl)
expect_true(batch == batch)
expect_equal(
batch$schema(),
- schema(int = int32(), dbl = float64(), lgl = boolean(), chr = utf8())
+ schema(
+ int = int32(), dbl = float64(),
+ lgl = boolean(), chr = utf8(),
+ fct = dictionary(int32(), array(letters[1:10]))
+ )
)
- expect_equal(batch$num_columns(), 4L)
+ expect_equal(batch$num_columns(), 5L)
expect_equal(batch$num_rows(), 10L)
expect_equal(batch$column_name(0), "int")
expect_equal(batch$column_name(1), "dbl")
expect_equal(batch$column_name(2), "lgl")
expect_equal(batch$column_name(3), "chr")
- expect_equal(names(batch), c("int", "dbl", "lgl", "chr"))
+ expect_equal(batch$column_name(4), "fct")
+ expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct"))
col_int <- batch$column(0)
expect_true(inherits(col_int, 'arrow::Array'))
@@ -58,10 +64,16 @@ test_that("RecordBatch", {
expect_equal(col_chr$as_vector(), tbl$chr)
expect_equal(col_chr$type(), utf8())
+ col_fct <- batch$column(4)
+ expect_true(inherits(col_fct, 'arrow::Array'))
+ expect_equal(col_fct$as_vector(), tbl$fct)
+ expect_equal(col_fct$type(), dictionary(int32(), array(letters[1:10])))
+
+
batch2 <- batch$RemoveColumn(0)
expect_equal(
batch2$schema(),
- schema(dbl = float64(), lgl = boolean(), chr = utf8())
+ schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct =
dictionary(int32(), array(letters[1:10])))
)
expect_equal(batch2$column(0), batch$column(1))
expect_identical(as_tibble(batch2), tbl[,-1])
@@ -72,3 +84,33 @@ test_that("RecordBatch", {
batch4 <- batch$Slice(5, 2)
expect_identical(as_tibble(batch4), tbl[6:7,])
})
+
+test_that("RecordBatch with 0 rows are supported", {
+  # A zero-row tibble with one column per supported column type.
+  empty_tbl <- tibble::tibble(
+    int = integer(),
+    dbl = numeric(),
+    lgl = logical(),
+    chr = character(),
+    fct = factor(character(), levels = c("a", "b"))
+  )
+
+  batch <- record_batch(empty_tbl)
+  expect_equal(batch$num_columns(), 5L)
+  expect_equal(batch$num_rows(), 0L)
+
+  expected_schema <- schema(
+    int = int32(),
+    dbl = float64(),
+    lgl = boolean(),
+    chr = utf8(),
+    fct = dictionary(int32(), array(c("a", "b")))
+  )
+  expect_equal(batch$schema(), expected_schema)
+
+  # Round trip through a temporary file that is removed on exit.
+  tf <- tempfile()
+  on.exit(unlink(tf))
+  batch$to_file(tf)
+
+  expect_equal(read_record_batch(tf), batch)
+})