This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0ada875 ARROW-1861: [Python] Rework benchmark suite [skip ci]
0ada875 is described below
commit 0ada87531dca52d51d4f60d3148a9ba733d96a48
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 1 19:11:28 2018 +0100
ARROW-1861: [Python] Rework benchmark suite [skip ci]
This PR focusses on:
* ASV setup fixes
* splitting the benchmark file
* improving the array from/to pylist conversion benchmarks
Author: Antoine Pitrou <[email protected]>
Closes #1543 from pitrou/ARROW-1861-rework-benchmarks and squashes the
following commits:
b608579 [Antoine Pitrou] ARROW-1861: [Python] Rework benchmark suite [skip
ci]
---
python/README-benchmarks.md | 54 ++++
python/asv.conf.json | 98 ++++++-
python/benchmarks/array_ops.py | 35 +++
python/benchmarks/common.py | 84 ++++++
python/benchmarks/convert_builtins.py | 295 ++++++++++++++++++++++
python/benchmarks/{array.py => convert_pandas.py} | 42 +--
6 files changed, 561 insertions(+), 47 deletions(-)
diff --git a/python/README-benchmarks.md b/python/README-benchmarks.md
new file mode 100644
index 0000000..6389665
--- /dev/null
+++ b/python/README-benchmarks.md
@@ -0,0 +1,54 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Benchmarks
+
+The `pyarrow` package comes with a suite of benchmarks meant to
+run with [ASV](https://asv.readthedocs.io). You'll need to install
+the `asv` package first (`pip install asv`).
+
+## Running with your local tree
+
+When developing, the simplest and fastest way to run the benchmark suite
+against your local changes is to use the `asv dev` command. This will
+use your current Python interpreter and environment.
+
+## Running with arbitrary revisions
+
+ASV allows you to store results and generate graphs of the benchmarks
+over the project's evolution. Doing this currently requires a bit more
+massaging.
+
+First you have to install our ASV fork:
+
+```shell
+pip install git+https://github.com/pitrou/asv.git@issue-547-specify-project-subdir
+```
+
+<!--- TODO remove the above once https://github.com/airspeed-velocity/asv/pull/611 is merged -->
+
+Then you need to set up a few environment variables:
+
+```shell
+export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.1
+export PYARROW_BUNDLE_ARROW_CPP=1
+```
+
+Now you should be ready to run `asv run` or whatever other command
+suits your needs.
diff --git a/python/asv.conf.json b/python/asv.conf.json
index 2a1dd42..150153c 100644
--- a/python/asv.conf.json
+++ b/python/asv.conf.json
@@ -28,12 +28,17 @@
// The URL or local path of the source code repository for the
// project being benchmarked
- "repo": "https://github.com/apache/arrow/",
+ "repo": "..",
+
+ // The Python project's subdirectory in your repo. If missing or
+ // the empty string, the project is assumed to be located at the root
+ // of the repository.
+ "repo_subdir": "python",
// List of branches to benchmark. If not provided, defaults to "master"
- // (for git) or "tip" (for mercurial).
+ // (for git) or "default" (for mercurial).
// "branches": ["master"], // for git
- // "branches": ["tip"], // for mercurial
+ // "branches": ["default"], // for mercurial
// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
@@ -46,22 +51,72 @@
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
- "environment_type": "virtualenv",
+ "environment_type": "conda",
+ "conda_channels": ["conda-forge", "defaults"],
// the base URL to show a commit for the project.
"show_commit_url": "https://github.com/apache/arrow/commit/",
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
- // "pythons": ["2.7", "3.3"],
+ "pythons": ["3.6"],
// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
- // list indicates to just test against the default (latest)
- // version.
+ // list or empty string indicates to just test against the default
+ // (latest) version. null indicates that the package is to not be
+ // installed. If the package to be tested is only available from
+ // PyPi, and the 'environment_type' is conda, then you can preface
+ // the package name by 'pip+', and the package will be installed via
+ // pip (with all the conda available packages installed first,
+ // followed by the pip installed packages).
+ //
// "matrix": {
- // "numpy": ["1.6", "1.7"]
+ // "numpy": ["1.6", "1.7"],
+ // "six": ["", null], // test with and without six installed
+ // "pip+emcee": [""], // emcee is only available for install with pip.
// },
+ "matrix": {
+ "boost-cpp": [],
+ "cmake": [],
+ "cython": [],
+ "numpy": ["1.14"],
+ "pandas": ["0.22"],
+ "pip+setuptools_scm": [],
+ },
+
+ // Combinations of libraries/python versions can be excluded/included
+ // from the set to test. Each entry is a dictionary containing additional
+ // key-value pairs to include/exclude.
+ //
+ // An exclude entry excludes entries where all values match. The
+ // values are regexps that should match the whole string.
+ //
+ // An include entry adds an environment. Only the packages listed
+ // are installed. The 'python' key is required. The exclude rules
+ // do not apply to includes.
+ //
+ // In addition to package names, the following keys are available:
+ //
+ // - python
+ // Python version, as in the *pythons* variable above.
+ // - environment_type
+ // Environment type, as above.
+ // - sys_platform
+ // Platform, as in sys.platform. Possible values for the common
+ // cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+ //
+ // "exclude": [
+ // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+ // {"environment_type": "conda", "six": null}, // don't run without six on conda
+ // ],
+ //
+ // "include": [
+ // // additional env for python2.7
+ // {"python": "2.7", "numpy": "1.8"},
+ // // additional env if run on windows+conda
+ // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
+ // ],
// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
@@ -71,7 +126,6 @@
// environments in. If not provided, defaults to "env"
"env_dir": ".asv/env",
-
// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": ".asv/results",
@@ -86,5 +140,29 @@
// `asv` will cache wheels of the recent builds in each
// environment, making them faster to install next time. This is
// number of builds to keep, per environment.
- // "wheel_cache_size": 0
+ // "wheel_cache_size": 0,
+
+ // The commits after which the regression search in `asv publish`
+ // should start looking for regressions. Dictionary whose keys are
+ // regexps matching to benchmark names, and values corresponding to
+ // the commit (exclusive) after which to start looking for
+ // regressions. The default is to start from the first commit
+ // with results. If the commit is `null`, regression detection is
+ // skipped for the matching benchmark.
+ //
+ // "regressions_first_commits": {
+ // "some_benchmark": "352cdf", // Consider regressions only after this commit
+ // "another_benchmark": null, // Skip regression detection altogether
+ // }
+
+ // The thresholds for relative change in results, after which `asv
+ // publish` starts reporting regressions. Dictionary of the same
+ // form as in ``regressions_first_commits``, with values
+ // indicating the thresholds. If multiple entries match, the
+ // maximum is taken. If no entry matches, the default is 5%.
+ //
+ // "regressions_thresholds": {
+ // "some_benchmark": 0.01, // Threshold of 1%
+ // "another_benchmark": 0.5, // Threshold of 50%
+ // }
}
diff --git a/python/benchmarks/array_ops.py b/python/benchmarks/array_ops.py
new file mode 100644
index 0000000..70ee7f1
--- /dev/null
+++ b/python/benchmarks/array_ops.py
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import numpy as np
+import pyarrow as pa
+
+
+class ScalarAccess(object):
+ n = 10 ** 5
+
+ def setUp(self):
+ self._array = pa.array(list(range(self.n)), type=pa.int64())
+ self._array_items = list(self._array)
+
+ def time_getitem(self):
+ for i in range(self.n):
+ self._array[i]
+
+ def time_as_py(self):
+ for item in self._array_items:
+ item.as_py()
diff --git a/python/benchmarks/common.py b/python/benchmarks/common.py
new file mode 100644
index 0000000..7dd42fd
--- /dev/null
+++ b/python/benchmarks/common.py
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import codecs
+import os
+import sys
+import unicodedata
+
+import numpy as np
+
+
+def _multiplicate_sequence(base, target_size):
+ q, r = divmod(target_size, len(base))
+ return [base] * q + [base[:r]]
+
+
+def get_random_bytes(n):
+ rnd = np.random.RandomState(42)
+ # Computing a huge random bytestring can be costly, so we get at most
+ # 100KB and duplicate the result as needed
+ base_size = 100003
+ q, r = divmod(n, base_size)
+ if q == 0:
+ result = rnd.bytes(r)
+ else:
+ base = rnd.bytes(base_size)
+ result = b''.join(_multiplicate_sequence(base, n))
+ assert len(result) == n
+ return result
+
+
+def get_random_ascii(n):
+ arr = np.frombuffer(get_random_bytes(n), dtype=np.int8) & 0x7f
+ result, _ = codecs.ascii_decode(arr)
+ assert isinstance(result, str)
+ assert len(result) == n
+ return result
+
+
+def _random_unicode_letters(n):
+ """
+ Generate a string of random unicode letters (slow).
+ """
+ def _get_more_candidates():
+ return rnd.randint(0, sys.maxunicode, size=n).tolist()
+
+ rnd = np.random.RandomState(42)
+ out = []
+ candidates = []
+
+ while len(out) < n:
+ if not candidates:
+ candidates = _get_more_candidates()
+ ch = chr(candidates.pop())
+ # XXX Do we actually care that the code points are valid?
+ if unicodedata.category(ch)[0] == 'L':
+ out.append(ch)
+ return out
+
+
+_1024_random_unicode_letters = _random_unicode_letters(1024)
+
+
+def get_random_unicode(n):
+ indices = np.frombuffer(get_random_bytes(n * 2), dtype=np.int16) & 1023
+ unicode_arr = np.array(_1024_random_unicode_letters)[indices]
+
+ result = ''.join(unicode_arr.tolist())
+ assert len(result) == n, (len(result), len(unicode_arr))
+ return result
diff --git a/python/benchmarks/convert_builtins.py b/python/benchmarks/convert_builtins.py
new file mode 100644
index 0000000..92b2b85
--- /dev/null
+++ b/python/benchmarks/convert_builtins.py
@@ -0,0 +1,295 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from functools import partial
+import itertools
+
+import numpy as np
+import pyarrow as pa
+
+from . import common
+
+
+DEFAULT_NONE_PROB = 0.3
+
+
+# TODO:
+# - test dates and times
+# - test decimals
+
+class BuiltinsGenerator(object):
+
+ def __init__(self, seed=42):
+ self.rnd = np.random.RandomState(seed)
+
+ def sprinkle_nones(self, lst, prob):
+ """
+ Sprinkle None entries in list *lst* with likelihood *prob*.
+ """
+ for i, p in enumerate(self.rnd.random_sample(size=len(lst))):
+ if p < prob:
+ lst[i] = None
+
+ def generate_int_list(self, n, none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of Python ints with *none_prob* probability of
+ an entry being None.
+ """
+ data = list(range(n))
+ self.sprinkle_nones(data, none_prob)
+ return data
+
+ def generate_float_list(self, n, none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of Python floats with *none_prob* probability of
+ an entry being None.
+ """
+ # Make sure we get Python floats, not np.float64
+ data = list(map(float, self.rnd.uniform(0.0, 1.0, n)))
+ assert len(data) == n
+ self.sprinkle_nones(data, none_prob)
+ return data
+
+ def generate_bool_list(self, n, none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of Python bools with *none_prob* probability of
+ an entry being None.
+ """
+ # Make sure we get Python bools, not np.bool_
+ data = [bool(x >= 0.5) for x in self.rnd.uniform(0.0, 1.0, n)]
+ assert len(data) == n
+ self.sprinkle_nones(data, none_prob)
+ return data
+
+ def _generate_varying_sequences(self, random_factory, n, min_size,
+ max_size, none_prob):
+ """
+ Generate a list of *n* sequences of varying size between *min_size*
+ and *max_size*, with *none_prob* probability of an entry being None.
+ The base material for each sequence is obtained by calling
+ `random_factory(<some size>)`
+ """
+ base_size = 10000
+ base = random_factory(base_size + max_size)
+ data = []
+ for i in range(n):
+ off = self.rnd.randint(base_size)
+ if min_size == max_size:
+ size = min_size
+ else:
+ size = self.rnd.randint(min_size, max_size + 1)
+ data.append(base[off:off + size])
+ self.sprinkle_nones(data, none_prob)
+ assert len(data) == n
+ return data
+
+ def generate_fixed_binary_list(self, n, size, none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of bytestrings with a fixed *size*.
+ """
+ return self._generate_varying_sequences(common.get_random_bytes, n,
+ size, size, none_prob)
+
+
+ def generate_varying_binary_list(self, n, min_size, max_size,
+ none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of bytestrings with a random size between
+ *min_size* and *max_size*.
+ """
+ return self._generate_varying_sequences(common.get_random_bytes, n,
+ min_size, max_size, none_prob)
+
+
+ def generate_ascii_string_list(self, n, min_size, max_size,
+ none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of ASCII strings with a random size between
+ *min_size* and *max_size*.
+ """
+ return self._generate_varying_sequences(common.get_random_ascii, n,
+ min_size, max_size, none_prob)
+
+
+ def generate_unicode_string_list(self, n, min_size, max_size,
+ none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of unicode strings with a random size between
+ *min_size* and *max_size*.
+ """
+ return self._generate_varying_sequences(common.get_random_unicode, n,
+ min_size, max_size, none_prob)
+
+
+ def generate_int_list_list(self, n, min_size, max_size,
+ none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of lists of Python ints with a random size between
+ *min_size* and *max_size*.
+ """
+ return self._generate_varying_sequences(
+ partial(self.generate_int_list, none_prob=none_prob),
+ n, min_size, max_size, none_prob)
+
+
+ def generate_dict_list(self, n, none_prob=DEFAULT_NONE_PROB):
+ """
+ Generate a list of dicts with a random size between *min_size* and
+ *max_size*.
+ Each dict has the form `{'u': int value, 'v': float value, 'w': bool value}`
+ """
+ ints = self.generate_int_list(n, none_prob=none_prob)
+ floats = self.generate_float_list(n, none_prob=none_prob)
+ bools = self.generate_bool_list(n, none_prob=none_prob)
+ dicts = []
+ # Keep half the Nones, omit the other half
+ keep_nones = itertools.cycle([True, False])
+ for u, v, w in zip(ints, floats, bools):
+ d = {}
+ if u is not None or next(keep_nones):
+ d['u'] = u
+ if v is not None or next(keep_nones):
+ d['v'] = v
+ if w is not None or next(keep_nones):
+ d['w'] = w
+ dicts.append(d)
+ self.sprinkle_nones(dicts, none_prob)
+ assert len(dicts) == n
+ return dicts
+
+ def get_type_and_builtins(self, n, type_name):
+ """
+ Return a `(arrow type, list)` tuple where the arrow type
+ corresponds to the given logical *type_name*, and the list
+ is a list of *n* random-generated Python objects compatible
+ with the arrow type.
+ """
+ size = None
+
+ if type_name in ('bool', 'ascii', 'unicode', 'int64 list', 'struct'):
+ kind = type_name
+ elif type_name.startswith(('int', 'uint')):
+ kind = 'int'
+ elif type_name.startswith('float'):
+ kind = 'float'
+ elif type_name == 'binary':
+ kind = 'varying binary'
+ elif type_name.startswith('binary'):
+ kind = 'fixed binary'
+ size = int(type_name[6:])
+ assert size > 0
+ else:
+ raise ValueError("unrecognized type %r" % (type_name,))
+
+ if kind in ('int', 'float'):
+ ty = getattr(pa, type_name)()
+ elif kind == 'bool':
+ ty = pa.bool_()
+ elif kind == 'fixed binary':
+ ty = pa.binary(size)
+ elif kind == 'varying binary':
+ ty = pa.binary()
+ elif kind in ('ascii', 'unicode'):
+ ty = pa.string()
+ elif kind == 'int64 list':
+ ty = pa.list_(pa.int64())
+ elif kind == 'struct':
+ ty = pa.struct([pa.field('u', pa.int64()),
+ pa.field('v', pa.float64()),
+ pa.field('w', pa.bool_())])
+
+ factories = {
+ 'int': self.generate_int_list,
+ 'float': self.generate_float_list,
+ 'bool': self.generate_bool_list,
+ 'fixed binary': partial(self.generate_fixed_binary_list,
+ size=size),
+ 'varying binary': partial(self.generate_varying_binary_list,
+ min_size=3, max_size=40),
+ 'ascii': partial(self.generate_ascii_string_list,
+ min_size=3, max_size=40),
+ 'unicode': partial(self.generate_unicode_string_list,
+ min_size=3, max_size=40),
+ 'int64 list': partial(self.generate_int_list_list,
+ min_size=0, max_size=20),
+ 'struct': self.generate_dict_list,
+ }
+ data = factories[kind](n)
+ return ty, data
+
+
+class ConvertPyListToArray(object):
+ """
+ Benchmark pa.array(list of values, type=...)
+ """
+ size = 10 ** 5
+ types = ('int32', 'uint32', 'int64', 'uint64',
+ 'float32', 'float64', 'bool',
+ 'binary', 'binary10', 'ascii', 'unicode',
+ 'int64 list', 'struct')
+
+ param_names = ['type']
+ params = [types]
+
+ def setup(self, type_name):
+ gen = BuiltinsGenerator()
+ self.ty, self.data = gen.get_type_and_builtins(self.size, type_name)
+
+ def time_convert(self, *args):
+ pa.array(self.data, type=self.ty)
+
+
+class InferPyListToArray(object):
+ """
+ Benchmark pa.array(list of values) with type inference
+ """
+ size = 10 ** 5
+ types = ('int64', 'float64', 'bool', 'binary', 'ascii', 'unicode',
+ 'int64 list')
+ # TODO add 'struct' when supported
+
+ param_names = ['type']
+ params = [types]
+
+ def setup(self, type_name):
+ gen = BuiltinsGenerator()
+ self.ty, self.data = gen.get_type_and_builtins(self.size, type_name)
+
+ def time_infer(self, *args):
+ arr = pa.array(self.data)
+ assert arr.type == self.ty
+
+
+class ConvertArrayToPyList(object):
+ """
+ Benchmark pa.array.to_pylist()
+ """
+ size = 10 ** 5
+ types = ('int32', 'uint32', 'int64', 'uint64',
+ 'float32', 'float64', 'bool',
+ 'binary', 'binary10', 'ascii', 'unicode',
+ 'int64 list', 'struct')
+
+ param_names = ['type']
+ params = [types]
+
+ def setup(self, type_name):
+ gen = BuiltinsGenerator()
+ self.ty, self.data = gen.get_type_and_builtins(self.size, type_name)
+ self.arr = pa.array(self.data, type=self.ty)
+
+ def time_convert(self, *args):
+ self.arr.to_pylist()
diff --git a/python/benchmarks/array.py b/python/benchmarks/convert_pandas.py
similarity index 59%
rename from python/benchmarks/array.py
rename to python/benchmarks/convert_pandas.py
index e22c0f7..c4a7a59 100644
--- a/python/benchmarks/array.py
+++ b/python/benchmarks/convert_pandas.py
@@ -17,21 +17,7 @@
import numpy as np
import pandas as pd
-import pyarrow as A
-
-
-class PyListConversions(object):
- param_names = ('size',)
- params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
-
- def setup(self, n):
- self.data = list(range(n))
-
- def time_from_pylist(self, n):
- A.from_pylist(self.data)
-
- def peakmem_from_pylist(self, n):
- A.from_pylist(self.data)
+import pyarrow as pa
class PandasConversionsBase(object):
@@ -46,37 +32,19 @@ class PandasConversionsBase(object):
class PandasConversionsToArrow(PandasConversionsBase):
param_names = ('size', 'dtype')
- params = ((1, 10 ** 5, 10 ** 6, 10 ** 7), ('int64', 'float64', 'float64_nans', 'str'))
+ params = ((10, 10 ** 6), ('int64', 'float64', 'float64_nans', 'str'))
def time_from_series(self, n, dtype):
- A.Table.from_pandas(self.data)
-
- def peakmem_from_series(self, n, dtype):
- A.Table.from_pandas(self.data)
+ pa.Table.from_pandas(self.data)
class PandasConversionsFromArrow(PandasConversionsBase):
param_names = ('size', 'dtype')
- params = ((1, 10 ** 5, 10 ** 6, 10 ** 7), ('int64', 'float64', 'float64_nans', 'str'))
+ params = ((10, 10 ** 6), ('int64', 'float64', 'float64_nans', 'str'))
def setup(self, n, dtype):
super(PandasConversionsFromArrow, self).setup(n, dtype)
- self.arrow_data = A.Table.from_pandas(self.data)
+ self.arrow_data = pa.Table.from_pandas(self.data)
def time_to_series(self, n, dtype):
self.arrow_data.to_pandas()
-
- def peakmem_to_series(self, n, dtype):
- self.arrow_data.to_pandas()
-
-
-class ScalarAccess(object):
- param_names = ('size',)
- params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
-
- def setUp(self, n):
- self._array = A.from_pylist(list(range(n)))
-
- def time_as_py(self, n):
- for i in range(n):
- self._array[i].as_py()
--
To stop receiving notification emails like this one, please contact
[email protected].