Repository: arrow Updated Branches: refs/heads/master 3d9bfc2ae -> e93436503
ARROW-797: [Python] Make more explicitly curated public API page, sphinx cleanup Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #535 from wesm/ARROW-797 and squashes the following commits: bc344a8 [Wes McKinney] rat warning fb1d916 [Wes McKinney] build_sphinx target needs extra options 00c6a03 [Wes McKinney] Remove sphinxext until it's actually needed. Add some ASF license headers 60d6ab6 [Wes McKinney] Update gitignore 2b9f3f9 [Wes McKinney] Add _static stub 80e4a4b [Wes McKinney] Remove unused options b662b85 [Wes McKinney] Remove unused options 30ebd05 [Wes McKinney] Cleaning, explicit API index 83e31d5 [Wes McKinney] Initial API doc d7f4ed7 [Wes McKinney] Add NumPy extensions from pandas Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e9343650 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e9343650 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e9343650 Branch: refs/heads/master Commit: e9343650355b1820562bfa85d370cac2070b7c92 Parents: 3d9bfc2 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Thu Apr 13 12:46:58 2017 +0200 Committer: Uwe L. 
Korn <uw...@xhochy.com> Committed: Thu Apr 13 12:46:58 2017 +0200 ---------------------------------------------------------------------- ci/travis_script_python.sh | 2 +- python/cmake_modules/UseCython.cmake | 5 +- python/doc/.gitignore | 22 +- python/doc/Makefile | 4 +- python/doc/conf.py | 377 ---------------------------- python/doc/filesystems.rst | 58 ----- python/doc/getting_involved.rst | 37 --- python/doc/index.rst | 48 ---- python/doc/install.rst | 152 ----------- python/doc/jemalloc.rst | 52 ---- python/doc/pandas.rst | 119 --------- python/doc/parquet.rst | 91 ------- python/doc/source/_static/stub | 18 ++ python/doc/source/api.rst | 153 +++++++++++ python/doc/source/conf.py | 375 +++++++++++++++++++++++++++ python/doc/source/filesystems.rst | 58 +++++ python/doc/source/getting_involved.rst | 37 +++ python/doc/source/index.rst | 48 ++++ python/doc/source/install.rst | 152 +++++++++++ python/doc/source/jemalloc.rst | 52 ++++ python/doc/source/pandas.rst | 119 +++++++++ python/doc/source/parquet.rst | 91 +++++++ 22 files changed, 1128 insertions(+), 942 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/ci/travis_script_python.sh ---------------------------------------------------------------------- diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 604cd13..680eb01 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -123,7 +123,7 @@ python_version_tests() { if [[ "$PYTHON_VERSION" == "3.6" ]] then pip install -r doc/requirements.txt - python setup.py build_sphinx + python setup.py build_sphinx -s doc/source fi } http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/cmake_modules/UseCython.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/UseCython.cmake b/python/cmake_modules/UseCython.cmake index cee6066..7c06b02 100644 --- 
a/python/cmake_modules/UseCython.cmake +++ b/python/cmake_modules/UseCython.cmake @@ -64,7 +64,7 @@ set( CYTHON_NO_DOCSTRINGS OFF CACHE BOOL "Strip docstrings from the compiled module." ) set( CYTHON_FLAGS "" CACHE STRING "Extra flags to the cython compiler." ) -mark_as_advanced( CYTHON_ANNOTATE CYTHON_NO_DOCSTRINGS CYTHON_FLAGS ) +mark_as_advanced( CYTHON_ANNOTATE CYTHON_NO_DOCSTRINGS CYTHON_FLAGS) find_package( Cython REQUIRED ) find_package( PythonLibsNew REQUIRED ) @@ -131,7 +131,8 @@ function( compile_pyx _name pyx_target_name generated_files pyx_file) # Add the command to run the compiler. add_custom_target(${pyx_target_name} COMMAND ${CYTHON_EXECUTABLE} ${cxx_arg} ${include_directory_arg} - ${annotate_arg} ${no_docstrings_arg} ${cython_debug_arg} ${CYTHON_FLAGS} + ${annotate_arg} ${no_docstrings_arg} ${cython_debug_arg} + ${CYTHON_FLAGS} --output-file "${_name}.${extension}" ${pyx_location} DEPENDS ${pyx_location} # do not specify byproducts for now since they don't work with the older http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/.gitignore ---------------------------------------------------------------------- diff --git a/python/doc/.gitignore b/python/doc/.gitignore index 87d0413..3bee39f 100644 --- a/python/doc/.gitignore +++ b/python/doc/.gitignore @@ -1,3 +1,19 @@ -# auto-generated module documentation -pyarrow*.rst -modules.rst +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +_build +source/generated \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/Makefile ---------------------------------------------------------------------- diff --git a/python/doc/Makefile b/python/doc/Makefile index 7257583..65d6a4d 100644 --- a/python/doc/Makefile +++ b/python/doc/Makefile @@ -22,9 +22,9 @@ BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help help: http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/conf.py ---------------------------------------------------------------------- diff --git a/python/doc/conf.py b/python/doc/conf.py deleted file mode 100644 index e817bbd..0000000 --- a/python/doc/conf.py +++ /dev/null @@ -1,377 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. See accompanying LICENSE file. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import inspect -import os -import sys - -from sphinx import apidoc - -import sphinx_rtd_theme - - -__location__ = os.path.join(os.getcwd(), os.path.dirname( - inspect.getfile(inspect.currentframe()))) -output_dir = os.path.join(__location__) -module_dir = os.path.join(__location__, "..", "pyarrow") -cmd_line_template = "sphinx-apidoc -f -e -o {outputdir} {moduledir}" -cmd_line = cmd_line_template.format(outputdir=output_dir, moduledir=module_dir) -apidoc.main(cmd_line.split(" ")) - -on_rtd = os.environ.get('READTHEDOCS') == 'True' - -if not on_rtd: - # Hack: On RTD we use the pyarrow package from conda-forge as we cannot - # build pyarrow there. - sys.path.insert(0, os.path.abspath('..')) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. 
They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon' -] - -# numpydoc configuration -napoleon_use_rtype = False - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The encoding of source files. -# -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'pyarrow' -copyright = u'2016 Apache Software Foundation' -author = u'Apache Software Foundation' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = u'' -# The full version, including alpha/beta/rc tags. -release = u'' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# -# today = '' -# -# Else, today_fmt is used as the format for a strftime call. -# -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. 
-# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# The name for this set of Sphinx documents. -# "<project> v<release> documentation" by default. -# -# html_title = u'pyarrow v0.1.0' - -# A shorter title for the navigation bar. Default is the same as html_title. -# -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. 
-# -# html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# -# html_additional_pages = {} - -# If false, no module index is generated. -# -# html_domain_indices = True - -# If false, no index is generated. -# -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a <link> tag referring to it. 
The value of this option must be the -# base URL from which the finished HTML is served. -# -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' -# -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'pyarrowdoc' - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'pyarrow.tex', u'pyarrow Documentation', - u'Apache Arrow Team', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# -# latex_use_parts = False - -# If true, show page references after internal links. 
-# -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# -# latex_appendices = [] - -# It false, will not define \strong, \code, itleref, \crossref ... but only -# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added -# packages. -# -# latex_keep_old_macro_names = True - -# If false, no module index is generated. -# -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'pyarrow', u'pyarrow Documentation', - [author], 1) -] - -# If true, show URL addresses after external links. -# -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'pyarrow', u'pyarrow Documentation', - author, 'pyarrow', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -# -# texinfo_appendices = [] - -# If false, no module index is generated. -# -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -# -# texinfo_no_detailmenu = False http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/filesystems.rst ---------------------------------------------------------------------- diff --git a/python/doc/filesystems.rst b/python/doc/filesystems.rst deleted file mode 100644 index 9e00ddd..0000000 --- a/python/doc/filesystems.rst +++ /dev/null @@ -1,58 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -File interfaces and Memory Maps -=============================== - -PyArrow features a number of file-like interfaces - -Hadoop File System (HDFS) -------------------------- - -PyArrow comes with bindings to a C++-based interface to the Hadoop File -System. You connect like so: - -.. code-block:: python - - import pyarrow as pa - hdfs = pa.HdfsClient(host, port, user=user, kerb_ticket=ticket_cache_path) - -By default, ``pyarrow.HdfsClient`` uses libhdfs, a JNI-based interface to the -Java Hadoop client. This library is loaded **at runtime** (rather than at link -/ library load time, since the library may not be in your LD_LIBRARY_PATH), and -relies on some environment variables. - -* ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has - `lib/native/libhdfs.so`. - -* ``JAVA_HOME``: the location of your Java SDK installation. - -* ``ARROW_LIBHDFS_DIR`` (optional): explicit location of ``libhdfs.so`` if it is - installed somewhere other than ``$HADOOP_HOME/lib/native``. - -* ``CLASSPATH``: must contain the Hadoop jars. You can set these using: - -.. 
code-block:: shell - - export CLASSPATH=`$HADOOP_HOME/bin/hdfs classpath --glob` - -You can also use libhdfs3, a third-party C++ library for HDFS from Pivotal Labs: - -.. code-block:: python - - hdfs3 = pa.HdfsClient(host, port, user=user, kerb_ticket=ticket_cache_path, - driver='libhdfs3') http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/getting_involved.rst ---------------------------------------------------------------------- diff --git a/python/doc/getting_involved.rst b/python/doc/getting_involved.rst deleted file mode 100644 index 90fa3e4..0000000 --- a/python/doc/getting_involved.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -Getting Involved -================ - -Right now the primary audience for Apache Arrow is the developers of data -systems; most people will use Apache Arrow indirectly through systems that use -it for internal data handling and interoperating with other Arrow-enabled -systems.
- -Even if you do not plan to contribute to Apache Arrow itself or Arrow -integrations in other projects, we'd be happy to have you involved: - - * Join the mailing list: send an email to - `dev-subscr...@arrow.apache.org <mailto:dev-subscr...@arrow.apache.org>`_. - Share your ideas and use cases for the project or read through the - `Archive <http://mail-archives.apache.org/mod_mbox/arrow-dev/>`_. - * Follow our activity on `JIRA <https://issues.apache.org/jira/browse/ARROW>`_ - * Learn the `Format / Specification - <https://github.com/apache/arrow/tree/master/format>`_ - * Chat with us on `Slack <https://apachearrowslackin.herokuapp.com/>`_ - http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/index.rst ---------------------------------------------------------------------- diff --git a/python/doc/index.rst b/python/doc/index.rst deleted file mode 100644 index 608fff5..0000000 --- a/python/doc/index.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -Apache Arrow (Python) -===================== - -Arrow is a columnar in-memory analytics layer designed to accelerate big data. 
-It houses a set of canonical in-memory representations of flat and hierarchical -data along with multiple language-bindings for structure manipulation. It also -provides IPC and common algorithm implementations. - -This is the documentation of the Python API of Apache Arrow. For more details -on the format and other language bindings see -`the main page for Arrow <https://arrow.apache.org/>`_. Here we will only -detail the usage of the Python API for Arrow and the leaf libraries that add -additional functionality such as reading Apache Parquet files into Arrow -structures. - -.. toctree:: - :maxdepth: 2 - :caption: Getting Started - - install - pandas - filesystems - parquet - modules - getting_involved - -.. toctree:: - :maxdepth: 2 - :caption: Additional Features - - jemalloc MemoryPool <jemalloc.rst> http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/install.rst ---------------------------------------------------------------------- diff --git a/python/doc/install.rst b/python/doc/install.rst deleted file mode 100644 index 16d19ef..0000000 --- a/python/doc/install.rst +++ /dev/null @@ -1,152 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License.
- -Install PyArrow =============== - -Conda ----- - -To install the latest version of PyArrow from conda-forge using conda: - -.. code-block:: bash - - conda install -c conda-forge pyarrow - -Pip --- - -Install the latest version from PyPI: - -.. code-block:: bash - - pip install pyarrow - -.. note:: - Currently there are only binary artifacts available for Linux and MacOS. - Otherwise this will only pull the Python sources and assume an existing - installation of the C++ part of Arrow. - To retrieve the binary artifacts, you'll need a recent ``pip`` version that - supports features like the ``manylinux1`` tag. - -Building from source -------------------- - -First, clone the master git repository: - -.. code-block:: bash - - git clone https://github.com/apache/arrow.git arrow - -System requirements ~~~~~~~~~~~~~~~~~~~ - -Building pyarrow requires: - -* A C++11 compiler - - * Linux: gcc >= 4.8 or clang >= 3.5 - * OS X: XCode 6.4 or higher preferred - -* `CMake <https://cmake.org/>`_ - -Python requirements ~~~~~~~~~~~~~~~~~~~ - -You will need Python (CPython) 2.7, 3.4, or 3.5 installed. Earlier releases -are not being targeted. - -.. note:: - This library targets CPython only due to an emphasis on interoperability with - pandas and NumPy, which are only available for CPython. - -The build requires NumPy, Cython, and a few other Python dependencies: - -.. code-block:: bash - - pip install cython - cd arrow/python - pip install -r requirements.txt - -Installing Arrow C++ library ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -First, you should choose an installation location for Arrow C++. In the future -using the default system install location will work, but for now we are being -explicit: - -.. code-block:: bash - - export ARROW_HOME=$HOME/local - -Now, we build Arrow: - -.. code-block:: bash - - cd arrow/cpp - - mkdir dev-build - cd dev-build - - cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME ..
- - make - - # Use sudo here if $ARROW_HOME requires it - make install - -To get the optional Parquet support, you should also build and install -`parquet-cpp <https://github.com/apache/parquet-cpp/blob/master/README.md>`_. - -Install `pyarrow` -~~~~~~~~~~~~~~~~~ - - -.. code-block:: bash - - cd arrow/python - - # --with-parquet enables the Apache Parquet support in PyArrow - # --with-jemalloc enables the jemalloc allocator support in PyArrow - # --build-type=release disables debugging information and turns on - # compiler optimizations for native code - python setup.py build_ext --with-parquet --with-jemalloc --build-type=release install - python setup.py install - -.. warning:: - On XCode 6 and prior there are some known OS X `@rpath` issues. If you are - unable to import pyarrow, upgrading XCode may be the solution. - -.. note:: - In development installations, you will also need to set a correct - ``LD_LIBRARY_PATH``. This is most probably done with - ``export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH``. - - -.. code-block:: python - - In [1]: import pyarrow - - In [2]: pyarrow.from_pylist([1,2,3]) - Out[2]: - <pyarrow.array.Int64Array object at 0x7f899f3e60e8> - [ - 1, - 2, - 3 - ] - http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/jemalloc.rst ---------------------------------------------------------------------- diff --git a/python/doc/jemalloc.rst b/python/doc/jemalloc.rst deleted file mode 100644 index 33fe617..0000000 --- a/python/doc/jemalloc.rst +++ /dev/null @@ -1,52 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. 
http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -jemalloc MemoryPool =================== - -Arrow's default :class:`~pyarrow.memory.MemoryPool` uses the system's allocator -through the POSIX APIs. Although this already provides aligned allocation, the -POSIX interface doesn't support aligned reallocation. The default reallocation -strategy is to allocate a new region, copy over the old data and free the -previous region. Using `jemalloc <http://jemalloc.net/>`_ we can simply extend -the existing memory allocation to the requested size. While this may still be -linear in the size of allocated memory, it is orders of magnitude faster as only the page -mapping in the kernel is touched, not the actual data. - -The :mod:`~pyarrow.jemalloc` allocator is not enabled by default to allow the -use of the system allocator and/or other allocators like ``tcmalloc``. You can -either explicitly make it the default allocator or pass it only to single -operations. - -.. code:: python - - import pyarrow as pa - import pyarrow.jemalloc - import pyarrow.memory - - jemalloc_pool = pyarrow.jemalloc.default_pool() - - # Explicitly use jemalloc for allocating memory for an Arrow Table object - array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool) - - # Set the global pool - pyarrow.memory.set_default_pool(jemalloc_pool) - # This operation has no explicit MemoryPool specified and will thus - # also use jemalloc for its allocations.
- array = pa.Array.from_pylist([1, 2, 3]) - - http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/pandas.rst ---------------------------------------------------------------------- diff --git a/python/doc/pandas.rst b/python/doc/pandas.rst deleted file mode 100644 index 34445ae..0000000 --- a/python/doc/pandas.rst +++ /dev/null @@ -1,119 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -Pandas Interface ================ - -To interface with Pandas, PyArrow provides various conversion routines to -consume Pandas structures and convert back to them. - -DataFrames ---------- - -The equivalent of a Pandas DataFrame in Arrow is a :class:`pyarrow.table.Table`. -Both consist of a set of named columns of equal length. While Pandas only -supports flat columns, the Table also provides nested columns, so it can -represent more data than a DataFrame; a full conversion is therefore not always possible. - -Conversion from a Table to a DataFrame is done by calling -:meth:`pyarrow.table.Table.to_pandas`. The inverse is then achieved by using -:meth:`pyarrow.Table.from_pandas`. This conversion routine provides the -convenience parameter ``timestamps_to_ms``.
Although Arrow supports timestamps of -different resolutions, Pandas only supports nanosecond timestamps and most -other systems (e.g. Parquet) only work on millisecond timestamps. This parameter -can be used to perform the time conversion during the Pandas-to-Arrow -conversion. - -.. code-block:: python - - import pyarrow as pa - import pandas as pd - - df = pd.DataFrame({"a": [1, 2, 3]}) - # Convert from Pandas to Arrow - table = pa.Table.from_pandas(df) - # Convert back to Pandas - df_new = table.to_pandas() - - -Series ------- - -In Arrow, the most similar structure to a Pandas Series is an Array. -It is a vector that contains data of the same type in linear memory. You can -convert a Pandas Series to an Arrow Array using :meth:`pyarrow.array.from_pandas_series`. -As Arrow Arrays are always nullable, you can supply an optional mask using -the ``mask`` parameter to mark all null-entries. - -Type differences ----------------- - -With the current design of Pandas and Arrow, it is not possible to convert all -column types unmodified. One of the main issues here is that Pandas has no -support for nullable columns of arbitrary type. Also ``datetime64`` is currently -fixed to nanosecond resolution. On the other hand, Arrow may still be missing -support for some types. 
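This null-driven limitation can be demonstrated with Pandas alone. The sketch below (variable names are illustrative) shows why the conversion tables that follow map integer columns *with nulls* to ``float64``:

```python
import pandas as pd

# Without nulls, an integer column keeps an integer dtype.
ints = pd.Series([1, 2, 3])
print(ints.dtype)  # int64

# Introducing a null forces promotion to float64, since Pandas
# has no nullable storage for arbitrary (e.g. integer) columns.
with_null = pd.Series([1, None, 3])
print(with_null.dtype)  # float64
```

This is also why a Table-to-DataFrame conversion of integer data containing nulls cannot round-trip to the original Arrow types losslessly.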
- -Pandas -> Arrow Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -+------------------------+--------------------------+ -| Source Type (Pandas) | Destination Type (Arrow) | -+========================+==========================+ -| ``bool`` | ``BOOL`` | -+------------------------+--------------------------+ -| ``(u)int{8,16,32,64}`` | ``(U)INT{8,16,32,64}`` | -+------------------------+--------------------------+ -| ``float32`` | ``FLOAT`` | -+------------------------+--------------------------+ -| ``float64`` | ``DOUBLE`` | -+------------------------+--------------------------+ -| ``str`` / ``unicode`` | ``STRING`` | -+------------------------+--------------------------+ -| ``pd.Categorical`` | ``DICTIONARY`` | -+------------------------+--------------------------+ -| ``pd.Timestamp`` | ``TIMESTAMP(unit=ns)`` | -+------------------------+--------------------------+ -| ``datetime.date`` | ``DATE`` | -+------------------------+--------------------------+ - -Arrow -> Pandas Conversion -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -+-------------------------------------+--------------------------------------------------------+ -| Source Type (Arrow) | Destination Type (Pandas) | -+=====================================+========================================================+ -| ``BOOL`` | ``bool`` | -+-------------------------------------+--------------------------------------------------------+ -| ``BOOL`` *with nulls* | ``object`` (with values ``True``, ``False``, ``None``) | -+-------------------------------------+--------------------------------------------------------+ -| ``(U)INT{8,16,32,64}`` | ``(u)int{8,16,32,64}`` | -+-------------------------------------+--------------------------------------------------------+ -| ``(U)INT{8,16,32,64}`` *with nulls* | ``float64`` | -+-------------------------------------+--------------------------------------------------------+ -| ``FLOAT`` | ``float32`` | 
-+-------------------------------------+--------------------------------------------------------+ -| ``DOUBLE`` | ``float64`` | -+-------------------------------------+--------------------------------------------------------+ -| ``STRING`` | ``str`` | -+-------------------------------------+--------------------------------------------------------+ -| ``DICTIONARY`` | ``pd.Categorical`` | -+-------------------------------------+--------------------------------------------------------+ -| ``TIMESTAMP(unit=*)`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | -+-------------------------------------+--------------------------------------------------------+ -| ``DATE`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | -+-------------------------------------+--------------------------------------------------------+ http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/parquet.rst ---------------------------------------------------------------------- diff --git a/python/doc/parquet.rst b/python/doc/parquet.rst deleted file mode 100644 index 8e011e4..0000000 --- a/python/doc/parquet.rst +++ /dev/null @@ -1,91 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. 
- -Reading/Writing Parquet files -============================= - -If you have built ``pyarrow`` with Parquet support, i.e. ``parquet-cpp`` was -found during the build, you can read and write files in the Parquet format to/from Arrow -memory structures. The Parquet support code is located in the -:mod:`pyarrow.parquet` module and your package needs to be built with the -``--with-parquet`` flag for ``build_ext``. - -Reading Parquet ---------------- - -To read a Parquet file into Arrow memory, you can use the following code -snippet. It will read the whole Parquet file into memory as an -:class:`~pyarrow.table.Table`. - -.. code-block:: python - - import pyarrow.parquet as pq - - table = pq.read_table('<filename>') - -As DataFrames stored as Parquet are often stored in multiple files, a -convenience method :meth:`~pyarrow.parquet.read_multiple_files` is provided. - -If you already have the Parquet data available in memory or get it from a non-file -source, you can utilize :class:`pyarrow.io.BufferReader` to read it from -memory. As input to the :class:`~pyarrow.io.BufferReader` you can either supply -a Python ``bytes`` object or a :class:`pyarrow.io.Buffer`. - -.. code:: python - - import pyarrow.io as paio - import pyarrow.parquet as pq - - buf = ... # either bytes or paio.Buffer - reader = paio.BufferReader(buf) - table = pq.read_table(reader) - -Writing Parquet ---------------- - -Given an instance of :class:`pyarrow.table.Table`, the simplest way to -persist it to Parquet is by using the :meth:`pyarrow.parquet.write_table` -method. - -.. code-block:: python - - import pyarrow as pa - import pyarrow.parquet as pq - - table = pa.Table(..) - pq.write_table(table, '<filename>') - -By default this will write the Table as a single RowGroup using ``DICTIONARY`` -encoding. To increase the potential for parallelism when a query engine processes -a Parquet file, set the ``chunk_size`` to a fraction of the total number of rows. 
- -If you also want to compress the columns, you can select a compression -method using the ``compression`` argument. Typically, ``GZIP`` is the choice if -you want to minimize size and ``SNAPPY`` for performance. - -Instead of writing to a file, you can also write to Python ``bytes`` by -utilizing an :class:`pyarrow.io.InMemoryOutputStream()`: - -.. code:: python - - import pyarrow.io as paio - import pyarrow.parquet as pq - - table = ... - output = paio.InMemoryOutputStream() - pq.write_table(table, output) - pybytes = output.get_result().to_pybytes() http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/_static/stub ---------------------------------------------------------------------- diff --git a/python/doc/source/_static/stub b/python/doc/source/_static/stub new file mode 100644 index 0000000..765c78f --- /dev/null +++ b/python/doc/source/_static/stub @@ -0,0 +1,18 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/api.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst new file mode 100644 index 0000000..514dcf9 --- /dev/null +++ b/python/doc/source/api.rst @@ -0,0 +1,153 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. _api: + +************* +API Reference +************* + +.. _api.functions: + +Type Metadata and Schemas +------------------------- + +.. autosummary:: + :toctree: generated/ + + null + bool_ + int8 + int16 + int32 + int64 + uint8 + uint16 + uint32 + uint64 + float16 + float32 + float64 + timestamp + date32 + date64 + binary + string + decimal + list_ + struct + dictionary + field + DataType + Field + Schema + schema + +Scalar Value Types +------------------ + +.. autosummary:: + :toctree: generated/ + + NA + NAType + Scalar + ArrayValue + Int8Value + Int16Value + Int32Value + Int64Value + UInt8Value + UInt16Value + UInt32Value + UInt64Value + FloatValue + DoubleValue + ListValue + BinaryValue + StringValue + FixedSizeBinaryValue + +Array Types +----------- + +.. 
autosummary:: + :toctree: generated/ + + Array + NumericArray + IntegerArray + FloatingPointArray + BooleanArray + Int8Array + Int16Array + Int32Array + Int64Array + UInt8Array + UInt16Array + UInt32Array + UInt64Array + DictionaryArray + StringArray + +Tables and Record Batches +------------------------- + +.. autosummary:: + :toctree: generated/ + + Column + RecordBatch + Table + +Tensor type and Functions +------------------------- + +.. autosummary:: + :toctree: generated/ + + Tensor + write_tensor + get_tensor_size + read_tensor + +Input / Output and Shared Memory +-------------------------------- + +.. autosummary:: + :toctree: generated/ + + Buffer + BufferReader + InMemoryOutputStream + NativeFile + MemoryMappedFile + memory_map + create_memory_map + PythonFileInterface + +Interprocess Communication and Messaging +---------------------------------------- + +.. autosummary:: + :toctree: generated/ + + FileReader + FileWriter + StreamReader + StreamWriter http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/conf.py ---------------------------------------------------------------------- diff --git a/python/doc/source/conf.py b/python/doc/source/conf.py new file mode 100644 index 0000000..a9262bf --- /dev/null +++ b/python/doc/source/conf.py @@ -0,0 +1,375 @@ +# -*- coding: utf-8 -*- +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. 
+# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import inspect +import os +import sys + +import sphinx_rtd_theme + +on_rtd = os.environ.get('READTHEDOCS') == 'True' + +if not on_rtd: + # Hack: On RTD we use the pyarrow package from conda-forge as we cannot + # build pyarrow there. + sys.path.insert(0, os.path.abspath('..')) + +sys.path.extend([ + os.path.join(os.path.dirname(__file__), + '..', '../..') + +]) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.mathjax', + 'sphinx.ext.viewcode', + 'sphinx.ext.napoleon', +] + +# numpydoc configuration +napoleon_use_rtype = False + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +import glob +autosummary_generate = glob.glob("*.rst") + +# The encoding of source files. +# +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. 
+project = u'pyarrow' +copyright = u'2016 Apache Software Foundation' +author = u'Apache Software Foundation' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = u'' +# The full version, including alpha/beta/rc tags. +release = u'' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# +# today = '' +# +# Else, today_fmt is used as the format for a strftime call. +# +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. 
+# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# The name for this set of Sphinx documents. +# "<project> v<release> documentation" by default. +# +# html_title = u'pyarrow v0.1.0' + +# A shorter title for the navigation bar. Default is the same as html_title. +# +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# +# html_logo = None + +# The name of an image file (relative to this directory) to use as a favicon of +# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# +# html_extra_path = [] + +# If not None, a 'Last updated on:' timestamp is inserted at every page +# bottom, using the given strftime format. +# The empty string is equivalent to '%b %d, %Y'. 
+# +# html_last_updated_fmt = None + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# +# html_additional_pages = {} + +# If false, no module index is generated. +# +# html_domain_indices = True + +# If false, no index is generated. +# +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' +# +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# 'ja' uses this config value. +# 'zh' user can custom change `jieba` dictionary path. +# +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. 
+# +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'pyarrowdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'pyarrow.tex', u'pyarrow Documentation', + u'Apache Arrow Team', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# +# latex_use_parts = False + +# If true, show page references after internal links. +# +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# +# latex_appendices = [] + +# It false, will not define \strong, \code, itleref, \crossref ... but only +# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added +# packages. +# +# latex_keep_old_macro_names = True + +# If false, no module index is generated. +# +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'pyarrow', u'pyarrow Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. 
+# +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'pyarrow', u'pyarrow Documentation', + author, 'pyarrow', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +# +# texinfo_appendices = [] + +# If false, no module index is generated. +# +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# +# texinfo_no_detailmenu = False http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/filesystems.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/filesystems.rst b/python/doc/source/filesystems.rst new file mode 100644 index 0000000..9e00ddd --- /dev/null +++ b/python/doc/source/filesystems.rst @@ -0,0 +1,58 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +File interfaces and Memory Maps +=============================== + +PyArrow features a number of file-like interfaces. + +Hadoop File System (HDFS) +------------------------- + +PyArrow comes with bindings to a C++-based interface to the Hadoop File +System. You connect like so: + +.. code-block:: python + + import pyarrow as pa + hdfs = pa.HdfsClient(host, port, user=user, kerb_ticket=ticket_cache_path) + +By default, ``pyarrow.HdfsClient`` uses libhdfs, a JNI-based interface to the +Java Hadoop client. This library is loaded **at runtime** (rather than at link +/ library load time, since the library may not be in your LD_LIBRARY_PATH), and +relies on some environment variables. + +* ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has + ``lib/native/libhdfs.so``. + +* ``JAVA_HOME``: the location of your Java SDK installation. + +* ``ARROW_LIBHDFS_DIR`` (optional): explicit location of ``libhdfs.so`` if it is + installed somewhere other than ``$HADOOP_HOME/lib/native``. + +* ``CLASSPATH``: must contain the Hadoop jars. You can set these using: + +.. code-block:: shell + + export CLASSPATH=`$HADOOP_HOME/bin/hdfs classpath --glob` + +You can also use libhdfs3, a third-party C++ library for HDFS from Pivotal Labs: + +.. code-block:: python + + hdfs3 = pa.HdfsClient(host, port, user=user, kerb_ticket=ticket_cache_path, + driver='libhdfs3') http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/getting_involved.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/getting_involved.rst b/python/doc/source/getting_involved.rst new file mode 100644 index 0000000..90fa3e4 --- /dev/null +++ b/python/doc/source/getting_involved.rst @@ -0,0 +1,37 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. 
The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Getting Involved +================ + +Right now the primary audience for Apache Arrow are the developers of data +systems; most people will use Apache Arrow indirectly through systems that use +it for internal data handling and interoperating with other Arrow-enabled +systems. + +Even if you do not plan to contribute to Apache Arrow itself or Arrow +integrations in other projects, we'd be happy to have you involved: + + * Join the mailing list: send an email to + `dev-subscr...@arrow.apache.org <mailto:dev-subscr...@arrow.apache.org>`_. + Share your ideas and use cases for the project or read through the + `Archive <http://mail-archives.apache.org/mod_mbox/arrow-dev/>`_. + * Follow our activity on `JIRA <https://issues.apache.org/jira/browse/ARROW>`_ + * Learn the `Format / Specification + <https://github.com/apache/arrow/tree/master/format>`_ + * Chat with us on `Slack <https://apachearrowslackin.herokuapp.com/>`_ + http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/index.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/index.rst b/python/doc/source/index.rst new file mode 100644 index 0000000..ecb8e8f --- /dev/null +++ b/python/doc/source/index.rst @@ -0,0 +1,48 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Apache Arrow (Python) +===================== + +Arrow is a columnar in-memory analytics layer designed to accelerate big data. +It houses a set of canonical in-memory representations of flat and hierarchical +data along with multiple language bindings for structure manipulation. It also +provides IPC and common algorithm implementations. + +This is the documentation of the Python API of Apache Arrow. For more details +on the format and other language bindings see +`the main page for Arrow <https://arrow.apache.org/>`_. Here we will only +detail the usage of the Python API for Arrow and the leaf libraries that add +additional functionality such as reading Apache Parquet files into Arrow +structures. + +.. toctree:: + :maxdepth: 2 + :caption: Getting Started + + install + pandas + filesystems + parquet + api + getting_involved + +.. toctree:: + :maxdepth: 2 + :caption: Additional Features + + jemalloc MemoryPool <jemalloc.rst> http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/install.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/install.rst b/python/doc/source/install.rst new file mode 100644 index 0000000..16d19ef --- /dev/null +++ b/python/doc/source/install.rst @@ -0,0 +1,152 @@ +.. 
Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Install PyArrow +=============== + +Conda +----- + +To install the latest version of PyArrow from conda-forge using conda: + +.. code-block:: bash + + conda install -c conda-forge pyarrow + +Pip +--- + +Install the latest version from PyPI: + +.. code-block:: bash + + pip install pyarrow + +.. note:: + Currently there are only binary artifacts available for Linux and macOS. + Otherwise this will only pull the Python sources and assumes an existing + installation of the C++ part of Arrow. + To retrieve the binary artifacts, you'll need a recent ``pip`` version that + supports features like the ``manylinux1`` tag. + +Building from source +-------------------- + +First, clone the master git repository: + +.. code-block:: bash + + git clone https://github.com/apache/arrow.git arrow + +System requirements +~~~~~~~~~~~~~~~~~~~ + +Building pyarrow requires: + +* A C++11 compiler + + * Linux: gcc >= 4.8 or clang >= 3.5 + * OS X: XCode 6.4 or higher preferred + +* `CMake <https://cmake.org/>`_ + +Python requirements +~~~~~~~~~~~~~~~~~~~ + +You will need Python (CPython) 2.7, 3.4, or 3.5 installed. Earlier releases +are not being targeted. + +.. 
note:: + This library targets CPython only due to an emphasis on interoperability with + pandas and NumPy, which are only available for CPython. + +The build requires NumPy, Cython, and a few other Python dependencies: + +.. code-block:: bash + + pip install cython + cd arrow/python + pip install -r requirements.txt + +Installing Arrow C++ library +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, you should choose an installation location for Arrow C++. In the future +using the default system install location will work, but for now we are being +explicit: + +.. code-block:: bash + + export ARROW_HOME=$HOME/local + +Now, we build Arrow: + +.. code-block:: bash + + cd arrow/cpp + + mkdir dev-build + cd dev-build + + cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME .. + + make + + # Use sudo here if $ARROW_HOME requires it + make install + +To get the optional Parquet support, you should also build and install +`parquet-cpp <https://github.com/apache/parquet-cpp/blob/master/README.md>`_. + +Install `pyarrow` +~~~~~~~~~~~~~~~~~ + + +.. code-block:: bash + + cd arrow/python + + # --with-parquet enables the Apache Parquet support in PyArrow + # --with-jemalloc enables the jemalloc allocator support in PyArrow + # --build-type=release disables debugging information and turns on + # compiler optimizations for native code + python setup.py build_ext --with-parquet --with-jemalloc --build-type=release install + python setup.py install + +.. warning:: + On XCode 6 and prior there are some known OS X `@rpath` issues. If you are + unable to import pyarrow, upgrading XCode may be the solution. + +.. note:: + In development installations, you will also need to set a correct + ``LD_LIBRARY_PATH``. This is most probably done with + ``export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH``. + + +.. 
code-block:: python + + In [1]: import pyarrow + + In [2]: pyarrow.from_pylist([1,2,3]) + Out[2]: + <pyarrow.array.Int64Array object at 0x7f899f3e60e8> + [ + 1, + 2, + 3 + ] + http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/jemalloc.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/jemalloc.rst b/python/doc/source/jemalloc.rst new file mode 100644 index 0000000..33fe617 --- /dev/null +++ b/python/doc/source/jemalloc.rst @@ -0,0 +1,52 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +jemalloc MemoryPool +=================== + +Arrow's default :class:`~pyarrow.memory.MemoryPool` uses the system's allocator +through the POSIX APIs. Although this already provides aligned allocation, the +POSIX interface doesn't support aligned reallocation. The default reallocation +strategy is to allocate a new region, copy over the old data and free the +previous region. Using `jemalloc <http://jemalloc.net/>`_ we can simply extend +the existing memory allocation to the requested size. While this may still be +linear in the size of allocated memory, it is magnitudes faster as only the page +mapping in the kernel is touched, not the actual data. 
+ +The :mod:`~pyarrow.jemalloc` allocator is not enabled by default to allow the +use of the system allocator and/or other allocators like ``tcmalloc``. You can +either explicitly make it the default allocator or pass it only to single +operations. + +.. code:: python + + import pyarrow as pa + import pyarrow.jemalloc + import pyarrow.memory + + jemalloc_pool = pyarrow.jemalloc.default_pool() + + # Explicitly use jemalloc for allocating memory for an Arrow Table object + array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool) + + # Set the global pool + pyarrow.memory.set_default_pool(jemalloc_pool) + # This operation has no explicit MemoryPool specified and will thus will + # also use jemalloc for its allocations. + array = pa.Array.from_pylist([1, 2, 3]) + + http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/pandas.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/pandas.rst b/python/doc/source/pandas.rst new file mode 100644 index 0000000..34445ae --- /dev/null +++ b/python/doc/source/pandas.rst @@ -0,0 +1,119 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Pandas Interface +================ + +To interface with Pandas, PyArrow provides various conversion routines to +consume Pandas structures and convert back to them. + +DataFrames +---------- + +The equivalent to a Pandas DataFrame in Arrow is a :class:`pyarrow.table.Table`. +Both consist of a set of named columns of equal length. While Pandas only +supports flat columns, the Table also provides nested columns, thus it can +represent more data than a DataFrame, so a full conversion is not always possible. + +Conversion from a Table to a DataFrame is done by calling +:meth:`pyarrow.table.Table.to_pandas`. The inverse is then achieved by using +:meth:`pyarrow.Table.from_pandas`. This conversion routine provides the +convience parameter ``timestamps_to_ms``. Although Arrow supports timestamps of +different resolutions, Pandas only supports nanosecond timestamps and most +other systems (e.g. Parquet) only work on millisecond timestamps. This parameter +can be used to already do the time conversion during the Pandas to Arrow +conversion. + +.. code-block:: python + + import pyarrow as pa + import pandas as pd + + df = pd.DataFrame({"a": [1, 2, 3]}) + # Convert from Pandas to Arrow + table = pa.Table.from_pandas(df) + # Convert back to Pandas + df_new = table.to_pandas() + + +Series +------ + +In Arrow, the most similar structure to a Pandas Series is an Array. +It is a vector that contains data of the same type as linear memory. You can +convert a Pandas Series to an Arrow Array using :meth:`pyarrow.array.from_pandas_series`. +As Arrow Arrays are always nullable, you can supply an optional mask using +the ``mask`` parameter to mark all null-entries. + +Type differences +---------------- + +With the current design of Pandas and Arrow, it is not possible to convert all +column types unmodified. One of the main issues here is that Pandas has no +support for nullable columns of arbitrary type. Also ``datetime64`` is currently +fixed to nanosecond resolution. 
On the other side, Arrow might be still missing +support for some types. + +Pandas -> Arrow Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++------------------------+--------------------------+ +| Source Type (Pandas) | Destination Type (Arrow) | ++========================+==========================+ +| ``bool`` | ``BOOL`` | ++------------------------+--------------------------+ +| ``(u)int{8,16,32,64}`` | ``(U)INT{8,16,32,64}`` | ++------------------------+--------------------------+ +| ``float32`` | ``FLOAT`` | ++------------------------+--------------------------+ +| ``float64`` | ``DOUBLE`` | ++------------------------+--------------------------+ +| ``str`` / ``unicode`` | ``STRING`` | ++------------------------+--------------------------+ +| ``pd.Categorical`` | ``DICTIONARY`` | ++------------------------+--------------------------+ +| ``pd.Timestamp`` | ``TIMESTAMP(unit=ns)`` | ++------------------------+--------------------------+ +| ``datetime.date`` | ``DATE`` | ++------------------------+--------------------------+ + +Arrow -> Pandas Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++-------------------------------------+--------------------------------------------------------+ +| Source Type (Arrow) | Destination Type (Pandas) | ++=====================================+========================================================+ +| ``BOOL`` | ``bool`` | ++-------------------------------------+--------------------------------------------------------+ +| ``BOOL`` *with nulls* | ``object`` (with values ``True``, ``False``, ``None``) | ++-------------------------------------+--------------------------------------------------------+ +| ``(U)INT{8,16,32,64}`` | ``(u)int{8,16,32,64}`` | ++-------------------------------------+--------------------------------------------------------+ +| ``(U)INT{8,16,32,64}`` *with nulls* | ``float64`` | ++-------------------------------------+--------------------------------------------------------+ +| ``FLOAT`` | ``float32`` | 
++-------------------------------------+--------------------------------------------------------+ +| ``DOUBLE`` | ``float64`` | ++-------------------------------------+--------------------------------------------------------+ +| ``STRING`` | ``str`` | ++-------------------------------------+--------------------------------------------------------+ +| ``DICTIONARY`` | ``pd.Categorical`` | ++-------------------------------------+--------------------------------------------------------+ +| ``TIMESTAMP(unit=*)`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | ++-------------------------------------+--------------------------------------------------------+ +| ``DATE`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | ++-------------------------------------+--------------------------------------------------------+ http://git-wip-us.apache.org/repos/asf/arrow/blob/e9343650/python/doc/source/parquet.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst new file mode 100644 index 0000000..8e011e4 --- /dev/null +++ b/python/doc/source/parquet.rst @@ -0,0 +1,91 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. 
+ +Reading/Writing Parquet files +============================= + +If you have built ``pyarrow`` with Parquet support, i.e. ``parquet-cpp`` was +found during the build, you can read files in the Parquet format to/from Arrow +memory structures. The Parquet support code is located in the +:mod:`pyarrow.parquet` module and your package needs to be built with the +``--with-parquet`` flag for ``build_ext``. + +Reading Parquet +--------------- + +To read a Parquet file into Arrow memory, you can use the following code +snippet. It will read the whole Parquet file into memory as an +:class:`~pyarrow.table.Table`. + +.. code-block:: python + + import pyarrow.parquet as pq + + table = pq.read_table('<filename>') + +As DataFrames stored as Parquet are often stored in multiple files, a +convenience method :meth:`~pyarrow.parquet.read_multiple_files` is provided. + +If you already have the Parquet available in memory or get it via non-file +source, you can utilize :class:`pyarrow.io.BufferReader` to read it from +memory. As input to the :class:`~pyarrow.io.BufferReader` you can either supply +a Python ``bytes`` object or a :class:`pyarrow.io.Buffer`. + +.. code:: python + + import pyarrow.io as paio + import pyarrow.parquet as pq + + buf = ... # either bytes or paio.Buffer + reader = paio.BufferReader(buf) + table = pq.read_table(reader) + +Writing Parquet +--------------- + +Given an instance of :class:`pyarrow.table.Table`, the most simple way to +persist it to Parquet is by using the :meth:`pyarrow.parquet.write_table` +method. + +.. code-block:: python + + import pyarrow as pa + import pyarrow.parquet as pq + + table = pa.Table(..) + pq.write_table(table, '<filename>') + +By default this will write the Table as a single RowGroup using ``DICTIONARY`` +encoding. To increase the potential of parallelism a query engine can process +a Parquet file, set the ``chunk_size`` to a fraction of the total number of rows. 
+ +If you also want to compress the columns, you can select a compression +method using the ``compression`` argument. Typically, ``GZIP`` is the choice if +you want to minimize size and ``SNAPPY`` for performance. + +Instead of writing to a file, you can also write to Python ``bytes`` by +utilizing an :class:`pyarrow.io.InMemoryOutputStream()`: + +.. code:: python + + import pyarrow.io as paio + import pyarrow.parquet as pq + + table = ... + output = paio.InMemoryOutputStream() + pq.write_table(table, output) + pybytes = output.get_result().to_pybytes()