http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/HAWQInputFormatforMapReduce.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/HAWQInputFormatforMapReduce.html.md.erb 
b/datamgmt/HAWQInputFormatforMapReduce.html.md.erb
deleted file mode 100644
index a6fcca2..0000000
--- a/datamgmt/HAWQInputFormatforMapReduce.html.md.erb
+++ /dev/null
@@ -1,304 +0,0 @@
----
-title: HAWQ InputFormat for MapReduce
----
-
-MapReduce is a programming model developed by Google for processing and 
generating large data sets on an array of commodity servers. You can use the 
HAWQ InputFormat class to enable MapReduce jobs to access HAWQ data stored in 
HDFS.
-
-To use HAWQ InputFormat, you need only provide the URL of the database to connect to, along with the name of the table you want to access. HAWQ InputFormat fetches only the metadata of the database and table of interest, which is much less data than the table data itself. After getting the metadata, HAWQ InputFormat determines where and how the table data is stored in HDFS. It reads and parses those HDFS files and processes the parsed table tuples directly inside a Map task.
-
-This chapter describes the document format and schema for defining HAWQ 
MapReduce jobs.
-
-## <a id="supporteddatatypes"></a>Supported Data Types
-
-HAWQ InputFormat supports the following data types:
-
-| SQL/HAWQ                | JDBC/JAVA                                        | setXXX        | getXXX        |
-|-------------------------|--------------------------------------------------|---------------|---------------|
-| DECIMAL/NUMERIC         | java.math.BigDecimal                             | setBigDecimal | getBigDecimal |
-| FLOAT8/DOUBLE PRECISION | double                                           | setDouble     | getDouble     |
-| INT8/BIGINT             | long                                             | setLong       | getLong       |
-| INTEGER/INT4/INT        | int                                              | setInt        | getInt        |
-| FLOAT4/REAL             | float                                            | setFloat      | getFloat      |
-| SMALLINT/INT2           | short                                            | setShort      | getShort      |
-| BOOL/BOOLEAN            | boolean                                          | setBoolean    | getBoolean    |
-| VARCHAR/CHAR/TEXT       | String                                           | setString     | getString     |
-| DATE                    | java.sql.Date                                    | setDate       | getDate       |
-| TIME/TIMETZ             | java.sql.Time                                    | setTime       | getTime       |
-| TIMESTAMP/TIMESTAMPTZ   | java.sql.Timestamp                               | setTimestamp  | getTimestamp  |
-| ARRAY                   | java.sql.Array                                   | setArray      | getArray      |
-| BIT/VARBIT              | com.pivotal.hawq.mapreduce.datatype.             | setVarbit     | getVarbit     |
-| BYTEA                   | byte\[\]                                         | setByte       | getByte       |
-| INTERVAL                | com.pivotal.hawq.mapreduce.datatype.HAWQInterval | setInterval   | getInterval   |
-| POINT                   | com.pivotal.hawq.mapreduce.datatype.HAWQPoint    | setPoint      | getPoint      |
-| LSEG                    | com.pivotal.hawq.mapreduce.datatype.HAWQLseg     | setLseg       | getLseg       |
-| BOX                     | com.pivotal.hawq.mapreduce.datatype.HAWQBox      | setBox        | getBox        |
-| CIRCLE                  | com.pivotal.hawq.mapreduce.datatype.HAWQCircle   | setCircle     | getCircle     |
-| PATH                    | com.pivotal.hawq.mapreduce.datatype.HAWQPath     | setPath       | getPath       |
-| POLYGON                 | com.pivotal.hawq.mapreduce.datatype.HAWQPolygon  | setPolygon    | getPolygon    |
-| MACADDR                 | com.pivotal.hawq.mapreduce.datatype.HAWQMacaddr  | setMacaddr    | getMacaddr    |
-| INET                    | com.pivotal.hawq.mapreduce.datatype.HAWQInet     | setInet       | getInet       |
-| CIDR                    | com.pivotal.hawq.mapreduce.datatype.HAWQCIDR     | setCIDR       | getCIDR       |
-
-## <a id="hawqinputformatexample"></a>HAWQ InputFormat Example
-
-The following example shows how you can use the `HAWQInputFormat` class to 
access HAWQ table data from MapReduce jobs.
-
-``` java
-package com.mycompany.app;
-import com.pivotal.hawq.mapreduce.HAWQException;
-import com.pivotal.hawq.mapreduce.HAWQInputFormat;
-import com.pivotal.hawq.mapreduce.HAWQRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.hadoop.io.IntWritable;
-
-import java.io.IOException;
-public class HAWQInputFormatDemoDriver extends Configured
-implements Tool {
-
-    // CREATE TABLE employees (
-    // id INTEGER NOT NULL, name VARCHAR(32) NOT NULL);
-    public static class DemoMapper extends
-        Mapper<Void, HAWQRecord, IntWritable, Text> {
-       int id = 0;
-       String name = null;
-       public void map(Void key, HAWQRecord value, Context context)
-        throws IOException, InterruptedException {
-        try {
-        id = value.getInt(1);
-        name = value.getString(2);
-        } catch (HAWQException hawqE) {
-        throw new IOException(hawqE.getMessage());
-        }
-        context.write(new IntWritable(id), new Text(name));
-       }
-    }
-    private static int printUsage() {
-       System.out.println("HAWQInputFormatDemoDriver "
-           + "<database_url> <table_name> <output_path> [username] [password]");
-       ToolRunner.printGenericCommandUsage(System.out);
-       return 2;
-    }
- 
-    public int run(String[] args) throws Exception {
-       if (args.length < 3) {
-        return printUsage();
-       }
-       Job job = Job.getInstance(getConf());
-       job.setJobName("hawq-inputformat-demo");
-       job.setJarByClass(HAWQInputFormatDemoDriver.class);
-       job.setMapperClass(DemoMapper.class);
-       job.setMapOutputValueClass(Text.class);
-       job.setOutputValueClass(Text.class);
-       String db_url = args[0];
-       String table_name = args[1];
-       String output_path = args[2];
-       String user_name = null;
-       if (args.length > 3) {
-         user_name = args[3];
-       }
-       String password = null;
-       if (args.length > 4) {
-         password = args[4];
-       }
-       job.setInputFormatClass(HAWQInputFormat.class);
-       HAWQInputFormat.setInput(job.getConfiguration(), db_url,
-       user_name, password, table_name);
-       FileOutputFormat.setOutputPath(job, new
-       Path(output_path));
-       job.setNumReduceTasks(0);
-       int res = job.waitForCompletion(true) ? 0 : 1;
-       return res;
-    }
-    
-    public static void main(String[] args) throws Exception {
-       int res = ToolRunner.run(new Configuration(),
-         new HAWQInputFormatDemoDriver(), args);
-       System.exit(res);
-    }
-}
-```
-
-**To compile and run the example:**
-
-1.  Create a work directory:
-
-    ``` shell
-    $ mkdir mrwork
-    $ cd mrwork
-    ```
- 
-2.  Copy and paste the Java code above into a `.java` file.
-
-    ``` shell
-    $ mkdir -p com/mycompany/app
-    $ cd com/mycompany/app
-    $ vi HAWQInputFormatDemoDriver.java
-    ```
-
-3.  Note the following dependencies required for compilation:
-    1.  `HAWQInputFormat` jars (located in the 
`$GPHOME/lib/postgresql/hawq-mr-io` directory):
-        -   `hawq-mapreduce-common.jar`
-        -   `hawq-mapreduce-ao.jar`
-        -   `hawq-mapreduce-parquet.jar`
-        -   `hawq-mapreduce-tool.jar`
-
-    2.  Required 3rd party jars (located in the 
`$GPHOME/lib/postgresql/hawq-mr-io/lib` directory):
-        -   `parquet-common-1.1.0.jar`
-        -   `parquet-format-1.1.0.jar`
-        -   `parquet-hadoop-1.1.0.jar`
-        -   `postgresql-n.n-n-jdbc4.jar`
-        -   `snakeyaml-n.n.jar`
-
-    3.  Hadoop Mapreduce related jars (located in the install directory of 
your Hadoop distribution).
-
-4.  Compile the Java program.  You may choose to use a different compilation 
command:
-
-    ``` shell
-    javac -classpath 
/usr/hdp/2.4.2.0-258/hadoop-mapreduce/*:/usr/local/hawq/lib/postgresql/hawq-mr-io/*:/usr/local/hawq/lib/postgresql/hawq-mr-io/lib/*:/usr/hdp/current/hadoop-client/*
 HAWQInputFormatDemoDriver.java
-    ```
-   
-5.  Build the JAR file.
-
-    ``` shell
-    $ cd ../../..
-    $ jar cf my-app.jar com
-    $ cp my-app.jar /tmp
-    ```
-    
-6.  Check that you have installed HAWQ and HDFS and your HAWQ cluster is 
running.
-
-7.  Create sample table:
-    1.  Log in to HAWQ:
-
-        ``` shell
-         $ psql -d postgres 
-        ```
-
-    2.  Create the table:
-
-        ``` sql
-        CREATE TABLE employees (
-        id INTEGER NOT NULL,
-        name TEXT NOT NULL);
-        ```
-
-        Or a Parquet table:
-
-        ``` sql
-        CREATE TABLE employees ( id INTEGER NOT NULL, name TEXT NOT NULL) WITH 
(APPENDONLY=true, ORIENTATION=parquet);
-        ```
-
-    3.  Insert one tuple:
-
-        ``` sql
-        INSERT INTO employees VALUES (1, 'Paul');
-        \q
-        ```
-8.  Ensure the system `pg_hba.conf` configuration file is set up to allow 
`gpadmin` access to the `postgres` database.
-
-9.  Run the MapReduce job. The following shell script snippet shows how to set up the environment and submit the job:
-
-    ``` shell
-    #!/bin/bash
-    
-    # set up environment variables
-    HAWQMRLIB=/usr/local/hawq/lib/postgresql/hawq-mr-io
-    export 
HADOOP_CLASSPATH=$HAWQMRLIB/hawq-mapreduce-ao.jar:$HAWQMRLIB/hawq-mapreduce-common.jar:$HAWQMRLIB/hawq-mapreduce-tool.jar:$HAWQMRLIB/hawq-mapreduce-parquet.jar:$HAWQMRLIB/lib/postgresql-9.2-1003-jdbc4.jar:$HAWQMRLIB/lib/snakeyaml-1.12.jar:$HAWQMRLIB/lib/parquet-hadoop-1.1.0.jar:$HAWQMRLIB/lib/parquet-common-1.1.0.jar:$HAWQMRLIB/lib/parquet-format-1.0.0.jar
-    export 
LIBJARS=$HAWQMRLIB/hawq-mapreduce-ao.jar,$HAWQMRLIB/hawq-mapreduce-common.jar,$HAWQMRLIB/hawq-mapreduce-tool.jar,$HAWQMRLIB/lib/postgresql-9.2-1003-jdbc4.jar,$HAWQMRLIB/lib/snakeyaml-1.12.jar,$HAWQMRLIB/hawq-mapreduce-parquet.jar,$HAWQMRLIB/lib/parquet-hadoop-1.1.0.jar,$HAWQMRLIB/lib/parquet-common-1.1.0.jar,$HAWQMRLIB/lib/parquet-format-1.0.0.jar
-    
-    # usage:  hadoop jar JARFILE CLASSNAME -libjars JARS <database_url> 
<table_name> <output_path_on_HDFS>
-    #   - writing output to HDFS, so run as hdfs user
-    #   - if not using the default postgres port, replace 5432 with port 
number for your HAWQ cluster
-    HADOOP_USER_NAME=hdfs hadoop jar /tmp/my-app.jar 
com.mycompany.app.HAWQInputFormatDemoDriver -libjars $LIBJARS 
localhost:5432/postgres employees /tmp/employees
-    ```
-    
-    The MapReduce job output is written to the `/tmp/employees` directory on 
the HDFS file system.
-
-10.  Use the following commands to check the result of the MapReduce job:
-
-    ``` shell
-    $ sudo -u hdfs hdfs dfs -ls /tmp/employees
-    $ sudo -u hdfs hdfs dfs -cat /tmp/employees/*
-    ```
-
-    The output will appear as follows:
-
-    ``` pre
-    1 Paul
-    ```
-        
-11.  If you choose to run the program again, delete the output file and directory:
-    
-    ``` shell
-    $ sudo -u hdfs hdfs dfs -rm /tmp/employees/*
-    $ sudo -u hdfs hdfs dfs -rmdir /tmp/employees
-    ```
-
-## <a id="accessinghawqdata"></a>Accessing HAWQ Data
-
-You can access HAWQ data using the `HAWQInputFormat.setInput()` interface.  
You will use a different API signature depending on whether HAWQ is running or 
not.
-
--   When HAWQ is running, use `HAWQInputFormat.setInput(Configuration conf, 
String db_url, String username, String password, String tableName)`.
--   When HAWQ is not running, first extract the table metadata to a file with 
the Metadata Export Tool and then use `HAWQInputFormat.setInput(Configuration 
conf, String pathStr)`.
-
-### <a id="hawqinputformatsetinput"></a>HAWQ is Running
-
-``` java
-  /**
-    * Initializes the map-part of the job with the appropriate input settings
-    * through connecting to Database.
-    *
-    * @param conf
-    * The map-reduce job configuration
-    * @param db_url
-    * The database URL to connect to
-    * @param username
-    * The username for setting up a connection to the database
-    * @param password
-    * The password for setting up a connection to the database
-    * @param tableName
-    * The name of the table to access
-    * @throws Exception
-    */
-public static void setInput(Configuration conf, String db_url,
-    String username, String password, String tableName)
-throws Exception;
-```
-
-### <a id="metadataexporttool"></a>HAWQ is not Running
-
-Use the metadata export tool, `hawq extract`, to export the metadata of the 
target table into a local YAML file:
-
-``` shell
-$ hawq extract [-h hostname] [-p port] [-U username] [-d database] [-o 
output_file] [-W] <tablename>
-```
-
-Using the extracted metadata, access HAWQ data through the following 
interface.  Pass the complete path to the `.yaml` file in the `pathStr` 
argument.
-
-``` java
- /**
-   * Initializes the map-part of the job with the appropriate input settings 
through reading metadata file stored in local filesystem.
-   *
-   * To get the metadata file, run hawq extract first
-   *
-   * @param conf
-   * The map-reduce job configuration
-   * @param pathStr
-   * The metadata file path in local filesystem. e.g.
-   * /home/gpadmin/metadata/postgres_test
-   * @throws Exception
-   */
-public static void setInput(Configuration conf, String pathStr)
-   throws Exception;
-```
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/Transactions.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/Transactions.html.md.erb 
b/datamgmt/Transactions.html.md.erb
deleted file mode 100644
index dfc9a5e..0000000
--- a/datamgmt/Transactions.html.md.erb
+++ /dev/null
@@ -1,54 +0,0 @@
----
-title: Working with Transactions
----
-
-This topic describes transaction support in HAWQ.
-
-Transactions allow you to bundle multiple SQL statements in one all-or-nothing 
operation.
-
-The following are the HAWQ SQL transaction commands:
-
--   `BEGIN` or `START TRANSACTION` starts a transaction block.
--   `END` or `COMMIT` commits the results of a transaction.
--   `ROLLBACK` abandons a transaction without making any changes.
--   `SAVEPOINT` marks a place in a transaction and enables partial rollback. 
You can roll back commands executed after a savepoint while maintaining 
commands executed before the savepoint.
--   `ROLLBACK TO SAVEPOINT` rolls back a transaction to a savepoint.
--   `RELEASE SAVEPOINT` destroys a savepoint within a transaction.
-
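-The following sketch shows how these commands combine; the `accounts` table and its values are only illustrative. The second `INSERT` is undone by rolling back to the savepoint, while the first is kept and committed.
-
-``` sql
-BEGIN;
-INSERT INTO accounts VALUES (1, 'alice', 100.00);
-SAVEPOINT before_second_insert;
-INSERT INTO accounts VALUES (2, 'bob', -999.00);
-ROLLBACK TO SAVEPOINT before_second_insert;
-RELEASE SAVEPOINT before_second_insert;
-COMMIT;
-```
-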
-## <a id="topic8"></a>Transaction Isolation Levels
-
-HAWQ accepts the standard SQL transaction isolation levels as follows:
-
--   *read uncommitted* and *read committed* behave like the standard *read committed*
--   *serializable* and *repeatable read* behave like the standard *serializable*
-
-The following information describes the behavior of the HAWQ transaction 
levels:
-
--   **read committed/read uncommitted** — Provides fast, simple, partial 
transaction isolation. With read committed and read uncommitted transaction 
isolation, `SELECT` transactions operate on a snapshot of the database taken 
when the query started.
-
-A `SELECT` query:
-
--   Sees data committed before the query starts.
--   Sees updates executed within the transaction.
--   Does not see uncommitted data outside the transaction.
--   Can possibly see changes that concurrent transactions made if the 
concurrent transaction is committed after the initial read in its own 
transaction.
-
-Successive `SELECT` queries in the same transaction can see different data if 
other concurrent transactions commit changes before the queries start.
-
-Read committed or read uncommitted transaction isolation may be inadequate for 
applications that perform complex queries and require a consistent view of the 
database.
-
--   **serializable/repeatable read** — Provides strict transaction isolation 
in which transactions execute as if they run one after another rather than 
concurrently. Applications on the serializable or repeatable read level must be 
designed to retry transactions in case of serialization failures.
-
-A `SELECT` query:
-
--   Sees a snapshot of the data as of the start of the transaction (not as of 
the start of the current query within the transaction).
--   Sees only data committed before the query starts.
--   Sees updates executed within the transaction.
--   Does not see uncommitted data outside the transaction.
--   Does not see changes that concurrent transactions made.
-
-    Successive `SELECT` commands within a single transaction always see the 
same data.
-
-The default transaction isolation level in HAWQ is *read committed*. To change 
the isolation level for a transaction, declare the isolation level when you 
`BEGIN` the transaction or use the `SET TRANSACTION` command after the 
transaction starts.
-
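-For example, either of the following forms runs a transaction at the *serializable* level; the `sales` table is only illustrative.
-
-``` sql
-BEGIN ISOLATION LEVEL SERIALIZABLE;
-SELECT count(*) FROM sales;
-COMMIT;
-
-BEGIN;
-SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-SELECT count(*) FROM sales;
-COMMIT;
-```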
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/about_statistics.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/about_statistics.html.md.erb 
b/datamgmt/about_statistics.html.md.erb
deleted file mode 100644
index 5e2184a..0000000
--- a/datamgmt/about_statistics.html.md.erb
+++ /dev/null
@@ -1,209 +0,0 @@
----
-title: About Database Statistics
----
-
-## <a id="overview"></a>Overview
-
-Statistics are metadata that describe the data stored in the database. The 
query optimizer needs up-to-date statistics to choose the best execution plan 
for a query. For example, if a query joins two tables and one of them must be 
broadcast to all segments, the optimizer can choose the smaller of the two 
tables to minimize network traffic.
-
-The statistics used by the optimizer are calculated and saved in the system 
catalog by the `ANALYZE` command. There are three ways to initiate an analyze 
operation:
-
--   You can run the `ANALYZE` command directly.
--   You can run the `analyzedb` management utility outside of the database, at 
the command line.
--   An automatic analyze operation can be triggered when DML operations are 
performed on tables that have no statistics or when a DML operation modifies a 
number of rows greater than a specified threshold.
-
-These methods are described in the following sections.
-
-Calculating statistics consumes time and resources, so HAWQ produces estimates by calculating statistics on samples of large tables. In most cases, the default settings provide the information needed to generate correct execution plans for queries. If the statistics produced do not yield optimal query execution plans, the administrator can tune configuration parameters to produce more accurate statistics by increasing the sample size or the granularity of statistics saved in the system catalog. Producing more accurate statistics has CPU and storage costs and may not produce better plans, so it is important to view explain plans and test query performance to ensure that the additional statistics-related costs result in better query performance.
-
-## <a id="topic_oq3_qxj_3s"></a>System Statistics
-
-### <a id="tablesize"></a>Table Size
-
-The query planner seeks to minimize the disk I/O and network traffic required 
to execute a query, using estimates of the number of rows that must be 
processed and the number of disk pages the query must access. The data from 
which these estimates are derived are the `pg_class` system table columns 
`reltuples` and `relpages`, which contain the number of rows and pages at the 
time a `VACUUM` or `ANALYZE` command was last run. As rows are added, the 
numbers become less accurate. However, an accurate count of disk pages is 
always available from the operating system, so as long as the ratio of 
`reltuples` to `relpages` does not change significantly, the optimizer can 
produce an estimate of the number of rows that is sufficiently accurate to 
choose the correct query execution plan.
-
-In append-optimized tables, the number of tuples is kept up-to-date in the 
system catalogs, so the `reltuples` statistic is not an estimate. Non-visible 
tuples in the table are subtracted from the total. The `relpages` value is 
estimated from the append-optimized block sizes.
-
-When the `reltuples` column differs significantly from the row count returned 
by `SELECT COUNT(*)`, an analyze should be performed to update the statistics.
-
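-For example, the following queries compare the stored estimates with an actual row count; the `emp` table name is only illustrative.
-
-``` sql
-SELECT relname, reltuples, relpages FROM pg_class WHERE relname = 'emp';
-SELECT COUNT(*) FROM emp;
-```
-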
-### <a id="views"></a>The pg\_statistic System Table and pg\_stats View
-
-The `pg_statistic` system table holds the results of the last `ANALYZE` 
operation on each database table. There is a row for each column of every 
table. It has the following columns:
-
-starelid  
-The object ID of the table or index the column belongs to.
-
-staatnum  
-The number of the described column, beginning with 1.
-
-stanullfrac  
-The fraction of the column's entries that are null.
-
-stawidth  
-The average stored width, in bytes, of non-null entries.
-
-stadistinct  
-The number of distinct nonnull data values in the column.
-
-stakind*N*  
-A code number indicating the kind of statistics stored in the *N*th slot of 
the `pg_statistic` row.
-
-staop*N*  
-An operator used to derive the statistics stored in the *N*th slot.
-
-stanumbers*N*  
-Numerical statistics of the appropriate kind for the *N*th slot, or NULL if 
the slot kind does not involve numerical values.
-
-stavalues*N*  
-Column data values of the appropriate kind for the *N*th slot, or NULL if the 
slot kind does not store any data values.
-
-The statistics collected for a column vary for different data types, so the 
`pg_statistic` table stores statistics that are appropriate for the data type 
in four *slots*, consisting of four columns per slot. For example, the first 
slot, which normally contains the most common values for a column, consists of 
the columns `stakind1`, `staop1`, `stanumbers1`, and `stavalues1`. Also see 
[pg\_statistic](../reference/catalog/pg_statistic.html#topic1).
-
-The `stakindN` columns each contain a numeric code to describe the type of 
statistics stored in their slot. The `stakind` code numbers from 1 to 99 are 
reserved for core PostgreSQL data types. HAWQ uses code numbers 1, 2, and 3. A 
value of 0 means the slot is unused. The following table describes the kinds of 
statistics stored for the three codes.
-
-<a id="topic_oq3_qxj_3s__table_upf_1yc_nt"></a>
-
-<table>
-<caption><span class="tablecap">Table 1. Contents of pg_statistic 
&quot;slots&quot;</span></caption>
-<colgroup>
-<col width="50%" />
-<col width="50%" />
-</colgroup>
-<thead>
-<tr class="header">
-<th>stakind Code</th>
-<th>Description</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td>1</td>
-<td><em>Most Common Values (MCV) Slot</em>
-<ul>
-<li><code class="ph codeph">staop</code> contains the object ID of the 
&quot;=&quot; operator, used to decide whether values are the same or not.</li>
-<li><code class="ph codeph">stavalues</code> contains an array of the 
<em>K</em> most common non-null values appearing in the column.</li>
-<li><code class="ph codeph">stanumbers</code> contains the frequencies 
(fractions of total row count) of the values in the <code class="ph 
codeph">stavalues</code> array.</li>
-</ul>
-The values are ordered in decreasing frequency. Since the arrays are 
variable-size, <em>K</em> can be chosen by the statistics collector. Values 
must occur more than once to be added to the <code class="ph 
codeph">stavalues</code> array; a unique column has no MCV slot.</td>
-</tr>
-<tr class="even">
-<td>2</td>
-<td><em>Histogram Slot</em> – describes the distribution of scalar data.
-<ul>
-<li><code class="ph codeph">staop</code> is the object ID of the 
&quot;&lt;&quot; operator, which describes the sort ordering.</li>
-<li><code class="ph codeph">stavalues</code> contains <em>M</em> (where 
<em>M</em>&gt;=2) non-null values that divide the non-null column data values 
into <em>M</em>-1 bins of approximately equal population. The first <code 
class="ph codeph">stavalues</code> item is the minimum value and the last is 
the maximum value.</li>
-<li><code class="ph codeph">stanumbers</code> is not used and should be 
null.</li>
-</ul>
-<p>If a Most Common Values slot is also provided, then the histogram describes 
the data distribution after removing the values listed in the MCV array. (It is 
a <em>compressed histogram</em> in the technical parlance). This allows a more 
accurate representation of the distribution of a column with some very common 
values. In a column with only a few distinct values, it is possible that the 
MCV list describes the entire data population; in this case the histogram 
reduces to empty and should be omitted.</p></td>
-</tr>
-<tr class="odd">
-<td>3</td>
-<td><em>Correlation Slot</em> – describes the correlation between the 
physical order of table tuples and the ordering of data values of this column.
-<ul>
-<li><code class="ph codeph">staop</code> is the object ID of the 
&quot;&lt;&quot; operator. As with the histogram, more than one entry could 
theoretically appear.</li>
-<li><code class="ph codeph">stavalues</code> is not used and should be 
NULL.</li>
-<li><code class="ph codeph">stanumbers</code> contains a single entry, the 
correlation coefficient between the sequence of data values and the sequence of 
their actual tuple positions. The coefficient ranges from +1 to -1.</li>
-</ul></td>
-</tr>
-</tbody>
-</table>
-
-The `pg_stats` view presents the contents of `pg_statistic` in a friendlier 
format. For more information, see 
[pg\_stats](../reference/catalog/pg_stats.html#topic1).
-
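-For example, the following query shows a few of the per-column statistics exposed by the view; the `emp` table name is only illustrative.
-
-``` sql
-SELECT attname, null_frac, n_distinct, most_common_vals
-FROM pg_stats
-WHERE tablename = 'emp';
-```
-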
-Newly created tables and indexes have no statistics.
-
-### <a id="topic_oq3_qxj_3s__section_wsy_1rv_mt"></a>Sampling
-
-When calculating statistics for large tables, HAWQ creates a smaller table by 
sampling the base table. If the table is partitioned, samples are taken from 
all partitions.
-
-If a sample table is created, the number of rows in the sample is calculated 
to provide a maximum acceptable relative error. The amount of acceptable error 
is specified with the `gp_analyze_relative_error` system configuration 
parameter, which is set to .25 (25%) by default. This is usually sufficiently 
accurate to generate correct query plans. If `ANALYZE` is not producing good 
estimates for a table column, you can increase the sample size by setting the 
`gp_analyze_relative_error` configuration parameter to a lower value. Beware 
that setting this parameter to a low value can lead to a very large sample size 
and dramatically increase analyze time.
-
-### <a id="topic_oq3_qxj_3s__section_u5p_brv_mt"></a>Updating Statistics
-
-Running `ANALYZE` with no arguments updates statistics for all tables in the 
database. This could take a very long time, so it is better to analyze tables 
selectively after data has changed. You can also analyze a subset of the 
columns in a table, for example columns used in joins, `WHERE` clauses, `SORT` 
clauses, `GROUP BY` clauses, or `HAVING` clauses.
-
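-For example, the following commands analyze one table, and then only two of its columns; the table and column names are illustrative.
-
-``` sql
-ANALYZE sales;
-ANALYZE sales (customer_id, order_date);
-```
-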
-See the SQL Command Reference for details of running the `ANALYZE` command.
-
-Refer to the Management Utility Reference for details of running the 
`analyzedb` command.
-
-### <a id="topic_oq3_qxj_3s__section_cv2_crv_mt"></a>Analyzing Partitioned and 
Append-Optimized Tables
-
-When the `ANALYZE` command is run on a partitioned table, it analyzes each leaf-level subpartition, one at a time. If a table is partitioned, you can analyze just the new or changed partitions, avoiding the cost of re-analyzing partitions that have not changed.
-
-The `analyzedb` command-line utility skips unchanged partitions automatically. 
It also runs concurrent sessions so it can analyze several partitions 
concurrently. It runs five sessions by default, but the number of sessions can 
be set from 1 to 10 with the `-p` command-line option. Each time `analyzedb` 
runs, it saves state information for append-optimized tables and partitions in 
the `db_analyze` directory in the master data directory. The next time it runs, 
`analyzedb` compares the current state of each table with the saved state and 
skips analyzing a table or partition if it is unchanged. Heap tables are always 
analyzed.
-
-If the Pivotal Query Optimizer is enabled, you also need to run `ANALYZE ROOTPARTITION` to refresh the root partition statistics. The Pivotal Query Optimizer requires statistics at the root level for partitioned tables; the legacy optimizer does not use these statistics. Enable the Pivotal Query Optimizer by setting both the `optimizer` and `optimizer_analyze_root_partition` system configuration parameters to on. The root level statistics are then updated when you run `ANALYZE` or `ANALYZE ROOTPARTITION`. The time to run `ANALYZE ROOTPARTITION` is similar to the time to analyze a single partition. The `analyzedb` utility updates root partition statistics by default.
-
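-A minimal example, assuming a partitioned table named `sales`:
-
-``` sql
-ANALYZE ROOTPARTITION sales;
-```
-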
-## <a id="topic_gyb_qrd_2t"></a>Configuring Statistics
-
-There are several options for configuring HAWQ statistics collection.
-
-### <a id="statstarget"></a>Statistics Target
-
-The statistics target is the size of the `most_common_vals`, 
`most_common_freqs`, and `histogram_bounds` arrays for an individual column. By 
default, the target is 25. The default target can be changed by setting a 
server configuration parameter and the target can be set for any column using 
the `ALTER TABLE` command. Larger values increase the time needed to do 
`ANALYZE`, but may improve the quality of the legacy query optimizer (planner) 
estimates.
-
-Set the system default statistics target to a different value by setting the 
`default_statistics_target` server configuration parameter. The default value 
is usually sufficient, and you should only raise or lower it if your tests 
demonstrate that query plans improve with the new target. 
-
-You will perform different procedures to set server configuration parameters 
for your whole HAWQ cluster depending upon whether you manage your cluster from 
the command line or use Ambari. If you use Ambari to manage your HAWQ cluster, 
you must ensure that you update server configuration parameters via the Ambari 
Web UI only. If you manage your HAWQ cluster from the command line, you will 
use the `hawq config` command line utility to set server configuration 
parameters.
-
-The following examples show how to raise the default statistics target from 25 
to 50.
-
-If you use Ambari to manage your HAWQ cluster:
-
-1. Set the `default_statistics_target` configuration property to `50` via the 
HAWQ service **Configs > Advanced > Custom hawq-site** drop down.
-2. Select **Service Actions > Restart All** to load the updated configuration.
-
-If you manage your HAWQ cluster from the command line:
-
-1.  Log in to the HAWQ master host as a HAWQ administrator and source the file 
`/usr/local/hawq/greenplum_path.sh`.
-
-    ``` shell
-    $ source /usr/local/hawq/greenplum_path.sh
-    ```
-
-1. Use the `hawq config` utility to set `default_statistics_target`:
-
-    ``` shell
-    $ hawq config -c default_statistics_target -v 50
-    ```
-2. Reload the HAWQ configuration:
-
-    ``` shell
-    $ hawq stop cluster -u
-    ```
-
-The statistics target for individual columns can be set with the `ALTER TABLE` command. For example, some queries can be improved by increasing the target for certain columns, especially columns that have irregular distributions. You can set the target to zero for columns that never contribute to query optimization. When the target is 0, `ANALYZE` ignores the column. For example, the following `ALTER TABLE` command sets the statistics target for the `notes` column in the `emp` table to zero:
-
-``` sql
-ALTER TABLE emp ALTER COLUMN notes SET STATISTICS 0;
-```
-
-The statistics target can be set in the range 0 to 1000, or to -1 to revert to the system default statistics target.
-
-Setting the statistics target on a parent partition table affects the child 
partitions. If you set statistics to 0 on some columns on the parent table, the 
statistics for the same columns are set to 0 for all children partitions. 
However, if you later add or exchange another child partition, the new child 
partition will use either the default statistics target or, in the case of an 
exchange, the previous statistics target. Therefore, if you add or exchange 
child partitions, you should set the statistics targets on the new child table.
-
-### <a id="topic_gyb_qrd_2t__section_j3p_drv_mt"></a>Automatic Statistics 
Collection
-
-HAWQ can be set to automatically run `ANALYZE` on a table that either has no 
statistics or has changed significantly when certain operations are performed 
on the table. For partitioned tables, automatic statistics collection is only 
triggered when the operation is run directly on a leaf table, and then only the 
leaf table is analyzed.
-
-Automatic statistics collection has three modes:
-
--   `none` disables automatic statistics collection.
--   `on_no_stats` triggers an analyze operation for a table with no existing 
statistics when any of the commands `CREATE TABLE AS SELECT`, `INSERT`, or 
`COPY` are executed on the table.
--   `on_change` triggers an analyze operation when any of the commands `CREATE 
TABLE AS SELECT`, `INSERT`, or `COPY` are executed on the table and the number 
of rows affected exceeds the threshold defined by the 
`gp_autostats_on_change_threshold` configuration parameter.
-
-The automatic statistics collection mode is set separately for commands that 
occur within a procedural language function and commands that execute outside 
of a function:
-
--   The `gp_autostats_mode` configuration parameter controls automatic 
statistics collection behavior outside of functions and is set to `on_no_stats` 
by default.
-
-With the `on_change` mode, `ANALYZE` is triggered only if the number of rows 
affected exceeds the threshold defined by the 
`gp_autostats_on_change_threshold` configuration parameter. The default value 
for this parameter is a very high value, 2147483647, which effectively disables 
automatic statistics collection; you must set the threshold to a lower number 
to enable it. The `on_change` mode could trigger large, unexpected analyze 
operations that could disrupt the system, so it is not recommended to set it 
globally. It could be useful in a session, for example to automatically analyze 
a table following a load.
-
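-For example, a session that performs a large load could enable change-based collection for itself only; the threshold shown is purely illustrative.
-
-``` sql
-SET gp_autostats_mode = on_change;
-SET gp_autostats_on_change_threshold = 100000;
-```
-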
-To disable automatic statistics collection outside of functions, set the 
`gp_autostats_mode` parameter to `none`. For a command-line-managed HAWQ 
cluster:
-
-``` shell
-$ hawq config -c gp_autostats_mode -v none
-```
-
-For an Ambari-managed cluster, set `gp_autostats_mode` via the Ambari Web UI.
-
-Set the `log_autostats` system configuration parameter to `on` if you want to 
log automatic statistics collection operations.

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/dml.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/dml.html.md.erb b/datamgmt/dml.html.md.erb
deleted file mode 100644
index 681883a..0000000
--- a/datamgmt/dml.html.md.erb
+++ /dev/null
@@ -1,35 +0,0 @@
----
-title: Managing Data with HAWQ
----
-
-This chapter provides information about manipulating data and concurrent 
access in HAWQ.
-
--   **[Basic Data Operations](../datamgmt/BasicDataOperations.html)**
-
-    This topic describes basic data operations that you perform in HAWQ.
-
--   **[About Database Statistics](../datamgmt/about_statistics.html)**
-
-    An overview of statistics gathered by the `ANALYZE` command in HAWQ.
-
--   **[Concurrency Control](../datamgmt/ConcurrencyControl.html)**
-
-    This topic discusses the mechanisms used in HAWQ to provide concurrency 
control.
-
--   **[Working with Transactions](../datamgmt/Transactions.html)**
-
-    This topic describes transaction support in HAWQ.
-
--   **[Loading and Unloading 
Data](../datamgmt/load/g-loading-and-unloading-data.html)**
-
-    The topics in this section describe methods for loading and writing data 
into and out of HAWQ, and how to format data files.
-
--   **[Using PXF with Unmanaged Data](../pxf/HawqExtensionFrameworkPXF.html)**
-
-    HAWQ Extension Framework (PXF) is an extensible framework that allows HAWQ 
to query external system data. 
-
--   **[HAWQ InputFormat for 
MapReduce](../datamgmt/HAWQInputFormatforMapReduce.html)**
-
-    MapReduce is a programming model developed by Google for processing and 
generating large data sets on an array of commodity servers. You can use the 
HAWQ InputFormat option to enable MapReduce jobs to access HAWQ data stored in 
HDFS.
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/client-loadtools.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/client-loadtools.html.md.erb 
b/datamgmt/load/client-loadtools.html.md.erb
deleted file mode 100644
index fe291d0..0000000
--- a/datamgmt/load/client-loadtools.html.md.erb
+++ /dev/null
@@ -1,104 +0,0 @@
----
-title: Client-Based HAWQ Load Tools
----
-HAWQ supports data loading from Red Hat Enterprise Linux 5, 6, and 7 and 
Windows XP client systems. HAWQ Load Tools include both a loader program and a 
parallel file distribution program.
-
-This topic presents the instructions to install the HAWQ Load Tools on your 
client machine. It also includes the information necessary to configure HAWQ 
databases to accept remote client connections.
-
-## <a id="installloadrunrhel"></a>RHEL Load Tools
-
-The RHEL Load Tools are provided in a HAWQ distribution. 
-
-
-### <a id="installloadrunux"></a>Installing the RHEL Loader
-
-1. Download a HAWQ installer package or build HAWQ from source.
- 
-2. Refer to the HAWQ command line install instructions to set up your package 
repositories and install the HAWQ binary.
-
-3. Install the `libevent` and `libyaml` packages. These libraries are required 
by the HAWQ file server. You must have superuser privileges on the system.
-
-    ``` shell
-    $ sudo yum install -y libevent libyaml
-    ```
-
-### <a id="installrhelloadabout"></a>About the RHEL Loader Installation
-
-The files/directories of interest in a HAWQ RHEL Load Tools installation 
include:
-
-`bin/` — data loading command-line tools 
([gpfdist](../../reference/cli/admin_utilities/gpfdist.html) and [hawq 
load](../../reference/cli/admin_utilities/hawqload.html))   
-`greenplum_path.sh` — environment set up file
-
-### <a id="installloadrhelcfgenv"></a>Configuring the RHEL Load Environment
-
-A `greenplum_path.sh` file is located in the HAWQ base install directory 
following installation. Source `greenplum_path.sh` before running the HAWQ RHEL 
Load Tools to set up your HAWQ environment:
-
-``` shell
-$ . /usr/local/hawq/greenplum_path.sh
-```
-
-Continue to [Using the HAWQ File Server 
(gpfdist)](g-using-the-hawq-file-server--gpfdist-.html) for specific 
information about using the HAWQ load tools.
-
-## <a id="installloadrunwin"></a>Windows Load Tools
-
-### <a id="installpythonwin"></a>Installing Python 2.5
-The HAWQ Load Tools for Windows require that the 32-bit version of Python 2.5 be installed on your system. 
-
-**Note**: The 64-bit version of Python is **not** compatible with the HAWQ 
Load Tools for Windows.
-
-1. Download the [Python 2.5 installer for 
Windows](https://www.python.org/downloads/).  Make note of the directory to 
which it was downloaded.
-
-2. Double-click the downloaded `python-2.5.x.msi` package to launch the installer.
-3. Select **Install for all users** and click **Next**.
-4. The default Python install location is `C:\Pythonxx`. Click **Up** or 
**New** to choose another location. Click **Next**.
-5. Click **Next** to install the selected Python components.
-6. Click **Finish** to complete the Python installation.
-
-
-### <a id="installloadrunwin"></a>Running the Windows Installer
-
-1. Download the `greenplum-loaders-4.3.x.x-build-n-WinXP-x86_32.msi` installer 
package from [Pivotal 
Network](https://network.pivotal.io/products/pivotal-gpdb). Make note of the 
directory to which it was downloaded.
- 
-2. Double-click the `greenplum-loaders-4.3.x.x-build-n-WinXP-x86_32.msi` file 
to launch the installer.
-3. Click **Next** on the **Welcome** screen.
-4. Click **I Agree** on the **License Agreement** screen.
-5. The default install location for HAWQ Loader Tools for Windows is 
`C:\"Program Files (x86)"\Greenplum\greenplum-loaders-4.3.8.1-build-1`. Click 
**Browse** to choose another location.
-6. Click **Next**.
-7. Click **Install** to begin the installation.
-8. Click **Finish** to exit the installer.
-
-    
-### <a id="installloadabout"></a>About the Windows Loader Installation
-Your HAWQ Windows Load Tools installation includes the following files and 
directories:
-
-`bin/` — data loading command-line tools 
([gpfdist](http://gpdb.docs.pivotal.io/4380/client_tool_guides/load/unix/gpfdist.html)
 and 
[gpload](http://gpdb.docs.pivotal.io/4380/client_tool_guides/load/unix/gpload.html))
  
-`lib/` — data loading library files  
-`greenplum_loaders_path.bat` — environment set up file
-
-
-### <a id="installloadcfgenv"></a>Configuring the Windows Load Environment
-
-A `greenplum_loaders_path.bat` file is provided in your load tools base 
install directory following installation. This file sets the following 
environment variables:
-
-- `GPHOME_LOADERS` - base directory of loader installation
-- `PATH` - adds the loader and component program directories
-- `PYTHONPATH` - adds component library directories
-
-Execute `greenplum_loaders_path.bat` to set up your HAWQ environment before 
running the HAWQ Windows Load Tools.
- 
-
-## <a id="installloadenableclientconn"></a>Enabling Remote Client Connections
-The HAWQ master database must be configured to accept remote client 
connections.  Specifically, you need to identify the client hosts and database 
users that will be connecting to the HAWQ database.
-
-1. Ensure that the HAWQ database master `pg_hba.conf` file is correctly 
configured to allow connections from the desired users operating on the desired 
database from the desired hosts, using the authentication method you choose. 
For details, see [Configuring Client 
Access](../../clientaccess/client_auth.html#topic2).
-
-    Make sure the authentication method you choose is supported by the client 
tool you are using.
-    
-2. If you edited the `pg_hba.conf` file, reload the server configuration. If 
you have any active database connections, you must include the `-M fast` option 
in the `hawq stop` command:
-
-    ``` shell
-    $ hawq stop cluster -u [-M fast]
-    ```
-   
-
-3. Verify and/or configure the databases and roles you are using to connect, 
and that the roles have the correct privileges to the database objects.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/creating-external-tables-examples.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/creating-external-tables-examples.html.md.erb 
b/datamgmt/load/creating-external-tables-examples.html.md.erb
deleted file mode 100644
index 8cdbff1..0000000
--- a/datamgmt/load/creating-external-tables-examples.html.md.erb
+++ /dev/null
@@ -1,117 +0,0 @@
----
-title: Creating External Tables - Examples
----
-
-The following examples show how to define external data with different 
protocols. Each `CREATE EXTERNAL TABLE` command can contain only one protocol.
-
-**Note:** When using IPv6, always enclose the numeric IP addresses in square 
brackets.
-
-Start `gpfdist` before you create external tables with the `gpfdist` protocol. 
The following code starts the `gpfdist` file server program in the background 
on port *8081* serving files from directory `/var/data/staging`. The logs are 
saved in `/home/gpadmin/log`.
-
-``` shell
-$ gpfdist -p 8081 -d /var/data/staging -l /home/gpadmin/log &
-```
-
-## <a id="ex1"></a>Example 1 - Single gpfdist instance on single-NIC machine
-
-Creates a readable external table, `ext_expenses`, using the `gpfdist` 
protocol. The files are formatted with a pipe (|) as the column delimiter.
-
-``` sql
-=# CREATE EXTERNAL TABLE ext_expenses
-        ( name text, date date, amount float4, category text, desc1 text )
-    LOCATION ('gpfdist://etlhost-1:8081/*', 'gpfdist://etlhost-1:8082/*')
-    FORMAT 'TEXT' (DELIMITER '|');
-```
-
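-Once created, the external table can be queried like a regular table, for example:
-
-``` sql
-=# SELECT category, sum(amount) FROM ext_expenses GROUP BY category;
-```
-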
-## <a id="ex2"></a>Example 2 - Multiple gpfdist instances
-
-Creates a readable external table, *ext\_expenses*, using the `gpfdist` 
protocol from all files with the *txt* extension. The column delimiter is a 
pipe ( | ) and NULL is a space (' ').
-
-``` sql
-=# CREATE EXTERNAL TABLE ext_expenses
-        ( name text, date date, amount float4, category text, desc1 text )
-    LOCATION ('gpfdist://etlhost-1:8081/*.txt', 
'gpfdist://etlhost-2:8081/*.txt')
-    FORMAT 'TEXT' ( DELIMITER '|' NULL ' ') ;
-    
-```
-
-## <a id="ex3"></a>Example 3 - Multiple gpfdists instances
-
-Creates a readable external table, *ext\_expenses,* from all files with the 
*txt* extension using the `gpfdists` protocol. The column delimiter is a pipe ( 
| ) and NULL is a space (' '). For information about the location of security 
certificates, see [gpfdists Protocol](g-gpfdists-protocol.html).
-
-1.  Run `gpfdist` with the `--ssl` option.
-2.  Run the following command.
-
-    ``` sql
-    =# CREATE EXTERNAL TABLE ext_expenses
-             ( name text, date date, amount float4, category text, desc1 text )
-        LOCATION ('gpfdists://etlhost-1:8081/*.txt', 
'gpfdists://etlhost-2:8082/*.txt')
-        FORMAT 'TEXT' ( DELIMITER '|' NULL ' ') ;
-        
-    ```
-
-## <a id="ex4"></a>Example 4 - Single gpfdist instance with error logging
-
-Uses the gpfdist protocol to create a readable external table, `ext_expenses`, from all files with the *txt* extension. The column delimiter is a pipe ( | ) and NULL is a space (' ').
-
-Access to the external table is in single row error isolation mode. Input data formatting errors can be captured so that you can view the errors, fix the issues, and then reload the rejected data. If the error count on a segment is greater than five (the `SEGMENT REJECT LIMIT` value), the entire external table operation fails and no rows are processed.
-
-``` sql
-=# CREATE EXTERNAL TABLE ext_expenses
-         ( name text, date date, amount float4, category text, desc1 text )
-    LOCATION ('gpfdist://etlhost-1:8081/*.txt', 
'gpfdist://etlhost-2:8082/*.txt')
-    FORMAT 'TEXT' ( DELIMITER '|' NULL ' ')
-    LOG ERRORS INTO expenses_errs SEGMENT REJECT LIMIT 5;
-    
-```
-
-To create the readable `ext_expenses` table from CSV-formatted text files:
-
-``` sql
-=# CREATE EXTERNAL TABLE ext_expenses
-         ( name text, date date, amount float4, category text, desc1 text )
-    LOCATION ('gpfdist://etlhost-1:8081/*.txt', 
'gpfdist://etlhost-2:8082/*.txt')
-    FORMAT 'CSV' ( DELIMITER ',' )
-    LOG ERRORS INTO expenses_errs SEGMENT REJECT LIMIT 5;
-    
-```
-
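-After a query or load that rejects rows, you can inspect the captured formatting errors in the error table, for example:
-
-``` sql
-=# SELECT * FROM expenses_errs;
-```
-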
-## <a id="ex5"></a>Example 5 - Readable Web External Table with Script
-
-Creates a readable web external table that executes a script once on five 
virtual segments:
-
-``` sql
-=# CREATE EXTERNAL WEB TABLE log_output (linenum int, message text)
-    EXECUTE '/var/load_scripts/get_log_data.sh' ON 5
-    FORMAT 'TEXT' (DELIMITER '|');
-    
-```
-
-## <a id="ex6"></a>Example 6 - Writable External Table with gpfdist
-
-Creates a writable external table, *sales\_out*, that uses `gpfdist` to write 
output data to the file *sales.out*. The column delimiter is a pipe ( | ) and 
NULL is a space (' '). The file will be created in the directory specified when 
you started the gpfdist file server.
-
-``` sql
-=# CREATE WRITABLE EXTERNAL TABLE sales_out (LIKE sales)
-    LOCATION ('gpfdist://etl1:8081/sales.out')
-    FORMAT 'TEXT' ( DELIMITER '|' NULL ' ')
-    DISTRIBUTED BY (txn_id);
-    
-```
-
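-Data is written to the output file by inserting into the writable external table, for example:
-
-``` sql
-=# INSERT INTO sales_out SELECT * FROM sales;
-```
-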
-## <a id="ex7"></a>Example 7 - Writable External Web Table with Script
-
-Creates a writable external web table, `campaign_out`, that pipes output data received by the segments to an executable script, `to_adreport_etl.sh`:
-
-``` sql
-=# CREATE WRITABLE EXTERNAL WEB TABLE campaign_out
-        (LIKE campaign)
-        EXECUTE '/var/unload_scripts/to_adreport_etl.sh' ON 6
-        FORMAT 'TEXT' (DELIMITER '|');
-```
-
-## <a id="ex8"></a>Example 8 - Readable and Writable External Tables with XML 
Transformations
-
-HAWQ can read and write XML data to and from external tables with gpfdist. For 
information about setting up an XML transform, see [Transforming XML 
Data](g-transforming-xml-data.html#topic75).
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-about-gpfdist-setup-and-performance.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-about-gpfdist-setup-and-performance.html.md.erb 
b/datamgmt/load/g-about-gpfdist-setup-and-performance.html.md.erb
deleted file mode 100644
index 28a0bfe..0000000
--- a/datamgmt/load/g-about-gpfdist-setup-and-performance.html.md.erb
+++ /dev/null
@@ -1,22 +0,0 @@
----
-title: About gpfdist Setup and Performance
----
-
-Consider the following scenarios for optimizing your ETL network performance.
-
--   Allow network traffic to use all ETL host Network Interface Cards (NICs) 
simultaneously. Run one instance of `gpfdist` on the ETL host, then declare the 
host name of each NIC in the `LOCATION` clause of your external table 
definition (see [Creating External Tables - 
Examples](creating-external-tables-examples.html#topic44)).
-
-<a id="topic14__du165872"></a>
-<span class="figtitleprefix">Figure: </span>External Table Using Single 
gpfdist Instance with Multiple NICs
-
-<img src="../../images/ext_tables_multinic.jpg" class="image" width="472" 
height="271" />
-
--   Divide external table data equally among multiple `gpfdist` instances on 
the ETL host. For example, on an ETL system with two NICs, run two `gpfdist` 
instances (one on each NIC) to optimize data load performance and divide the 
external table data files evenly between the two `gpfdists`.
-
-<a id="topic14__du165882"></a>
-
-<span class="figtitleprefix">Figure: </span>External Tables Using Multiple 
gpfdist Instances with Multiple NICs
-
-<img src="../../images/ext_tables.jpg" class="image" width="467" height="282" 
/>
-
-**Note:** Use pipes (|) to separate formatted text when you submit files to 
`gpfdist`. HAWQ encloses comma-separated text strings in single or double 
quotes. `gpfdist` has to remove the quotes to parse the strings. Using pipes to 
separate formatted text avoids the extra step and improves performance.

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-character-encoding.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-character-encoding.html.md.erb 
b/datamgmt/load/g-character-encoding.html.md.erb
deleted file mode 100644
index 9f3756d..0000000
--- a/datamgmt/load/g-character-encoding.html.md.erb
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Character Encoding
----
-
-Character encoding systems consist of a code that pairs each character from a character set with something else, such as a sequence of numbers or octets, to facilitate data transmission and storage. HAWQ supports a variety of character sets, including single-byte character sets such as the ISO 8859 series and multiple-byte character sets such as EUC (Extended UNIX Code), UTF-8, and Mule internal code. Clients can use all supported character sets transparently, but a few are not supported for use within the server as a server-side encoding.
-
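-For example, a client session can check the server encoding and request a different client-side encoding with standard commands; the encoding names shown are only examples.
-
-``` sql
-SHOW server_encoding;
-SET client_encoding TO 'UTF8';
-```
-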
-Data files must be in a character encoding recognized by HAWQ. Data files that 
contain invalid or unsupported encoding sequences encounter errors when loading 
into HAWQ.
-
-**Note:** On data files generated on a Microsoft Windows operating system, run 
the `dos2unix` system command to remove any Windows-only characters before 
loading into HAWQ.
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-command-based-web-external-tables.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-command-based-web-external-tables.html.md.erb 
b/datamgmt/load/g-command-based-web-external-tables.html.md.erb
deleted file mode 100644
index 7830cc3..0000000
--- a/datamgmt/load/g-command-based-web-external-tables.html.md.erb
+++ /dev/null
@@ -1,26 +0,0 @@
----
-title: Command-based Web External Tables
----
-
-The output of a shell command or script defines command-based web table data. Specify the command in the `EXECUTE` clause of `CREATE EXTERNAL WEB TABLE`. The data is current as of the time the command runs. The `EXECUTE` clause runs the shell command or script on the specified master or virtual segments. The virtual segments run the command in parallel. Scripts must be executable by the gpadmin user and reside in the same location on the master or the hosts of virtual segments.
-
-The command that you specify in the external table definition executes from 
the database and cannot access environment variables from `.bashrc` or 
`.profile`. Set environment variables in the `EXECUTE` clause. The following 
external web table, for example, runs a command on the HAWQ master host:
-
-``` sql
-CREATE EXTERNAL WEB TABLE output (output text)
-EXECUTE 'PATH=/home/gpadmin/programs; export PATH; myprogram.sh'
-    ON MASTER 
-FORMAT 'TEXT';
-```
-
-The following command defines a web table that runs a script on five virtual 
segments.
-
-``` sql
-CREATE EXTERNAL WEB TABLE log_output (linenum int, message text) 
-EXECUTE '/var/load_scripts/get_log_data.sh' ON 5 
-FORMAT 'TEXT' (DELIMITER '|');
-```
-
-The virtual segments are selected by the resource manager at runtime.
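-Each query against the web table re-runs the script, so the results reflect the log contents at query time, for example:
-
-``` sql
-SELECT count(*) FROM log_output;
-```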
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-configuration-file-format.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-configuration-file-format.html.md.erb 
b/datamgmt/load/g-configuration-file-format.html.md.erb
deleted file mode 100644
index 73f51a9..0000000
--- a/datamgmt/load/g-configuration-file-format.html.md.erb
+++ /dev/null
@@ -1,66 +0,0 @@
----
-title: Configuration File Format
----
-
-The `gpfdist` configuration file uses the YAML 1.1 document format and 
implements a schema for defining the transformation parameters. The 
configuration file must be a valid YAML document.
-
-The `gpfdist` program processes the document in order and uses indentation 
(spaces) to determine the document hierarchy and relationships of the sections 
to one another. The use of white space is significant. Do not use white space 
for formatting and do not use tabs.
-
-The following is the basic structure of a configuration file.
-
-``` pre
----
-VERSION:   1.0.0.1
-TRANSFORMATIONS: 
-transformation_name1:
-TYPE:      input | output
-COMMAND:   command
-CONTENT:   data | paths
-SAFE:      posix-regex
-STDERR:    server | console
-transformation_name2:
-TYPE:      input | output
-COMMAND:   command 
-...
-```
-
-VERSION  
-Required. The version of the `gpfdist` configuration file schema. The current 
version is 1.0.0.1.
-
-TRANSFORMATIONS  
-Required. Begins the transformation specification section. A configuration 
file must have at least one transformation. When `gpfdist` receives a 
transformation request, it looks in this section for an entry with the matching 
transformation name.
-
-TYPE  
-Required. Specifies the direction of transformation. Values are `input` or 
`output`.
-
--   `input`: `gpfdist` treats the standard output of the transformation process as a stream of records to load into HAWQ.
--   `output`: `gpfdist` treats the standard input of the transformation process as a stream of records from HAWQ to transform and write to the appropriate output.
-
-COMMAND  
-Required. Specifies the command `gpfdist` will execute to perform the 
transformation.
-
-For input transformations, `gpfdist` invokes the command specified in the `CONTENT` setting. The command is expected to open the underlying file(s) as appropriate and produce one line of `TEXT` for each row to load into HAWQ. The input transform determines whether the entire content should be converted to one row or to multiple rows.
-
-For output transformations, `gpfdist` invokes this command as specified in the 
`CONTENT` setting. The output command is expected to open and write to the 
underlying file(s) as appropriate. The output transformation determines the 
final placement of the converted output.
-
-CONTENT  
-Optional. The values are `data` and `paths`. The default value is `data`.
-
--   When `CONTENT` specifies `data`, the text `%filename%` in the `COMMAND` 
section is replaced by the path to the file to read or write.
--   When `CONTENT` specifies `paths`, the text `%filename%` in the `COMMAND` 
section is replaced by the path to the temporary file that contains the list of 
files to read or write.
-
-The following is an example of a `COMMAND` section showing the text 
`%filename%` that is replaced.
-
-``` pre
-COMMAND: /bin/bash input_transform.sh %filename%
-```
-
-SAFE  
-Optional. A POSIX regular expression that the paths must match to be passed to the transformation. Specify `SAFE` when there is a concern about injection or improper interpretation of paths passed to the command. The default is no restriction on paths.
-
-STDERR  
-Optional. The values are `server` and `console`.
-
-This setting specifies how to handle standard error output from the transformation. The default, `server`, specifies that `gpfdist` captures the standard error output from the transformation in a temporary file and sends the first 8k of that file to HAWQ as an error message. The error message appears as a SQL error. `console` specifies that `gpfdist` does not redirect or transmit the standard error output from the transformation.
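-
-As a sketch of how a named transformation might be referenced at load time (assuming the `#transform=` fragment of the `gpfdist` URI and an illustrative transformation name `prices_input`):
-
-``` sql
-CREATE READABLE EXTERNAL TABLE prices_readable (itemnumber int, price decimal)
-LOCATION ('gpfdist://etlhost-1:8081/prices.xml#transform=prices_input')
-FORMAT 'TEXT' (DELIMITER '|');
-```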
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-controlling-segment-parallelism.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-controlling-segment-parallelism.html.md.erb 
b/datamgmt/load/g-controlling-segment-parallelism.html.md.erb
deleted file mode 100644
index 4e0096c..0000000
--- a/datamgmt/load/g-controlling-segment-parallelism.html.md.erb
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Controlling Segment Parallelism
----
-
-The `gp_external_max_segs` server configuration parameter controls the number 
of virtual segments that can simultaneously access a single `gpfdist` instance. 
The default is 64. You can set the number of segments such that some segments 
process external data files and some perform other database processing. Set 
this parameter in the `hawq-site.xml` file of your master instance.
-
-The number of segments in the `gpfdist` location list specifies the minimum number of virtual segments required to serve data to a `gpfdist` external table.
-
-The `hawq_rm_nvseg_perquery_perseg_limit` and `hawq_rm_nvseg_perquery_limit` 
parameters also control segment parallelism by specifying the maximum number of 
segments used in running queries on a `gpfdist` external table on the cluster.
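-
-To check the current values from a `psql` session (a quick sketch; `SHOW` reads the active server configuration settings):
-
-``` sql
-SHOW gp_external_max_segs;
-SHOW hawq_rm_nvseg_perquery_perseg_limit;
-SHOW hawq_rm_nvseg_perquery_limit;
-```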
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-create-an-error-table-and-declare-a-reject-limit.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-create-an-error-table-and-declare-a-reject-limit.html.md.erb 
b/datamgmt/load/g-create-an-error-table-and-declare-a-reject-limit.html.md.erb
deleted file mode 100644
index ade14ea..0000000
--- 
a/datamgmt/load/g-create-an-error-table-and-declare-a-reject-limit.html.md.erb
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Capture Row Formatting Errors and Declare a Reject Limit
----
-
-The following SQL fragment captures formatting errors in the error table `errortable` and declares a reject limit of 10 rows.
-
-``` sql
-LOG ERRORS INTO errortable SEGMENT REJECT LIMIT 10 ROWS
-```
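-
-The same clause can be used with `COPY`; for example (a sketch, assuming an existing `expenses` table and an illustrative file path):
-
-``` sql
-COPY expenses FROM '/data/expenses.csv'
-WITH CSV
-LOG ERRORS INTO errortable SEGMENT REJECT LIMIT 10 ROWS;
-```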
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-creating-and-using-web-external-tables.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-creating-and-using-web-external-tables.html.md.erb 
b/datamgmt/load/g-creating-and-using-web-external-tables.html.md.erb
deleted file mode 100644
index 4ef6cab..0000000
--- a/datamgmt/load/g-creating-and-using-web-external-tables.html.md.erb
+++ /dev/null
@@ -1,13 +0,0 @@
----
-title: Creating and Using Web External Tables
----
-
-`CREATE EXTERNAL WEB TABLE` creates a web table definition. Web external 
tables allow HAWQ to treat dynamic data sources like regular database tables. 
Because web table data can change as a query runs, the data is not rescannable.
-
-You can define command-based or URL-based web external tables. The definition 
forms are distinct: you cannot mix command-based and URL-based definitions.
-
--   **[Command-based Web External 
Tables](../../datamgmt/load/g-command-based-web-external-tables.html)**
-
--   **[URL-based Web External 
Tables](../../datamgmt/load/g-url-based-web-external-tables.html)**
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-define-an-external-table-with-single-row-error-isolation.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-define-an-external-table-with-single-row-error-isolation.html.md.erb
 
b/datamgmt/load/g-define-an-external-table-with-single-row-error-isolation.html.md.erb
deleted file mode 100644
index e0c3c17..0000000
--- 
a/datamgmt/load/g-define-an-external-table-with-single-row-error-isolation.html.md.erb
+++ /dev/null
@@ -1,24 +0,0 @@
----
-title: Define an External Table with Single Row Error Isolation
----
-
-The following example logs errors to the error table `errortable` and sets an error threshold of 10 errors.
-
-``` sql
-=# CREATE EXTERNAL TABLE ext_expenses ( name text, date date, amount float4, 
category text, desc1 text )
-   LOCATION ('gpfdist://etlhost-1:8081/*', 'gpfdist://etlhost-2:8082/*')
-   FORMAT 'TEXT' (DELIMITER '|')
-   LOG ERRORS INTO errortable SEGMENT REJECT LIMIT 10 ROWS;
-```
-
-The following example creates an external table, *ext\_expenses*, sets an 
error threshold of 10 errors, and writes error rows to the table 
*err\_expenses*.
-
-``` sql
-=# CREATE EXTERNAL TABLE ext_expenses
-     ( name text, date date, amount float4, category text, desc1 text )
-   LOCATION ('gpfdist://etlhost-1:8081/*', 'gpfdist://etlhost-2:8082/*')
-   FORMAT 'TEXT' (DELIMITER '|')
-   LOG ERRORS INTO err_expenses SEGMENT REJECT LIMIT 10 ROWS;
-```
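-
-After a load, the rejected rows can be inspected in the error table (a sketch; the column names assume the Greenplum-style error table layout):
-
-``` sql
-SELECT relname, linenum, errmsg, rawdata
-FROM err_expenses
-ORDER BY cmdtime DESC;
-```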
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-defining-a-command-based-writable-external-web-table.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-defining-a-command-based-writable-external-web-table.html.md.erb
 
b/datamgmt/load/g-defining-a-command-based-writable-external-web-table.html.md.erb
deleted file mode 100644
index 8a24474..0000000
--- 
a/datamgmt/load/g-defining-a-command-based-writable-external-web-table.html.md.erb
+++ /dev/null
@@ -1,43 +0,0 @@
----
-title: Defining a Command-Based Writable External Web Table
----
-
-You can define writable external web tables to send output rows to an 
application or script. The application must accept an input stream, reside in 
the same location on all of the HAWQ segment hosts, and be executable by the 
`gpadmin` user. All segments in the HAWQ system run the application or script, 
whether or not a segment has output rows to process.
-
-Use `CREATE WRITABLE EXTERNAL WEB TABLE` to define the external table and 
specify the application or script to run on the segment hosts. Commands execute 
from within the database and cannot access environment variables (such as 
`$PATH`). Set environment variables in the `EXECUTE` clause of your writable 
external table definition. For example:
-
-``` sql
-=# CREATE WRITABLE EXTERNAL WEB TABLE output (output text) 
-    EXECUTE 'export PATH=$PATH:/home/gpadmin/programs; myprogram.sh' 
-    ON 6
-    FORMAT 'TEXT'
-    DISTRIBUTED RANDOMLY;
-```
-
-The following HAWQ variables are available for use in OS commands executed by 
a web or writable external table. Set these variables as environment variables 
in the shell that executes the command(s). They can be used to identify a set 
of requests made by an external table statement across the HAWQ array of hosts 
and segment instances.
-
-**Table 1. External Table EXECUTE Variables**
-
-<a id="topic71__du224024"></a>
-
-| Variable            | Description                                                                                                                |
-|---------------------|----------------------------------------------------------------------------------------------------------------------------|
-| $GP\_CID            | Command count of the transaction executing the external table statement.                                                    |
-| $GP\_DATABASE       | The database in which the external table definition resides.                                                                |
-| $GP\_DATE           | The date on which the external table command ran.                                                                           |
-| $GP\_MASTER\_HOST   | The host name of the HAWQ master host from which the external table statement was dispatched.                               |
-| $GP\_MASTER\_PORT   | The port number of the HAWQ master instance from which the external table statement was dispatched.                         |
-| $GP\_SEG\_DATADIR   | The location of the data directory of the segment instance executing the external table command.                            |
-| $GP\_SEG\_PG\_CONF  | The location of the `hawq-site.xml` file of the segment instance executing the external table command.                      |
-| $GP\_SEG\_PORT      | The port number of the segment instance executing the external table command.                                               |
-| $GP\_SEGMENT\_COUNT | The total number of segment instances in the HAWQ system.                                                                   |
-| $GP\_SEGMENT\_ID    | The ID number of the segment instance executing the external table command (same as `dbid` in `gp_segment_configuration`).  |
-| $GP\_SESSION\_ID    | The database session identifier number associated with the external table statement.                                        |
-| $GP\_SN             | Serial number of the external table scan node in the query plan of the external table statement.                            |
-| $GP\_TIME           | The time the external table command was executed.                                                                           |
-| $GP\_USER           | The database user executing the external table statement.                                                                    |
-| $GP\_XID            | The transaction ID of the external table statement.                                                                          |
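-
-For example, the variables can be used to name per-segment output files (a sketch; the command and file naming are illustrative):
-
-``` sql
-=# CREATE WRITABLE EXTERNAL WEB TABLE output_by_segment (output text)
-    EXECUTE 'cat > /tmp/output_${GP_SEGMENT_ID}_${GP_XID}.txt'
-    ON 6
-    FORMAT 'TEXT'
-    DISTRIBUTED RANDOMLY;
-```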
-
--   **[Disabling EXECUTE for Web or Writable External 
Tables](../../datamgmt/load/g-disabling-execute-for-web-or-writable-external-tables.html)**
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-defining-a-file-based-writable-external-table.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-defining-a-file-based-writable-external-table.html.md.erb 
b/datamgmt/load/g-defining-a-file-based-writable-external-table.html.md.erb
deleted file mode 100644
index fa1ddfa..0000000
--- a/datamgmt/load/g-defining-a-file-based-writable-external-table.html.md.erb
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Defining a File-Based Writable External Table
----
-
-Writable external tables that output data to files use the HAWQ parallel file server program, `gpfdist`, or the HAWQ Extension Framework (PXF).
-
-Use the `CREATE WRITABLE EXTERNAL TABLE` command to define the external table 
and specify the location and format of the output files.
-
--   With a writable external table using the `gpfdist` protocol, the HAWQ 
segments send their data to `gpfdist`, which writes the data to the named file. 
`gpfdist` must run on a host that the HAWQ segments can access over the 
network. `gpfdist` points to a file location on the output host and writes data 
received from the HAWQ segments to the file. To divide the output data among 
multiple files, list multiple `gpfdist` URIs in your writable external table 
definition.
--   A writable external web table sends data to an application as a stream of 
data. For example, unload data from HAWQ and send it to an application that 
connects to another database or ETL tool to load the data elsewhere. Writable 
external web tables use the `EXECUTE` clause to specify a shell command, 
script, or application to run on the segment hosts and accept an input stream 
of data. See [Defining a Command-Based Writable External Web 
Table](g-defining-a-command-based-writable-external-web-table.html#topic71) for 
more information about using `EXECUTE` commands in a writable external table 
definition.
-
-You can optionally declare a distribution policy for your writable external 
tables. By default, writable external tables use a random distribution policy. 
If the source table you are exporting data from has a hash distribution policy, 
defining the same distribution key column(s) for the writable external table 
improves unload performance by eliminating the requirement to move rows over 
the interconnect. If you unload data from a particular table, you can use the 
`LIKE` clause to copy the column definitions and distribution policy from the 
source table.
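-
-For example, to match a source table that is hash-distributed on `exp_id` (a sketch; the column names are illustrative):
-
-``` sql
-=# CREATE WRITABLE EXTERNAL TABLE unload_expenses_by_id
-     ( exp_id int, name text, amount float4 )
-   LOCATION ('gpfdist://etlhost-1:8081/expenses_by_id.out')
-   FORMAT 'TEXT' (DELIMITER ',')
-   DISTRIBUTED BY (exp_id);
-```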
-
--   **[Example - HAWQ file server 
(gpfdist)](../../datamgmt/load/g-example-hawq-file-server-gpfdist.html)**
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-determine-the-transformation-schema.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-determine-the-transformation-schema.html.md.erb 
b/datamgmt/load/g-determine-the-transformation-schema.html.md.erb
deleted file mode 100644
index 1a4eb9b..0000000
--- a/datamgmt/load/g-determine-the-transformation-schema.html.md.erb
+++ /dev/null
@@ -1,33 +0,0 @@
----
-title: Determine the Transformation Schema
----
-
-To prepare for the transformation project:
-
-1.  Determine the goal of the project, such as indexing data, analyzing data, combining data, and so on.
-2.  Examine the XML file and note the file structure and element names.
-3.  Choose the elements to import and decide if any other limits are appropriate.
-
-For example, the following XML file, *prices.xml*, is a simple, short file 
that contains price records. Each price record contains two fields: an item 
number and a price.
-
-``` xml
-<?xml version="1.0" encoding="ISO-8859-1" ?>
-<prices>
-  <pricerecord>
-    <itemnumber>708421</itemnumber>
-    <price>19.99</price>
-  </pricerecord>
-  <pricerecord>
-    <itemnumber>708466</itemnumber>
-    <price>59.25</price>
-  </pricerecord>
-  <pricerecord>
-    <itemnumber>711121</itemnumber>
-    <price>24.99</price>
-  </pricerecord>
-</prices>
-```
-
-The goal is to import all the data into a HAWQ table with an integer 
`itemnumber` column and a decimal `price` column.
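-
-The target table might look like this (a sketch; the table name is illustrative):
-
-``` sql
-CREATE TABLE prices (itemnumber int, price decimal);
-```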
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-disabling-execute-for-web-or-writable-external-tables.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-disabling-execute-for-web-or-writable-external-tables.html.md.erb
 
b/datamgmt/load/g-disabling-execute-for-web-or-writable-external-tables.html.md.erb
deleted file mode 100644
index f0332b5..0000000
--- 
a/datamgmt/load/g-disabling-execute-for-web-or-writable-external-tables.html.md.erb
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Disabling EXECUTE for Web or Writable External Tables
----
-
-There is a security risk associated with allowing external tables to execute OS commands or scripts. To disable the use of `EXECUTE` in web and writable external table definitions, set the `gp_external_enable_exec` server configuration parameter to `off` in your master `hawq-site.xml` file:
-
-``` pre
-gp_external_enable_exec = off
-```
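-
-You can confirm the active setting from a `psql` session (a quick check using `SHOW`):
-
-``` sql
-SHOW gp_external_enable_exec;
-```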
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-escaping-in-csv-formatted-files.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-escaping-in-csv-formatted-files.html.md.erb 
b/datamgmt/load/g-escaping-in-csv-formatted-files.html.md.erb
deleted file mode 100644
index d07b463..0000000
--- a/datamgmt/load/g-escaping-in-csv-formatted-files.html.md.erb
+++ /dev/null
@@ -1,29 +0,0 @@
----
-title: Escaping in CSV Formatted Files
----
-
-By default, the escape character is a `"` (double quote) for CSV-formatted files. To use a different escape character, declare it in the `ESCAPE` clause of `COPY`, `CREATE EXTERNAL TABLE`, or the `hawq load` control file. In cases where your selected escape character is present in your data, you can use it to escape itself.
-
-For example, suppose you have a table with three columns and you want to load 
the following three fields:
-
--   `Free trip to A,B`
--   `5.89`
--   `Special rate "1.79"`
-
-Your designated delimiter character is `,` (comma), and your designated escape 
character is `"` (double quote). The formatted row in your data file looks like 
this:
-
-``` pre
-"Free trip to A,B","5.89","Special rate ""1.79"""
-```
-
-The data value containing a comma is enclosed in double quotes. The double quotes that are part of the data are each escaped with another double quote, even though the field value is itself enclosed in double quotes.
-
-Embedding the entire field inside a set of double quotes guarantees 
preservation of leading and trailing whitespace characters:
-
-`"Free trip to A,B ","5.89 ","Special rate ""1.79"" "`
-
-**Note:** In CSV mode, all characters are significant. A quoted value 
surrounded by white space, or any characters other than `DELIMITER`, includes 
those characters. This can cause errors if you import data from a system that 
pads CSV lines with white space to some fixed width. In this case, preprocess 
the CSV file to remove the trailing white space before importing the data into 
HAWQ.
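-
-For example, to declare a single quote as the escape character for a CSV external table (a sketch; the table and location are illustrative, and the `ESCAPE` option is assumed to be accepted in the CSV format clause as it is for `COPY`):
-
-``` sql
-CREATE EXTERNAL TABLE ext_rates (item text, rate text, note text)
-LOCATION ('gpfdist://etlhost-1:8081/rates.csv')
-FORMAT 'CSV' (DELIMITER ',' QUOTE '"' ESCAPE '''');
-```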
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-escaping-in-text-formatted-files.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-escaping-in-text-formatted-files.html.md.erb 
b/datamgmt/load/g-escaping-in-text-formatted-files.html.md.erb
deleted file mode 100644
index e24a2b7..0000000
--- a/datamgmt/load/g-escaping-in-text-formatted-files.html.md.erb
+++ /dev/null
@@ -1,31 +0,0 @@
----
-title: Escaping in Text Formatted Files
----
-
-By default, the escape character is a \\ (backslash) for text-formatted files. You can declare a different escape character in the `ESCAPE` clause of `COPY`, `CREATE EXTERNAL TABLE`, or the `hawq load` control file. If your escape character appears in your data, use it to escape itself.
-
-For example, suppose you have a table with three columns and you want to load 
the following three fields:
-
--   `backslash = \`
--   `vertical bar = |`
--   `exclamation point = !`
-
-Your designated delimiter character is `|` (pipe character), and your 
designated escape character is `\` (backslash). The formatted row in your data 
file looks like this:
-
-``` pre
-backslash = \\ | vertical bar = \| | exclamation point = !
-```
-
-Notice how the backslash character that is part of the data is escaped with 
another backslash character, and the pipe character that is part of the data is 
escaped with a backslash character.
-
-You can use the escape character to escape octal and hexadecimal sequences. The escaped value is converted to the equivalent character when loaded into HAWQ. For example, to load the ampersand character (`&`), use the escape character to escape its equivalent hexadecimal (`\0x26`) or octal (`\046`) representation.
-
-You can disable escaping in `TEXT`-formatted files using the `ESCAPE` clause 
of `COPY`, `CREATE EXTERNAL TABLE` or the `hawq load` control file as follows:
-
-``` pre
-ESCAPE 'OFF'
-```
-
-This is useful for input data that contains many backslash characters, such as 
web log data.
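-
-For example (a sketch; the table definition and location are illustrative):
-
-``` sql
-CREATE EXTERNAL TABLE ext_weblogs (logline text)
-LOCATION ('gpfdist://etlhost-1:8081/weblogs/*')
-FORMAT 'TEXT' (DELIMITER '|' ESCAPE 'OFF');
-```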
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-escaping.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-escaping.html.md.erb 
b/datamgmt/load/g-escaping.html.md.erb
deleted file mode 100644
index 0a1e62a..0000000
--- a/datamgmt/load/g-escaping.html.md.erb
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: Escaping
----
-
-There are two reserved characters that have special meaning to HAWQ:
-
--   The designated delimiter character separates columns or fields in the data 
file.
--   The newline character designates a new row in the data file.
-
-If your data contains either of these characters, you must escape the character so that HAWQ treats it as data and not as a field separator or new row. By default, the escape character is a \\ (backslash) for text-formatted files and a double quote (") for CSV-formatted files.
-
--   **[Escaping in Text Formatted 
Files](../../datamgmt/load/g-escaping-in-text-formatted-files.html)**
-
--   **[Escaping in CSV Formatted 
Files](../../datamgmt/load/g-escaping-in-csv-formatted-files.html)**
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-example-1-dblp-database-publications-in-demo-directory.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-example-1-dblp-database-publications-in-demo-directory.html.md.erb
 
b/datamgmt/load/g-example-1-dblp-database-publications-in-demo-directory.html.md.erb
deleted file mode 100644
index 4f61396..0000000
--- 
a/datamgmt/load/g-example-1-dblp-database-publications-in-demo-directory.html.md.erb
+++ /dev/null
@@ -1,29 +0,0 @@
----
-title: Command-based Web External Tables
----
-
-The output of a shell command or script defines command-based web table data. Specify the command in the `EXECUTE` clause of `CREATE EXTERNAL WEB TABLE`. The data is current as of the time the command runs. The `EXECUTE` clause runs the shell command or script on the specified master and/or segment hosts. The command or script must reside on the hosts corresponding to the host(s) defined in the `EXECUTE` clause.
-
-By default, the command is run on segment hosts when active segments have 
output rows to process. For example, if each segment host runs four primary 
segment instances that have output rows to process, the command runs four times 
per segment host. You can optionally limit the number of segment instances that 
execute the web table command. All segments included in the web table 
definition in the `ON` clause run the command in parallel.
-
-The command that you specify in the external table definition executes from 
the database and cannot access environment variables from `.bashrc` or 
`.profile`. Set environment variables in the `EXECUTE` clause. For example:
-
-``` sql
-=# CREATE EXTERNAL WEB TABLE output (output text)
-EXECUTE 'PATH=/home/gpadmin/programs; export PATH; myprogram.sh'
-    ON MASTER
-FORMAT 'TEXT';
-```
-
-Scripts must be executable by the `gpadmin` user and reside in the same 
location on the master or segment hosts.
-
-The following command defines a web table that runs a script. The script runs 
on five virtual segments selected by the resource manager at runtime.
-
-``` sql
-=# CREATE EXTERNAL WEB TABLE log_output
-(linenum int, message text)
-EXECUTE '/var/load_scripts/get_log_data.sh' ON 5
-FORMAT 'TEXT' (DELIMITER '|');
-```
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-example-hawq-file-server-gpfdist.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-example-hawq-file-server-gpfdist.html.md.erb 
b/datamgmt/load/g-example-hawq-file-server-gpfdist.html.md.erb
deleted file mode 100644
index a0bf669..0000000
--- a/datamgmt/load/g-example-hawq-file-server-gpfdist.html.md.erb
+++ /dev/null
@@ -1,13 +0,0 @@
----
-title: Example - HAWQ file server (gpfdist)
----
-
-``` sql
-=# CREATE WRITABLE EXTERNAL TABLE unload_expenses
-( LIKE expenses )
-LOCATION ('gpfdist://etlhost-1:8081/expenses1.out',
-'gpfdist://etlhost-2:8081/expenses2.out')
-FORMAT 'TEXT' (DELIMITER ',');
-```
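-
-To unload data through this table, insert into it from the source table; `expenses` is the table referenced by the `LIKE` clause above:
-
-``` sql
-=# INSERT INTO unload_expenses SELECT * FROM expenses;
-```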
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-example-irs-mef-xml-files-in-demo-directory.html.md.erb
----------------------------------------------------------------------
diff --git 
a/datamgmt/load/g-example-irs-mef-xml-files-in-demo-directory.html.md.erb 
b/datamgmt/load/g-example-irs-mef-xml-files-in-demo-directory.html.md.erb
deleted file mode 100644
index 6f5b9e3..0000000
--- a/datamgmt/load/g-example-irs-mef-xml-files-in-demo-directory.html.md.erb
+++ /dev/null
@@ -1,54 +0,0 @@
----
-title: Example using IRS MeF XML Files (In demo Directory)
----
-
-This example demonstrates loading a sample IRS Modernized eFile tax return 
using a Joost STX transformation. The data is in the form of a complex XML file.
-
-The U.S. Internal Revenue Service (IRS) made a significant commitment to XML 
and specifies its use in its Modernized e-File (MeF) system. In MeF, each tax 
return is an XML document with a deep hierarchical structure that closely 
reflects the particular form of the underlying tax code.
-
-XML, XML Schema and stylesheets play a role in their data representation and 
business workflow. The actual XML data is extracted from a ZIP file attached to 
a MIME "transmission file" message. For more information about MeF, see 
[Modernized e-File 
(Overview)](http://www.irs.gov/uac/Modernized-e-File-Overview) on the IRS web 
site.
-
-The sample XML document, *RET990EZ\_2006.xml*, is about 350KB in size with two 
elements:
-
--   ReturnHeader
--   ReturnData
-
-The `<ReturnHeader>` element contains general details about the tax return, such as the taxpayer's name, the tax year of the return, and the preparer. The `<ReturnData>` element contains multiple sections with specific details about the tax return and associated schedules.
-
-The following is an abridged sample of the XML file.
-
-``` xml
-<?xml version="1.0" encoding="UTF-8"?>
-<Return returnVersion="2006v2.0"
-   xmlns="http://www.irs.gov/efile"
-   xmlns:efile="http://www.irs.gov/efile"
-   xsi:schemaLocation="http://www.irs.gov/efile"
-   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-   <ReturnHeader binaryAttachmentCount="1">
-     <ReturnId>AAAAAAAAAAAAAAAAAAAA</ReturnId>
-     <Timestamp>1999-05-30T12:01:01+05:01</Timestamp>
-     <ReturnType>990EZ</ReturnType>
-     <TaxPeriodBeginDate>2005-01-01</TaxPeriodBeginDate>
-     <TaxPeriodEndDate>2005-12-31</TaxPeriodEndDate>
-     <Filer>
-       <EIN>011248772</EIN>
-       ... more data ...
-     </Filer>
-     <Preparer>
-       <Name>Percy Polar</Name>
-       ... more data ...
-     </Preparer>
-     <TaxYear>2005</TaxYear>
-   </ReturnHeader>
-   ... more data ..
-```
-
-The goal is to import all the data into a HAWQ database. First, convert the XML document into text with newlines "escaped", producing two columns: the `ReturnId` and a single column at the end holding the entire MeF tax return. For example:
-
-``` pre
-AAAAAAAAAAAAAAAAAAAA|<Return returnVersion="2006v2.0"... 
-```
-
-Load the data into HAWQ.
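-
-A possible target table and load path (a sketch; the table, external table, and transformation names are illustrative, and the `#transform=` fragment of the `gpfdist` URI is assumed):
-
-``` sql
-CREATE TABLE irs_returns (returnid text, return_xml text);
-
-CREATE READABLE EXTERNAL TABLE irs_returns_ext (returnid text, return_xml text)
-LOCATION ('gpfdist://etlhost-1:8081/RET990EZ_2006.xml#transform=irs_input')
-FORMAT 'TEXT' (DELIMITER '|');
-
-INSERT INTO irs_returns SELECT * FROM irs_returns_ext;
-```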
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-example-witsml-files-in-demo-directory.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-example-witsml-files-in-demo-directory.html.md.erb 
b/datamgmt/load/g-example-witsml-files-in-demo-directory.html.md.erb
deleted file mode 100644
index 0484523..0000000
--- a/datamgmt/load/g-example-witsml-files-in-demo-directory.html.md.erb
+++ /dev/null
@@ -1,54 +0,0 @@
----
-title: Example using WITSML™ Files (In demo Directory)
----
-
-This example demonstrates loading sample data describing an oil rig using a 
Joost STX transformation. The data is in the form of a complex XML file 
downloaded from energistics.org.
-
-The Wellsite Information Transfer Standard Markup Language (WITSML™) is an 
oil industry initiative to provide open, non-proprietary, standard interfaces 
for technology and software to share information among oil companies, service 
companies, drilling contractors, application vendors, and regulatory agencies. 
For more information about WITSML™, see 
[http://www.witsml.org](http://www.witsml.org).
-
-The oil rig information consists of a top level `<rigs>` element with multiple child elements such as `<documentInfo>`, `<rig>`, and so on. The following excerpt from the file shows the type of information in the `<rig>` tag.
-
-``` xml
-<?xml version="1.0" encoding="UTF-8"?>
-<?xml-stylesheet href="../stylesheets/rig.xsl" type="text/xsl" media="screen"?>
-<rigs 
- xmlns="http://www.witsml.org/schemas/131"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://www.witsml.org/schemas/131 ../obj_rig.xsd" 
- version="1.3.1.1">
- <documentInfo>
- ... misc data ...
- </documentInfo>
- <rig uidWell="W-12" uidWellbore="B-01" uid="xr31">
-     <nameWell>6507/7-A-42</nameWell>
-     <nameWellbore>A-42</nameWellbore>
-     <name>Deep Drill #5</name>
-     <owner>Deep Drilling Co.</owner>
-     <typeRig>floater</typeRig>
-     <manufacturer>Fitsui Engineering</manufacturer>
-     <yearEntService>1980</yearEntService>
-     <classRig>ABS Class A1 M CSDU AMS ACCU</classRig>
-     <approvals>DNV</approvals>
- ... more data ...
-```
-
-The goal is to import the information for this rig into HAWQ.
-
-The sample document, *rig.xml*, is about 11KB in size. The input does not contain tabs, so the relevant information can be converted into records delimited with a pipe (|).
-
-`W-12|6507/7-A-42|xr31|Deep Drill #5|Deep Drilling Co.|John Doe|john....@example.com|`
-
-With the columns:
-
--   `well_uid text`, -- e.g. W-12
--   `well_name text`, -- e.g. 6507/7-A-42
--   `rig_uid text`, -- e.g. xr31
--   `rig_name text`, -- e.g. Deep Drill \#5
--   `rig_owner text`, -- e.g. Deep Drilling Co.
--   `rig_contact text`, -- e.g. John Doe
--   `rig_email text`, -- e.g. john....@example.com
--   `doc xml`
-
-Then, load the data into HAWQ.
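-
-A possible target table matching those columns (a sketch; the table name is illustrative):
-
-``` sql
-CREATE TABLE rig_info (
-  well_uid text, well_name text, rig_uid text, rig_name text,
-  rig_owner text, rig_contact text, rig_email text, doc xml
-);
-```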
-
-

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/de1e2e07/datamgmt/load/g-examples-read-fixed-width-data.html.md.erb
----------------------------------------------------------------------
diff --git a/datamgmt/load/g-examples-read-fixed-width-data.html.md.erb 
b/datamgmt/load/g-examples-read-fixed-width-data.html.md.erb
deleted file mode 100644
index 174529a..0000000
--- a/datamgmt/load/g-examples-read-fixed-width-data.html.md.erb
+++ /dev/null
@@ -1,37 +0,0 @@
----
-title: Examples - Read Fixed-Width Data
----
-
-The following examples show how to read fixed-width data.
-
-## Example 1 – Loading a table with PRESERVED\_BLANKS on
-
-``` sql
-CREATE READABLE EXTERNAL TABLE students (
-  name varchar(20), address varchar(30), age int)
-LOCATION ('gpfdist://host:port/file/path/')
-FORMAT 'CUSTOM' (formatter=fixedwidth_in, name=20, address=30, age=4,
-        preserve_blanks='on',null='NULL');
-```
-
-## Example 2 – Loading data with no line delimiter
-
-``` sql
-CREATE READABLE EXTERNAL TABLE students (
-  name varchar(20), address varchar(30), age int)
-LOCATION ('gpfdist://host:port/file/path/')
-FORMAT 'CUSTOM' (formatter=fixedwidth_in, name='20', address='30', age='4', 
-        line_delim='?@');
-```
-
-## Example 3 – Create a writable external table with a \\r\\n line delimiter
-
-``` sql
-CREATE WRITABLE EXTERNAL TABLE students_out (
-  name varchar(20), address varchar(30), age int)
-LOCATION ('gpfdist://host:port/file/path/filename')     
-FORMAT 'CUSTOM' (formatter=fixedwidth_out, 
-   name=20, address=30, age=4, line_delim=E'\r\n');
-```
-
-

