Slightly modified patch applied.  This is my last planned pg_upgrade
change for 9.3.

---------------------------------------------------------------------------

On Mon, Jan  7, 2013 at 10:51:21PM -0500, Bruce Momjian wrote:
> Pg_upgrade by default (without --link) copies heap/index files from the
> old to new cluster.  This patch implements parallel heap/index file
> copying in pg_upgrade using the --jobs option.  It uses the same
> infrastructure used for pg_upgrade parallel dump/restore.  Here are the
> performance results:
> 
>                  --- seconds ---
>          GB        git    patched
>           2       62.09    63.75
>           4       95.93   107.22
>           8      194.96   195.29
>          16      494.38   348.93
>          32      983.28   644.23
>          64     2227.73  1244.08
>         128     4735.83  2547.09
> 
> Because of the kernel cache, you only see a big win when the amount of
> copy data exceeds the kernel cache.  For testing, I used a 24GB, 16-core
> machine with two magnetic disks with one tablespace on each.  Using more
> tablespaces would yield larger improvements.  My test script is
> attached.  
> 
> I consider this patch ready for application.  This is the last
> pg_upgrade performance improvement idea I am considering.
> 
> -- 
>   Bruce Momjian  <br...@momjian.us>        http://momjian.us
>   EnterpriseDB                             http://enterprisedb.com
> 
>   + It's impossible for everything to be true. +

> diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
> new file mode 100644
> index 59f8fd0..1780788
> *** a/contrib/pg_upgrade/check.c
> --- b/contrib/pg_upgrade/check.c
> *************** create_script_for_old_cluster_deletion(c
> *** 606,612 ****
>       fprintf(script, RMDIR_CMD " %s\n", 
> fix_path_separator(old_cluster.pgdata));
>   
>       /* delete old cluster's alternate tablespaces */
> !     for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++)
>       {
>               /*
>                * Do the old cluster's per-database directories share a 
> directory
> --- 606,612 ----
>       fprintf(script, RMDIR_CMD " %s\n", 
> fix_path_separator(old_cluster.pgdata));
>   
>       /* delete old cluster's alternate tablespaces */
> !     for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
>       {
>               /*
>                * Do the old cluster's per-database directories share a 
> directory
> *************** create_script_for_old_cluster_deletion(c
> *** 621,634 ****
>                       /* remove PG_VERSION? */
>                       if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
>                               fprintf(script, RM_CMD " %s%s%cPG_VERSION\n",
> !                                             
> fix_path_separator(os_info.tablespaces[tblnum]), 
>                                               
> fix_path_separator(old_cluster.tablespace_suffix),
>                                               PATH_SEPARATOR);
>   
>                       for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
>                       {
>                               fprintf(script, RMDIR_CMD " %s%s%c%d\n",
> !                                             
> fix_path_separator(os_info.tablespaces[tblnum]),
>                                               
> fix_path_separator(old_cluster.tablespace_suffix),
>                                               PATH_SEPARATOR, 
> old_cluster.dbarr.dbs[dbnum].db_oid);
>                       }
> --- 621,634 ----
>                       /* remove PG_VERSION? */
>                       if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
>                               fprintf(script, RM_CMD " %s%s%cPG_VERSION\n",
> !                                             
> fix_path_separator(os_info.old_tablespaces[tblnum]), 
>                                               
> fix_path_separator(old_cluster.tablespace_suffix),
>                                               PATH_SEPARATOR);
>   
>                       for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
>                       {
>                               fprintf(script, RMDIR_CMD " %s%s%c%d\n",
> !                                             
> fix_path_separator(os_info.old_tablespaces[tblnum]),
>                                               
> fix_path_separator(old_cluster.tablespace_suffix),
>                                               PATH_SEPARATOR, 
> old_cluster.dbarr.dbs[dbnum].db_oid);
>                       }
> *************** create_script_for_old_cluster_deletion(c
> *** 640,646 ****
>                        * or a version-specific subdirectory.
>                        */
>                       fprintf(script, RMDIR_CMD " %s%s\n",
> !                                     
> fix_path_separator(os_info.tablespaces[tblnum]), 
>                                       
> fix_path_separator(old_cluster.tablespace_suffix));
>       }
>   
> --- 640,646 ----
>                        * or a version-specific subdirectory.
>                        */
>                       fprintf(script, RMDIR_CMD " %s%s\n",
> !                                     
> fix_path_separator(os_info.old_tablespaces[tblnum]), 
>                                       
> fix_path_separator(old_cluster.tablespace_suffix));
>       }
>   
> diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
> new file mode 100644
> index 0c11ff8..7fd4584
> *** a/contrib/pg_upgrade/info.c
> --- b/contrib/pg_upgrade/info.c
> *************** create_rel_filename_map(const char *old_
> *** 106,125 ****
>                * relation belongs to the default tablespace, hence relfiles 
> should
>                * exist in the data directories.
>                */
> !             snprintf(map->old_dir, sizeof(map->old_dir), "%s/base/%u", 
> old_data,
> !                              old_db->db_oid);
> !             snprintf(map->new_dir, sizeof(map->new_dir), "%s/base/%u", 
> new_data,
> !                              new_db->db_oid);
>       }
>       else
>       {
>               /* relation belongs to a tablespace, so use the tablespace 
> location */
> !             snprintf(map->old_dir, sizeof(map->old_dir), "%s%s/%u", 
> old_rel->tablespace,
> !                              old_cluster.tablespace_suffix, old_db->db_oid);
> !             snprintf(map->new_dir, sizeof(map->new_dir), "%s%s/%u", 
> new_rel->tablespace,
> !                              new_cluster.tablespace_suffix, new_db->db_oid);
>       }
>   
>       /*
>        * old_relfilenode might differ from pg_class.oid (and hence
>        * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL.
> --- 106,130 ----
>                * relation belongs to the default tablespace, hence relfiles 
> should
>                * exist in the data directories.
>                */
> !             strlcpy(map->old_tablespace, old_data, 
> sizeof(map->old_tablespace));
> !             strlcpy(map->new_tablespace, new_data, 
> sizeof(map->new_tablespace));
> !             strlcpy(map->old_tablespace_suffix, "/base", 
> sizeof(map->old_tablespace_suffix));
> !             strlcpy(map->new_tablespace_suffix, "/base", 
> sizeof(map->new_tablespace_suffix));
>       }
>       else
>       {
>               /* relation belongs to a tablespace, so use the tablespace 
> location */
> !             strlcpy(map->old_tablespace, old_rel->tablespace, 
> sizeof(map->old_tablespace));
> !             strlcpy(map->new_tablespace, new_rel->tablespace, 
> sizeof(map->new_tablespace));
> !             strlcpy(map->old_tablespace_suffix, 
> old_cluster.tablespace_suffix,
> !                             sizeof(map->old_tablespace_suffix));
> !             strlcpy(map->new_tablespace_suffix, 
> new_cluster.tablespace_suffix,
> !                             sizeof(map->new_tablespace_suffix));
>       }
>   
> +     map->old_db_oid = old_db->db_oid;
> +     map->new_db_oid = new_db->db_oid;
> + 
>       /*
>        * old_relfilenode might differ from pg_class.oid (and hence
>        * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL.
> diff --git a/contrib/pg_upgrade/parallel.c b/contrib/pg_upgrade/parallel.c
> new file mode 100644
> index 8ea36bc..d157511
> *** a/contrib/pg_upgrade/parallel.c
> --- b/contrib/pg_upgrade/parallel.c
> *************** typedef struct {
> *** 34,44 ****
>       char log_file[MAXPGPATH];
>       char opt_log_file[MAXPGPATH];
>       char cmd[MAX_STRING];
> ! } thread_arg;
>   
> ! thread_arg **thread_args;
>   
> ! DWORD win32_exec_prog(thread_arg *args);
>   
>   #endif
>   
> --- 34,57 ----
>       char log_file[MAXPGPATH];
>       char opt_log_file[MAXPGPATH];
>       char cmd[MAX_STRING];
> ! } exec_thread_arg;
>   
> ! typedef struct {
> !     DbInfoArr *old_db_arr;
> !     DbInfoArr *new_db_arr;
> !     char old_pgdata[MAXPGPATH];
> !     char new_pgdata[MAXPGPATH];
> !     char old_tablespace[MAXPGPATH];
> ! } transfer_thread_arg;
>   
> ! exec_thread_arg **exec_thread_args;
> ! transfer_thread_arg **transfer_thread_args;
> ! 
> ! /* track current thread_args struct so reap_child() can be used for all 
> cases */
> ! void **cur_thread_args;
> ! 
> ! DWORD win32_exec_prog(exec_thread_arg *args);
> ! DWORD win32_transfer_all_new_dbs(transfer_thread_arg *args);
>   
>   #endif
>   
> *************** parallel_exec_prog(const char *log_file,
> *** 58,64 ****
>       pid_t           child;
>   #else
>       HANDLE          child;
> !     thread_arg      *new_arg;
>   #endif
>   
>       va_start(args, fmt);
> --- 71,77 ----
>       pid_t           child;
>   #else
>       HANDLE          child;
> !     exec_thread_arg *new_arg;
>   #endif
>   
>       va_start(args, fmt);
> *************** parallel_exec_prog(const char *log_file,
> *** 71,77 ****
>       else
>       {
>               /* parallel */
> ! 
>               /* harvest any dead children */
>               while (reap_child(false) == true)
>                       ;
> --- 84,92 ----
>       else
>       {
>               /* parallel */
> ! #ifdef WIN32
> !             cur_thread_args = (void **)exec_thread_args;
> ! #endif      
>               /* harvest any dead children */
>               while (reap_child(false) == true)
>                       ;
> *************** parallel_exec_prog(const char *log_file,
> *** 100,106 ****
>                       int i;
>   
>                       thread_handles = pg_malloc(user_opts.jobs * 
> sizeof(HANDLE));
> !                     thread_args = pg_malloc(user_opts.jobs * 
> sizeof(thread_arg *));
>   
>                       /*
>                        *      For safety and performance, we keep the args 
> allocated during
> --- 115,121 ----
>                       int i;
>   
>                       thread_handles = pg_malloc(user_opts.jobs * 
> sizeof(HANDLE));
> !                     exec_thread_args = pg_malloc(user_opts.jobs * 
> sizeof(exec_thread_arg *));
>   
>                       /*
>                        *      For safety and performance, we keep the args 
> allocated during
> *************** parallel_exec_prog(const char *log_file,
> *** 108,118 ****
>                        *      in a thread different from the one that 
> allocated it.
>                        */
>                       for (i = 0; i < user_opts.jobs; i++)
> !                             thread_args[i] = pg_malloc(sizeof(thread_arg));
>               }
>   
>               /* use first empty array element */
> !             new_arg = thread_args[parallel_jobs-1];
>   
>               /* Can only pass one pointer into the function, so use a struct 
> */
>               strcpy(new_arg->log_file, log_file);
> --- 123,133 ----
>                        *      in a thread different from the one that 
> allocated it.
>                        */
>                       for (i = 0; i < user_opts.jobs; i++)
> !                             exec_thread_args[i] = 
> pg_malloc(sizeof(exec_thread_arg));
>               }
>   
>               /* use first empty array element */
> !             new_arg = exec_thread_args[parallel_jobs-1];
>   
>               /* Can only pass one pointer into the function, so use a struct 
> */
>               strcpy(new_arg->log_file, log_file);
> *************** parallel_exec_prog(const char *log_file,
> *** 134,140 ****
>   
>   #ifdef WIN32
>   DWORD
> ! win32_exec_prog(thread_arg *args)
>   {
>       int ret;
>   
> --- 149,155 ----
>   
>   #ifdef WIN32
>   DWORD
> ! win32_exec_prog(exec_thread_arg *args)
>   {
>       int ret;
>   
> *************** win32_exec_prog(thread_arg *args)
> *** 147,152 ****
> --- 162,273 ----
>   
>   
>   /*
> +  *  parallel_transfer_all_new_dbs
> +  *
> +  *  This has the same API as transfer_all_new_dbs, except it does parallel 
> execution
> +  *  by transferring multiple tablespaces in parallel
> +  */
> + void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr 
> *new_db_arr,
> +                                                                char 
> *old_pgdata, char *new_pgdata,
> +                                                                char 
> *old_tablespace)
> + {
> + #ifndef WIN32
> +     pid_t           child;
> + #else
> +     HANDLE          child;
> +     transfer_thread_arg     *new_arg;
> + #endif
> + 
> +     if (user_opts.jobs <= 1)
> +             /* not parallel: a NULL tablespace transfers all tablespaces here */
> +             transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, 
> new_pgdata, NULL);
> +     else
> +     {
> +             /* parallel */
> + #ifdef WIN32
> +             cur_thread_args = (void **)transfer_thread_args;
> + #endif
> +             /* harvest any dead children */
> +             while (reap_child(false) == true)
> +                     ;
> + 
> +             /* must we wait for a dead child? */
> +             if (parallel_jobs >= user_opts.jobs)
> +                     reap_child(true);
> +                     
> +             /* set this before we start the job */
> +             parallel_jobs++;
> +     
> +             /* Ensure stdio state is quiesced before forking */
> +             fflush(NULL);
> + 
> + #ifndef WIN32
> +             child = fork();
> +             if (child == 0)
> +             {
> +                     transfer_all_new_dbs(old_db_arr, new_db_arr, 
> old_pgdata, new_pgdata,
> +                                                              
> old_tablespace);
> +                     /* if we take another exit path, it will be non-zero */
> +                     /* use _exit to skip atexit() functions */
> +                     _exit(0);
> +             }
> +             else if (child < 0)
> +                     /* fork failed */
> +                     pg_log(PG_FATAL, "could not create worker process: 
> %s\n", strerror(errno));
> + #else
> +             if (thread_handles == NULL)
> +             {
> +                     int i;
> + 
> +                     thread_handles = pg_malloc(user_opts.jobs * 
> sizeof(HANDLE));
> +                     transfer_thread_args = pg_malloc(user_opts.jobs * 
> sizeof(transfer_thread_arg *));
> + 
> +                     /*
> +                      *      For safety and performance, we keep the args 
> allocated during
> +                      *      the entire life of the process, and we don't 
> free the args
> +                      *      in a thread different from the one that 
> allocated it.
> +                      */
> +                     for (i = 0; i < user_opts.jobs; i++)
> +                             transfer_thread_args[i] = 
> pg_malloc(sizeof(transfer_thread_arg));
> +             }
> + 
> +             /* use first empty array element */
> +             new_arg = transfer_thread_args[parallel_jobs-1];
> + 
> +             /* Can only pass one pointer into the function, so use a struct 
> */
> +             new_arg->old_db_arr = old_db_arr;
> +             new_arg->new_db_arr = new_db_arr;
> +             strcpy(new_arg->old_pgdata, old_pgdata);
> +             strcpy(new_arg->new_pgdata, new_pgdata);
> +             strcpy(new_arg->old_tablespace, old_tablespace);
> + 
> +             child = (HANDLE) _beginthreadex(NULL, 0, (void *) 
> win32_exec_prog,
> +                                             new_arg, 0, NULL);
> +             if (child == 0)
> +                     pg_log(PG_FATAL, "could not create worker thread: 
> %s\n", strerror(errno));
> + 
> +             thread_handles[parallel_jobs-1] = child;
> + #endif
> +     }
> + 
> +     return;
> + }
> + 
> + 
> + #ifdef WIN32
> + DWORD
> + win32_transfer_all_new_dbs(transfer_thread_arg *args)
> + {
> +     transfer_all_new_dbs(args->old_db_arr, args->new_db_arr, 
> args->old_pgdata,
> +                                              args->new_pgdata, 
> args->old_tablespace);
> + 
> +     /* terminates thread */
> +     return 0;
> + }
> + #endif
> + 
> + 
> + /*
>    *  collect status from a completed worker child
>    */
>   bool
> *************** reap_child(bool wait_for_child)
> *** 195,201 ****
>       /*      Move last slot into dead child's position */
>       if (thread_num != parallel_jobs - 1)
>       {
> !             thread_arg *tmp_args;
>       
>               thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
>   
> --- 316,322 ----
>       /*      Move last slot into dead child's position */
>       if (thread_num != parallel_jobs - 1)
>       {
> !             void *tmp_args;
>       
>               thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
>   
> *************** reap_child(bool wait_for_child)
> *** 205,213 ****
>                *      reused by the next created thread.  Instead, the new 
> thread
>                *      will use the arg struct of the thread that just died.
>                */
> !             tmp_args = thread_args[thread_num];
> !             thread_args[thread_num] = thread_args[parallel_jobs - 1];
> !             thread_args[parallel_jobs - 1] = tmp_args;
>       }
>   #endif
>   
> --- 326,334 ----
>                *      reused by the next created thread.  Instead, the new 
> thread
>                *      will use the arg struct of the thread that just died.
>                */
> !             tmp_args = cur_thread_args[thread_num];
> !             cur_thread_args[thread_num] = cur_thread_args[parallel_jobs - 
> 1];
> !             cur_thread_args[parallel_jobs - 1] = tmp_args;
>       }
>   #endif
>   
> diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c
> new file mode 100644
> index 70c749d..85997e5
> *** a/contrib/pg_upgrade/pg_upgrade.c
> --- b/contrib/pg_upgrade/pg_upgrade.c
> *************** main(int argc, char **argv)
> *** 133,139 ****
>       if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
>               disable_old_cluster();
>   
> !     transfer_all_new_dbs(&old_cluster.dbarr, &new_cluster.dbarr,
>                                                old_cluster.pgdata, 
> new_cluster.pgdata);
>   
>       /*
> --- 133,139 ----
>       if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
>               disable_old_cluster();
>   
> !     transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr,
>                                                old_cluster.pgdata, 
> new_cluster.pgdata);
>   
>       /*
> diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
> new file mode 100644
> index c1a2f53..d5c3fa9
> *** a/contrib/pg_upgrade/pg_upgrade.h
> --- b/contrib/pg_upgrade/pg_upgrade.h
> *************** typedef struct
> *** 134,141 ****
>    */
>   typedef struct
>   {
> !     char            old_dir[MAXPGPATH];
> !     char            new_dir[MAXPGPATH];
>   
>       /*
>        * old/new relfilenodes might differ for pg_largeobject(_metadata) 
> indexes
> --- 134,145 ----
>    */
>   typedef struct
>   {
> !     char            old_tablespace[MAXPGPATH];
> !     char            new_tablespace[MAXPGPATH];
> !     char            old_tablespace_suffix[MAXPGPATH];
> !     char            new_tablespace_suffix[MAXPGPATH];
> !     Oid                     old_db_oid;
> !     Oid                     new_db_oid;
>   
>       /*
>        * old/new relfilenodes might differ for pg_largeobject(_metadata) 
> indexes
> *************** typedef struct
> *** 276,283 ****
>       const char *progname;           /* complete pathname for this program */
>       char       *exec_path;          /* full path to my executable */
>       char       *user;                       /* username for clusters */
> !     char      **tablespaces;        /* tablespaces */
> !     int                     num_tablespaces;
>       char      **libraries;          /* loadable libraries */
>       int                     num_libraries;
>       ClusterInfo *running_cluster;
> --- 280,287 ----
>       const char *progname;           /* complete pathname for this program */
>       char       *exec_path;          /* full path to my executable */
>       char       *user;                       /* username for clusters */
> !     char      **old_tablespaces;    /* tablespaces */
> !     int                     num_old_tablespaces;
>       char      **libraries;          /* loadable libraries */
>       int                     num_libraries;
>       ClusterInfo *running_cluster;
> *************** void          get_sock_dir(ClusterInfo *cluster,
> *** 398,406 ****
>   /* relfilenode.c */
>   
>   void                get_pg_database_relfilenode(ClusterInfo *cluster);
> ! void                transfer_all_new_dbs(DbInfoArr *olddb_arr,
> !                                DbInfoArr *newdb_arr, char *old_pgdata, char 
> *new_pgdata);
> ! 
>   
>   /* tablespace.c */
>   
> --- 402,412 ----
>   /* relfilenode.c */
>   
>   void                get_pg_database_relfilenode(ClusterInfo *cluster);
> ! void                transfer_all_new_tablespaces(DbInfoArr *old_db_arr,
> !                                DbInfoArr *new_db_arr, char *old_pgdata, 
> char *new_pgdata);
> ! void                transfer_all_new_dbs(DbInfoArr *old_db_arr,
> !                                DbInfoArr *new_db_arr, char *old_pgdata, 
> char *new_pgdata,
> !                                char *old_tablespace);
>   
>   /* tablespace.c */
>   
> *************** void old_8_3_invalidate_bpchar_pattern_o
> *** 464,472 ****
>   char           *old_8_3_create_sequence_script(ClusterInfo *cluster);
>   
>   /* parallel.c */
> ! void parallel_exec_prog(const char *log_file, const char *opt_log_file,
>                 const char *fmt,...)
>   __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
> ! 
> ! bool reap_child(bool wait_for_child);
>   
> --- 470,480 ----
>   char           *old_8_3_create_sequence_script(ClusterInfo *cluster);
>   
>   /* parallel.c */
> ! void                parallel_exec_prog(const char *log_file, const char 
> *opt_log_file,
>                 const char *fmt,...)
>   __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
> ! void                parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, 
> DbInfoArr *new_db_arr,
> !                                                                             
>   char *old_pgdata, char *new_pgdata,
> !                                                                             
>   char *old_tablespace);
> ! bool                reap_child(bool wait_for_child);
>   
> diff --git a/contrib/pg_upgrade/relfilenode.c 
> b/contrib/pg_upgrade/relfilenode.c
> new file mode 100644
> index 9d0d5a0..cfdd39c
> *** a/contrib/pg_upgrade/relfilenode.c
> --- b/contrib/pg_upgrade/relfilenode.c
> ***************
> *** 16,42 ****
>   
>   
>   static void transfer_single_new_db(pageCnvCtx *pageConverter,
> !                                        FileNameMap *maps, int size);
>   static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
>                                                        const char *suffix);
>   
>   
>   /*
>    * transfer_all_new_dbs()
>    *
>    * Responsible for upgrading all database. invokes routines to generate 
> mappings and then
>    * physically link the databases.
>    */
>   void
> ! transfer_all_new_dbs(DbInfoArr *old_db_arr,
> !                                DbInfoArr *new_db_arr, char *old_pgdata, 
> char *new_pgdata)
>   {
>       int                     old_dbnum,
>                               new_dbnum;
>   
> -     pg_log(PG_REPORT, "%s user relation files\n",
> -       user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : 
> "Copying");
> - 
>       /* Scan the old cluster databases and transfer their files */
>       for (old_dbnum = new_dbnum = 0;
>                old_dbnum < old_db_arr->ndbs;
> --- 16,81 ----
>   
>   
>   static void transfer_single_new_db(pageCnvCtx *pageConverter,
> !                                        FileNameMap *maps, int size, char 
> *old_tablespace);
>   static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
>                                                        const char *suffix);
>   
>   
>   /*
> +  * transfer_all_new_tablespaces()
> +  *
> +  * Responsible for upgrading all databases. Invokes routines to generate 
> mappings and then
> +  * physically link the databases.
> +  */
> + void
> + transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
> +                                     char *old_pgdata, char *new_pgdata)
> + {
> +     pg_log(PG_REPORT, "%s user relation files\n",
> +       user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : 
> "Copying");
> + 
> +     /*
> +      *      Transferring files by tablespace is tricky because a single 
> database
> +      *      can use multiple tablespaces.  For non-parallel mode, we just 
> pass a
> +      *      NULL tablespace path, which matches all tablespaces.  In 
> parallel mode,
> +      *      we pass the default tablespace and all user-created tablespaces
> +      *      and let those operations happen in parallel.
> +      */
> +     if (user_opts.jobs <= 1)
> +             parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, 
> old_pgdata,
> +                                                                       
> new_pgdata, NULL);
> +     else
> +     {
> +             int tblnum;
> +             
> +             for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
> +                     parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, 
> old_pgdata,
> +                                                               new_pgdata, 
> os_info.old_tablespaces[tblnum]);
> +             /* reap all children */
> +             while (reap_child(true) == true)
> +                     ;
> +     }
> + 
> +     end_progress_output();
> +     check_ok();
> + 
> +     return;
> + }
> + 
> + 
> + /*
>    * transfer_all_new_dbs()
>    *
>    * Responsible for upgrading all database. invokes routines to generate 
> mappings and then
>    * physically link the databases.
>    */
>   void
> ! transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
> !                                     char *old_pgdata, char *new_pgdata, 
> char *old_tablespace)
>   {
>       int                     old_dbnum,
>                               new_dbnum;
>   
>       /* Scan the old cluster databases and transfer their files */
>       for (old_dbnum = new_dbnum = 0;
>                old_dbnum < old_db_arr->ndbs;
> *************** transfer_all_new_dbs(DbInfoArr *old_db_a
> *** 75,89 ****
>   #ifdef PAGE_CONVERSION
>                       pageConverter = setupPageConverter();
>   #endif
> !                     transfer_single_new_db(pageConverter, mappings, n_maps);
>   
>                       pg_free(mappings);
>               }
>       }
>   
> -     end_progress_output();
> -     check_ok();
> - 
>       return;
>   }
>   
> --- 114,126 ----
>   #ifdef PAGE_CONVERSION
>                       pageConverter = setupPageConverter();
>   #endif
> !                     transfer_single_new_db(pageConverter, mappings, n_maps,
> !                                                                
> old_tablespace);
>   
>                       pg_free(mappings);
>               }
>       }
>   
>       return;
>   }
>   
> *************** get_pg_database_relfilenode(ClusterInfo
> *** 125,131 ****
>    */
>   static void
>   transfer_single_new_db(pageCnvCtx *pageConverter,
> !                                        FileNameMap *maps, int size)
>   {
>       int                     mapnum;
>       bool            vm_crashsafe_match = true;
> --- 162,168 ----
>    */
>   static void
>   transfer_single_new_db(pageCnvCtx *pageConverter,
> !                                        FileNameMap *maps, int size, char 
> *old_tablespace)
>   {
>       int                     mapnum;
>       bool            vm_crashsafe_match = true;
> *************** transfer_single_new_db(pageCnvCtx *pageC
> *** 140,157 ****
>   
>       for (mapnum = 0; mapnum < size; mapnum++)
>       {
> !             /* transfer primary file */
> !             transfer_relfile(pageConverter, &maps[mapnum], "");
> ! 
> !             /* fsm/vm files added in PG 8.4 */
> !             if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
>               {
> !                     /*
> !                      * Copy/link any fsm and vm files, if they exist
> !                      */
> !                     transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
> !                     if (vm_crashsafe_match)
> !                             transfer_relfile(pageConverter, &maps[mapnum], 
> "_vm");
>               }
>       }
>   }
> --- 177,198 ----
>   
>       for (mapnum = 0; mapnum < size; mapnum++)
>       {
> !             if (old_tablespace == NULL ||
> !                     strcmp(maps[mapnum].old_tablespace, old_tablespace) == 
> 0)
>               {
> !                     /* transfer primary file */
> !                     transfer_relfile(pageConverter, &maps[mapnum], "");
> !     
> !                     /* fsm/vm files added in PG 8.4 */
> !                     if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
> !                     {
> !                             /*
> !                              * Copy/link any fsm and vm files, if they exist
> !                              */
> !                             transfer_relfile(pageConverter, &maps[mapnum], 
> "_fsm");
> !                             if (vm_crashsafe_match)
> !                                     transfer_relfile(pageConverter, 
> &maps[mapnum], "_vm");
> !                     }
>               }
>       }
>   }
> *************** transfer_relfile(pageCnvCtx *pageConvert
> *** 187,196 ****
>               else
>                       snprintf(extent_suffix, sizeof(extent_suffix), ".%d", 
> segno);
>   
> !             snprintf(old_file, sizeof(old_file), "%s/%u%s%s", map->old_dir,
> !                              map->old_relfilenode, type_suffix, 
> extent_suffix);
> !             snprintf(new_file, sizeof(new_file), "%s/%u%s%s", map->new_dir,
> !                              map->new_relfilenode, type_suffix, 
> extent_suffix);
>       
>               /* Is it an extent, fsm, or vm file? */
>               if (type_suffix[0] != '\0' || segno != 0)
> --- 228,239 ----
>               else
>                       snprintf(extent_suffix, sizeof(extent_suffix), ".%d", 
> segno);
>   
> !             snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", 
> map->old_tablespace,
> !                              map->old_tablespace_suffix, map->old_db_oid, 
> map->old_relfilenode,
> !                              type_suffix, extent_suffix);
> !             snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", 
> map->new_tablespace,
> !                              map->new_tablespace_suffix, map->new_db_oid, 
> map->new_relfilenode,
> !                              type_suffix, extent_suffix);
>       
>               /* Is it an extent, fsm, or vm file? */
>               if (type_suffix[0] != '\0' || segno != 0)
> *************** transfer_relfile(pageCnvCtx *pageConvert
> *** 239,241 ****
> --- 282,285 ----
>   
>       return;
>   }
> + 
> diff --git a/contrib/pg_upgrade/tablespace.c b/contrib/pg_upgrade/tablespace.c
> new file mode 100644
> index a93c517..321738d
> *** a/contrib/pg_upgrade/tablespace.c
> --- b/contrib/pg_upgrade/tablespace.c
> *************** init_tablespaces(void)
> *** 23,29 ****
>       set_tablespace_directory_suffix(&old_cluster);
>       set_tablespace_directory_suffix(&new_cluster);
>   
> !     if (os_info.num_tablespaces > 0 &&
>       strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0)
>               pg_log(PG_FATAL,
>                          "Cannot upgrade to/from the same system catalog version when\n"
> --- 23,29 ----
>       set_tablespace_directory_suffix(&old_cluster);
>       set_tablespace_directory_suffix(&new_cluster);
>   
> !     if (os_info.num_old_tablespaces > 0 &&
>       strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0)
>               pg_log(PG_FATAL,
>                          "Cannot upgrade to/from the same system catalog version when\n"
> *************** get_tablespace_paths(void)
> *** 57,72 ****
>   
>       res = executeQueryOrDie(conn, "%s", query);
>   
> !     if ((os_info.num_tablespaces = PQntuples(res)) != 0)
> !             os_info.tablespaces = (char **) pg_malloc(
> !                                                                os_info.num_tablespaces * sizeof(char *));
>       else
> !             os_info.tablespaces = NULL;
>   
>       i_spclocation = PQfnumber(res, "spclocation");
>   
> !     for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++)
> !             os_info.tablespaces[tblnum] = pg_strdup(
>                                                                        PQgetvalue(res, tblnum, i_spclocation));
>   
>       PQclear(res);
> --- 57,72 ----
>   
>       res = executeQueryOrDie(conn, "%s", query);
>   
> !     if ((os_info.num_old_tablespaces = PQntuples(res)) != 0)
> !             os_info.old_tablespaces = (char **) pg_malloc(
> !                                                                os_info.num_old_tablespaces * sizeof(char *));
>       else
> !             os_info.old_tablespaces = NULL;
>   
>       i_spclocation = PQfnumber(res, "spclocation");
>   
> !     for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
> !             os_info.old_tablespaces[tblnum] = pg_strdup(
>                                                                        PQgetvalue(res, tblnum, i_spclocation));
>   
>       PQclear(res);
> diff --git a/doc/src/sgml/pgupgrade.sgml b/doc/src/sgml/pgupgrade.sgml
> new file mode 100644
> index 53781e4..3e5d548
> *** a/doc/src/sgml/pgupgrade.sgml
> --- b/doc/src/sgml/pgupgrade.sgml
> *************** NET STOP pgsql-8.3  (<productname>Postgr
> *** 342,351 ****
>   
>       <para>
>        The <option>--jobs</> option allows multiple CPU cores to be used
> !      to dump and reload database schemas in parallel;  a good place to
> !      start is the number of CPU cores on the server.  This option can
> !      dramatically reduce the time to upgrade a multi-database server
> !      running on a multiprocessor machine.
>       </para>
>   
>       <para>
> --- 342,353 ----
>   
>       <para>
>        The <option>--jobs</> option allows multiple CPU cores to be used
> !      for file copy operations and to dump and reload database schemas in
> !      parallel;  a good place to start is the number of CPU cores on the
> !      server, or the number of tablespaces if not using the
> !      <option>--link</> option.  This option can dramatically reduce the
> !      time to upgrade a multi-database server running on a multiprocessor
> !      machine.
>       </para>
>   
>       <para>

> :
> 
> . traprm
> 
> export QUIET=$((QUIET + 1))
> 
> > /rtmp/out
> 
> OLDREL=9.2
> NEWREL=9.3
> BRANCH=tablespace
> SSD="f"
> 
> export PGOPTIONS="-c synchronous_commit=off"
> 
> for CYCLES in 2 4 8 16 32 64 128
> do
>       echo "$CYCLES" >> /rtmp/out
>       for JOBLIMIT in 1 2
>       do
>               cd /pgsql/$REL
>               pgsw $BRANCH
>               cd -
>               tools/setup $OLDREL $NEWREL
>               sleep 2
>               [ "$SSD" = "f" ] && tools/mv_to_archive
>       
>               # need for +16k
>               for CONFIG in /u/pgsql.old/data/postgresql.conf /u/pgsql/data/postgresql.conf
>               do
>                       pipe sed 's/#max_locks_per_transaction = 64/max_locks_per_transaction = 64000/' "$CONFIG"
>                       pipe sed 's/shared_buffers = 128MB/shared_buffers = 1GB/' "$CONFIG"
>               done
> 
>               pgstart -w /u/pgsql.old/data
>               for DIR in /archive/tmp/t1 /backup0/tmp/t2
>               do      rm -rf "$DIR"
>                       mkdir "$DIR"
>                       chown postgres "$DIR"
>                       chmod 0700 "$DIR"
>               done
> 
>               echo "CREATE TABLESPACE t1 LOCATION '/archive/tmp/t1';" | sql --echo-all test
>               echo "CREATE TABLESPACE t2 LOCATION '/backup0/tmp/t2';" | sql --echo-all test
> 
>               for SPNO in $(jot 2)
>               do
>                       for TBLNO in $(jot $(($CYCLES / 2)) )
>                       do 
>                               echo "CREATE TABLE test${SPNO}_${TBLNO} (x TEXT) TABLESPACE t$SPNO;"
>                               echo "ALTER TABLE test${SPNO}_${TBLNO} ALTER COLUMN x SET STORAGE EXTERNAL;"
>                               echo "INSERT INTO test${SPNO}_${TBLNO} SELECT repeat('x', 999000000);"
>                       done |
>                       PGOPTIONS="-c synchronous_commit=off" sql --single-transaction --echo-all test
>               done
>               pgstop /u/pgsql.old/data
>               sleep 2
>               # clear cache
>               echo 3 > /proc/sys/vm/drop_caches
>               # allow system to repopulate
>               sleep 15
>               /usr/bin/time --output=/rtmp/out --append --format '%e' tools/upgrade -j $JOBLIMIT || exit
>               sleep 2
>       done
> done
> 
> bell

> 
> -- 
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers


-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + It's impossible for everything to be true. +


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to