In my first attached, applied patch, I have found a way to speed
relations lookups in pg_upgrade.  I knew there was a way to optimize
this but it was not clear until my major cleanups.  Instead of doing
effectively a nested loop join on old/new relations, I now order them
and use a 1:1 mergejoin.  This should speed up pg_upgrade for many
relations.

The second patch removes a hack for toast relations that is unnecessary
now that we always preserve pg_class.oid.  The old code preserved
relfilenodes for non-toast tables and oids for toast relations, which
was obviously confusing and non-optimal.

Remember, this code was not originally written by me but someone at
EnteprriseDB, so I didn't fully understand it until now.

I love removing functions!

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + It's impossible for everything to be true. +
commit 002c105a0706bd1c1e939fe0f47ecdceeae6c52d
Author: Bruce Momjian <br...@momjian.us>
Date:   Sat Jan 8 13:44:44 2011 -0500

    In pg_upgrade, remove functions that did sequential array scans looking
    up relations, but rather order old/new relations and use the same array
    index value for both.  This should speed up pg_upgrade for databases
    with many relations.

diff --git a/contrib/pg_upgrade/function.c b/contrib/pg_upgrade/function.c
index cb9576a..d7c790c 100644
*** /tmp/UF8KGb_function.c      Sat Jan  8 13:46:55 2011
--- /tmp/4GeXcc_function.c      Sat Jan  8 13:46:55 2011
***************
*** 13,25 ****
  
  
  /*
!  * install_support_functions_in_db()
   *
   * pg_upgrade requires some support functions that enable it to modify
   * backend behavior.
   */
  void
! install_support_functions_in_db(const char *db_name)
  {
        PGconn *conn = connectToServer(&new_cluster, db_name);
        
--- 13,25 ----
  
  
  /*
!  * install_support_functions_in_new_db()
   *
   * pg_upgrade requires some support functions that enable it to modify
   * backend behavior.
   */
  void
! install_support_functions_in_new_db(const char *db_name)
  {
        PGconn *conn = connectToServer(&new_cluster, db_name);
        
*************** install_support_functions_in_db(const ch
*** 87,93 ****
  
  
  void
! uninstall_support_functions(void)
  {
        int                     dbnum;
  
--- 87,93 ----
  
  
  void
! uninstall_support_functions_from_new_cluster(void)
  {
        int                     dbnum;
  
diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
index 50e4de2..c805a04 100644
*** /tmp/KMuVfb_info.c  Sat Jan  8 13:46:55 2011
--- /tmp/gznt7a_info.c  Sat Jan  8 13:46:55 2011
*************** static void create_rel_filename_map(cons
*** 21,30 ****
                          const DbInfo *old_db, const DbInfo *new_db,
                          const RelInfo *old_rel, const RelInfo *new_rel,
                          FileNameMap *map);
- static RelInfo *relarr_lookup_rel_name(ClusterInfo *cluster, RelInfoArr 
*rel_arr,
-                                 const char *nspname, const char *relname);
- static RelInfo *relarr_lookup_rel_oid(ClusterInfo *cluster, RelInfoArr 
*rel_arr,
-                               Oid oid);
  
  
  /*
--- 21,26 ----
*************** gen_db_file_maps(DbInfo *old_db, DbInfo 
*** 42,59 ****
        int                     relnum;
        int                     num_maps = 0;
  
        maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) *
                                                                         
old_db->rel_arr.nrels);
  
        for (relnum = 0; relnum < old_db->rel_arr.nrels; relnum++)
        {
                RelInfo    *old_rel = &old_db->rel_arr.rels[relnum];
!               RelInfo    *new_rel;
! 
!               /* old/new relation names always match */
!               new_rel = relarr_lookup_rel_name(&new_cluster, &new_db->rel_arr,
!                                                                  
old_rel->nspname, old_rel->relname);
  
                create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
                                old_rel, new_rel, maps + num_maps);
                num_maps++;
--- 38,59 ----
        int                     relnum;
        int                     num_maps = 0;
  
+       if (old_db->rel_arr.nrels != new_db->rel_arr.nrels)
+               pg_log(PG_FATAL, "old and new databases \"%s\" have a different 
number of relations\n",
+                                       old_db->db_name);
+ 
        maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) *
                                                                         
old_db->rel_arr.nrels);
  
        for (relnum = 0; relnum < old_db->rel_arr.nrels; relnum++)
        {
                RelInfo    *old_rel = &old_db->rel_arr.rels[relnum];
!               RelInfo    *new_rel = &old_db->rel_arr.rels[relnum];
  
+               if (old_rel->reloid != new_rel->reloid)
+                       pg_log(PG_FATAL, "mismatch of relation id: database 
\"%s\", old relid %d, new relid %d\n",
+                       old_db->db_name, old_rel->reloid, new_rel->reloid);
+       
                create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
                                old_rel, new_rel, maps + num_maps);
                num_maps++;
*************** get_db_infos(ClusterInfo *cluster)
*** 153,159 ****
                                                        "FROM 
pg_catalog.pg_database d "
                                                        " LEFT OUTER JOIN 
pg_catalog.pg_tablespace t "
                                                        " ON d.dattablespace = 
t.oid "
!                                                       "WHERE d.datallowconn = 
true");
  
        i_datname = PQfnumber(res, "datname");
        i_oid = PQfnumber(res, "oid");
--- 153,161 ----
                                                        "FROM 
pg_catalog.pg_database d "
                                                        " LEFT OUTER JOIN 
pg_catalog.pg_tablespace t "
                                                        " ON d.dattablespace = 
t.oid "
!                                                       "WHERE d.datallowconn = 
true "
!       /* we don't preserve pg_database.oid so we sort by name */
!                                                       "ORDER BY 2");
  
        i_datname = PQfnumber(res, "datname");
        i_oid = PQfnumber(res, "oid");
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 258,264 ****
                         "GROUP BY  c.oid, n.nspname, c.relname, c.relfilenode,"
                         "                      c.reltoastrelid, t.spclocation, 
"
                         "                      n.nspname "
!                        "ORDER BY t.spclocation, n.nspname, c.relname;",
                         FirstNormalObjectId,
        /* does pg_largeobject_metadata need to be migrated? */
                         (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ?
--- 260,267 ----
                         "GROUP BY  c.oid, n.nspname, c.relname, c.relfilenode,"
                         "                      c.reltoastrelid, t.spclocation, 
"
                         "                      n.nspname "
!       /* we preserve pg_class.oid so we sort by it to match old/new */
!                        "ORDER BY 1;",
                         FirstNormalObjectId,
        /* does pg_largeobject_metadata need to be migrated? */
                         (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ?
*************** get_rel_infos(ClusterInfo *cluster, DbIn
*** 308,393 ****
  
        dbinfo->rel_arr.rels = relinfos;
        dbinfo->rel_arr.nrels = num_rels;
-       dbinfo->rel_arr.last_relname_lookup = 0;
- }
- 
- 
- /*
-  * dbarr_lookup_db()
-  *
-  * Returns the pointer to the DbInfo structure
-  */
- DbInfo *
- dbarr_lookup_db(DbInfoArr *db_arr, const char *db_name)
- {
-       int                     dbnum;
- 
-       for (dbnum = 0; dbnum < db_arr->ndbs; dbnum++)
-       {
-               if (strcmp(db_arr->dbs[dbnum].db_name, db_name) == 0)
-                       return &db_arr->dbs[dbnum];
-       }
- 
-       return NULL;
- }
- 
- 
- /*
-  * relarr_lookup_rel_name()
-  *
-  * Searches "relname" in rel_arr. Returns the *real* pointer to the
-  * RelInfo structure.
-  */
- static RelInfo *
- relarr_lookup_rel_name(ClusterInfo *cluster, RelInfoArr *rel_arr,
-                                       const char *nspname, const char 
*relname)
- {
-       int                     relnum;
- 
-       /* Test next lookup first, for speed */
-       if (rel_arr->last_relname_lookup + 1 < rel_arr->nrels &&
-               strcmp(rel_arr->rels[rel_arr->last_relname_lookup + 1].nspname, 
nspname) == 0 &&
-               strcmp(rel_arr->rels[rel_arr->last_relname_lookup + 1].relname, 
relname) == 0)
-       {
-               rel_arr->last_relname_lookup++;
-               return &rel_arr->rels[rel_arr->last_relname_lookup];
-       }
- 
-       for (relnum = 0; relnum < rel_arr->nrels; relnum++)
-       {
-               if (strcmp(rel_arr->rels[relnum].nspname, nspname) == 0 &&
-                       strcmp(rel_arr->rels[relnum].relname, relname) == 0)
-               {
-                       rel_arr->last_relname_lookup = relnum;
-                       return &rel_arr->rels[relnum];
-               }
-       }
-       pg_log(PG_FATAL, "Could not find %s.%s in %s cluster\n",
-                  nspname, relname, CLUSTER_NAME(cluster));
-       return NULL;
- }
- 
- 
- /*
-  * relarr_lookup_rel_oid()
-  *
-  *    Returns a pointer to the RelInfo structure for the
-  *    given oid or NULL if the desired entry cannot be
-  *    found.
-  */
- static RelInfo *
- relarr_lookup_rel_oid(ClusterInfo *cluster, RelInfoArr *rel_arr, Oid oid)
- {
-       int                     relnum;
- 
-       for (relnum = 0; relnum < rel_arr->nrels; relnum++)
-       {
-               if (rel_arr->rels[relnum].reloid == oid)
-                       return &rel_arr->rels[relnum];
-       }
-       pg_log(PG_FATAL, "Could not find %d in %s cluster\n",
-                  oid, CLUSTER_NAME(cluster));
-       return NULL;
  }
  
  
--- 311,316 ----
*************** free_rel_arr(RelInfoArr *rel_arr)
*** 396,402 ****
  {
        pg_free(rel_arr->rels);
        rel_arr->nrels = 0;
-       rel_arr->last_relname_lookup = 0;
  }
  
  
--- 319,324 ----
diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c
index a428a03..485412d 100644
*** /tmp/oNcxDc_pg_upgrade.c    Sat Jan  8 13:46:55 2011
--- /tmp/YfgMvc_pg_upgrade.c    Sat Jan  8 13:46:55 2011
*************** prepare_new_databases(void)
*** 229,235 ****
         *      Install support functions in the database accessed by
         *      GLOBALS_DUMP_FILE because it can preserve pg_authid.oid.
         */
!       install_support_functions_in_db(os_info.user);
  
        /*
         * We have to create the databases first so we can install support
--- 229,235 ----
         *      Install support functions in the database accessed by
         *      GLOBALS_DUMP_FILE because it can preserve pg_authid.oid.
         */
!       install_support_functions_in_new_db(os_info.user);
  
        /*
         * We have to create the databases first so we can install support
*************** prepare_new_databases(void)
*** 244,249 ****
--- 244,250 ----
                          GLOBALS_DUMP_FILE, log_opts.filename);
        check_ok();
  
+       /* we load this to get a current list of databases */
        get_db_and_rel_infos(&new_cluster);
  
        stop_postmaster(false, false);
*************** create_new_objects(void)
*** 266,272 ****
  
                /* skip db we already installed */
                if (strcmp(new_db->db_name, os_info.user) != 0)
!                       install_support_functions_in_db(new_db->db_name);
        }
        check_ok();
  
--- 267,273 ----
  
                /* skip db we already installed */
                if (strcmp(new_db->db_name, os_info.user) != 0)
!                       install_support_functions_in_new_db(new_db->db_name);
        }
        check_ok();
  
*************** create_new_objects(void)
*** 279,289 ****
                          DB_DUMP_FILE, log_opts.filename);
        check_ok();
  
!       /* regenerate now that we have db schemas */
        dbarr_free(&new_cluster.dbarr);
        get_db_and_rel_infos(&new_cluster);
  
!       uninstall_support_functions();
  
        stop_postmaster(false, false);
  }
--- 280,290 ----
                          DB_DUMP_FILE, log_opts.filename);
        check_ok();
  
!       /* regenerate now that we have objects in the databases */
        dbarr_free(&new_cluster.dbarr);
        get_db_and_rel_infos(&new_cluster);
  
!       uninstall_support_functions_from_new_cluster();
  
        stop_postmaster(false, false);
  }
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
index acd453b..18cbc19 100644
*** /tmp/iyC1Eb_pg_upgrade.h    Sat Jan  8 13:46:55 2011
--- /tmp/kjwEac_pg_upgrade.h    Sat Jan  8 13:46:55 2011
*************** typedef struct
*** 77,83 ****
  {
        RelInfo    *rels;
        int                     nrels;
-       int                     last_relname_lookup;    /* cache of last lookup 
location */
  } RelInfoArr;
  
  /*
--- 77,82 ----
*************** void            check_hard_link(void);
*** 321,328 ****
  
  /* function.c */
  
! void          install_support_functions_in_db(const char *db_name);
! void          uninstall_support_functions(void);
  void          get_loadable_libraries(void);
  void          check_loadable_libraries(void);
  
--- 320,327 ----
  
  /* function.c */
  
! void          install_support_functions_in_new_db(const char *db_name);
! void          uninstall_support_functions_from_new_cluster(void);
  void          get_loadable_libraries(void);
  void          check_loadable_libraries(void);
  
*************** void            check_loadable_libraries(void);
*** 331,338 ****
  FileNameMap *gen_db_file_maps(DbInfo *old_db,
                                 DbInfo *new_db, int *nmaps, const char 
*old_pgdata,
                                 const char *new_pgdata);
! void get_db_and_rel_infos(ClusterInfo *cluster);
! DbInfo           *dbarr_lookup_db(DbInfoArr *db_arr, const char *db_name);
  void          dbarr_free(DbInfoArr *db_arr);
  void print_maps(FileNameMap *maps, int n,
                   const char *dbName);
--- 330,336 ----
  FileNameMap *gen_db_file_maps(DbInfo *old_db,
                                 DbInfo *new_db, int *nmaps, const char 
*old_pgdata,
                                 const char *new_pgdata);
! void          get_db_and_rel_infos(ClusterInfo *cluster);
  void          dbarr_free(DbInfoArr *db_arr);
  void print_maps(FileNameMap *maps, int n,
                   const char *dbName);
diff --git a/contrib/pg_upgrade/relfilenode.c b/contrib/pg_upgrade/relfilenode.c
index b30f5e0..ca96407 100644
*** /tmp/0UKbOa_relfilenode.c   Sat Jan  8 13:46:55 2011
--- /tmp/eAIxgb_relfilenode.c   Sat Jan  8 13:46:55 2011
*************** char            scandir_file_pattern[MAXPGPATH];
*** 29,50 ****
   * physically link the databases.
   */
  const char *
! transfer_all_new_dbs(DbInfoArr *olddb_arr,
!                                        DbInfoArr *newdb_arr, char 
*old_pgdata, char *new_pgdata)
  {
        int                     dbnum;
        const char *msg = NULL;
  
        prep_status("Restoring user relation files\n");
  
!       for (dbnum = 0; dbnum < newdb_arr->ndbs; dbnum++)
        {
!               DbInfo     *new_db = &newdb_arr->dbs[dbnum];
!               DbInfo     *old_db = dbarr_lookup_db(olddb_arr, 
new_db->db_name);
                FileNameMap *mappings;
                int                     n_maps;
                pageCnvCtx *pageConverter = NULL;
  
                n_maps = 0;
                mappings = gen_db_file_maps(old_db, new_db, &n_maps, old_pgdata,
                                                                        
new_pgdata);
--- 29,57 ----
   * physically link the databases.
   */
  const char *
! transfer_all_new_dbs(DbInfoArr *old_db_arr,
!                                        DbInfoArr *new_db_arr, char 
*old_pgdata, char *new_pgdata)
  {
        int                     dbnum;
        const char *msg = NULL;
  
        prep_status("Restoring user relation files\n");
  
!       if (old_db_arr->ndbs != new_db_arr->ndbs)
!               pg_log(PG_FATAL, "old and new clusters have a different number 
of databases\n");
!       
!       for (dbnum = 0; dbnum < old_db_arr->ndbs; dbnum++)
        {
!               DbInfo     *old_db = &old_db_arr->dbs[dbnum];
!               DbInfo     *new_db = &new_db_arr->dbs[dbnum];
                FileNameMap *mappings;
                int                     n_maps;
                pageCnvCtx *pageConverter = NULL;
  
+               if (strcmp(old_db->db_name, new_db->db_name) != 0)
+                       pg_log(PG_FATAL, "old and new databases have a 
different names: old \"%s\", new \"%s\"\n",
+                               old_db->db_name, new_db->db_name);
+               
                n_maps = 0;
                mappings = gen_db_file_maps(old_db, new_db, &n_maps, old_pgdata,
                                                                        
new_pgdata);
commit a60b32b3dcb4d49a60c96558ce405764ac17c799
Author: Bruce Momjian <br...@momjian.us>
Date:   Sat Jan 8 08:01:12 2011 -0500

    In pg_upgrade, remove unnecessary separate handling of toast tables now
    that we restore by oid;  they can be handled like regular tables when
    creating the file mapping structure.

diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
index 4360d39..50e4de2 100644
*** /tmp/W2sFHd_info.c  Sat Jan  8 13:46:40 2011
--- /tmp/2POKce_info.c  Sat Jan  8 13:46:40 2011
*************** gen_db_file_maps(DbInfo *old_db, DbInfo 
*** 50,59 ****
                RelInfo    *old_rel = &old_db->rel_arr.rels[relnum];
                RelInfo    *new_rel;
  
-               /* toast tables are handled by their parents */
-               if (strcmp(old_rel->nspname, "pg_toast") == 0)
-                       continue;
- 
                /* old/new relation names always match */
                new_rel = relarr_lookup_rel_name(&new_cluster, &new_db->rel_arr,
                                                                   
old_rel->nspname, old_rel->relname);
--- 50,55 ----
*************** gen_db_file_maps(DbInfo *old_db, DbInfo 
*** 61,99 ****
                create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
                                old_rel, new_rel, maps + num_maps);
                num_maps++;
- 
-               /*
-                * So much for mapping this relation;  now we need a mapping
-                * for its corresponding toast relation and toast index, if any.
-                */
-               if (old_rel->toastrelid > 0)
-               {
-                       char            old_name[MAXPGPATH], 
new_name[MAXPGPATH];
-                       RelInfo    *old_toast, *new_toast;
- 
-                       /* use the toast relids from the rel_arr for lookups */
-                       old_toast = relarr_lookup_rel_oid(&old_cluster, 
&old_db->rel_arr,
-                                                                               
          old_rel->toastrelid);
-                       new_toast = relarr_lookup_rel_oid(&new_cluster, 
&new_db->rel_arr,
-                                                                               
          new_rel->toastrelid);
- 
-                       create_rel_filename_map(old_pgdata, new_pgdata, old_db, 
new_db,
-                                       old_toast, new_toast, maps + num_maps);
-                       num_maps++;
- 
-                       /* toast indexes are the same, except with an "_index" 
suffix */
-                       snprintf(old_name, sizeof(old_name), "%s_index", 
old_toast->relname);
-                       snprintf(new_name, sizeof(new_name), "%s_index", 
new_toast->relname);
- 
-                       old_toast = relarr_lookup_rel_name(&old_cluster, 
&old_db->rel_arr,
-                                                                               
  "pg_toast", old_name);
-                       new_toast = relarr_lookup_rel_name(&new_cluster, 
&new_db->rel_arr,
-                                                                               
  "pg_toast", new_name);
- 
-                       create_rel_filename_map(old_pgdata, new_pgdata, old_db,
-                                       new_db, old_toast, new_toast, maps + 
num_maps);
-                       num_maps++;
-               }
        }
  
        *nmaps = num_maps;
--- 57,62 ----
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to