Hi,

It is observed that, when we have one remote (huge) table and one local
(small) table and a join between them, then
 1. If the column type is text, then we push the join qual to the remote
    server, so that we will have less rows to fetch, and thus execution time
    is very less.
 2. If the column type is varchar, then we do not push the join qual to the
    remote server, resulting into large number of data fetch and thus
    execution time is very high.

Here is the EXPLAIN plan for such queries:

When VARCHAR column:

                                                                QUERY
PLAN
-------------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=100.15..4594935.73 rows=230 width=120) (actual
time=0.490..291.339 rows=1 loops=1)
   Output: a.ename, d.dname
   Join Filter: ((a.deptno)::text = (d.deptno)::text)
   Rows Removed by Join Filter: 100099
   ->  Index Scan using emp2_pk on public.emp2 a  (cost=0.15..8.17 rows=1
width=76) (actual time=0.009..0.013 rows=1 loops=1)
         Output: a.empno, a.ename, a.deptno
         Index Cond: (a.empno = '7369'::numeric)
   ->  Foreign Scan on public.fdw_dept2 d  (cost=100.00..4594353.50
rows=45925 width=120) (actual time=0.466..274.990 rows=100100 loops=1)
         Output: d.deptno, d.dname
         Remote SQL: SELECT deptno, dname FROM public.dept2
 Planning time: 0.697 ms
 Execution time: 291.467 ms
(12 rows)


When TEXT column:

                                                          QUERY
PLAN
------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=100.57..216.63 rows=238 width=120) (actual
time=0.375..0.378 rows=1 loops=1)
   Output: a.ename, d.dname
   ->  Index Scan using emp3_pk on public.emp3 a  (cost=0.15..8.17 rows=1
width=70) (actual time=0.010..0.011 rows=1 loops=1)
         Output: a.empno, a.ename, a.deptno
         Index Cond: (a.empno = '7369'::numeric)
   ->  Foreign Scan on public.fdw_dept3 d  (cost=100.42..208.45 rows=1
width=114) (actual time=0.362..0.362 rows=1 loops=1)
         Output: d.deptno, d.dname
         Remote SQL: SELECT deptno, dname FROM public.dept3 WHERE
(($1::text = deptno))
 Planning time: 1.220 ms
 Execution time: 0.498 ms
(10 rows)


Attached test script to reproduce this theory.

I have observed that, since we do not have an equality operator for VARCHAR
type, we convert VARCHAR to TEXT using RelabelType and use texteq operator
function.
However in foreign_expr_walker(), for T_RelabelType case, we have these
conditions which do not allow us push the qual to remote.

                /*
                 * RelabelType must not introduce a collation not derived
from
                 * an input foreign Var.
                 */
                collation = r->resultcollid;
                if (collation == InvalidOid)
                    state = FDW_COLLATE_NONE;
                else if (inner_cxt.state == FDW_COLLATE_SAFE &&
                         collation == inner_cxt.collation)
                    state = FDW_COLLATE_SAFE;
                else
                    state = FDW_COLLATE_UNSAFE;

I guess, since we do push qual to remote in case of TEXT, we should do the
same for VARCHAR too.

Also given that RelabelType are just dummy wrapper for binary compatible
types, can we simply set collation and state from its inner context instead
on above check block. Like

                /*
                 * Since RelabelType represents a "dummy" type coercion
between
                 * two binary-compatible datatypes, set collation and state
got
                 * from the inner_cxt.
                 */
                collation = inner_cxt.collation;
                state = inner_cxt.state;

Inputs/Thought?


-- 
Jeevan B Chalke
Principal Software Engineer, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
\c template1

-- Create database localdb and foreigndb;
CREATE DATABASE localdb;
CREATE DATABASE foreigndb;

-- Create tables in foreigndb
\c foreigndb

DROP TABLE IF EXISTS dept1; 

-- #Case 1, deptno datatype is NUMERIC. 
CREATE TABLE dept1 (
  deptno  NUMERIC(10) NOT NULL CONSTRAINT dept1_pk PRIMARY KEY,
  dname   VARCHAR(32) NOT NULL DEFAULT (md5(random()::VARCHAR))
);

INSERT INTO dept1 VALUES (generate_series(1,100100));

DROP TABLE IF EXISTS dept2; 

-- #Case 2, deptno datatype is VARCHAR.
CREATE TABLE dept2 (
  deptno  VARCHAR(10) NOT NULL CONSTRAINT dept2_pk PRIMARY KEY,
  dname   VARCHAR(32) NOT NULL DEFAULT (md5(random()::VARCHAR))
);

INSERT INTO dept2 VALUES (trim(to_char(generate_series(1,100100),'9999999')));

DROP TABLE IF EXISTS dept3; 

-- #Case 3, deptno datatype is TEXT.
CREATE TABLE dept3 (
  deptno  TEXT NOT NULL CONSTRAINT dept3_pk PRIMARY KEY,
  dname   VARCHAR(32) NOT NULL DEFAULT (md5(random()::VARCHAR))
);

INSERT INTO dept3 VALUES (trim(to_char(generate_series(1,100100),'9999999')));

-- create tables in localdb
\c localdb;

-- create extension postgres_fdw
CREATE EXTENSION postgres_fdw;

DROP TABLE IF EXISTS emp1; 

-- #Case 1, deptno datatype is NUMERIC.
CREATE TABLE emp1 (
  empno   NUMERIC(4) NOT NULL CONSTRAINT emp1_pk PRIMARY KEY,
  ename   VARCHAR(10),
  deptno  NUMERIC(10)
);
 
INSERT INTO emp1 VALUES (7369,  'SMITH',  '20');
INSERT INTO emp1 VALUES (7499,  'ALLEN',  '30');

DROP TABLE IF EXISTS emp2;

-- #Case 2, deptno datatype is varchar. 
CREATE TABLE emp2 (
  empno   NUMERIC(4) NOT NULL CONSTRAINT emp2_pk PRIMARY KEY,
  ename   VARCHAR(10),
  deptno  VARCHAR(10)
);

INSERT INTO emp2 VALUES (7369,  'SMITH',  '20');
INSERT INTO emp2 VALUES (7499,  'ALLEN',  '30');

DROP TABLE IF EXISTS emp3; 

-- #Case 3, deptno datatype is TEXT.
CREATE TABLE emp3 (
  empno   NUMERIC(4) NOT NULL CONSTRAINT emp3_pk PRIMARY KEY,
  ename   VARCHAR(10),
  deptno  TEXT
);
 
INSERT INTO emp3 VALUES (7369,  'SMITH',  '20');
INSERT INTO emp3 VALUES (7499,  'ALLEN',  '30');

DROP SERVER IF EXISTS pgfdwsrv cascade;

CREATE SERVER pgfdwsrv
  FOREIGN DATA WRAPPER postgres_fdw
  OPTIONS (host 'localhost', port '5432', dbname 'foreigndb', fdw_startup_cost '100', fdw_tuple_cost '100');

CREATE USER MAPPING FOR jeevan
  SERVER pgfdwsrv OPTIONS (user 'jeevan');

CREATE FOREIGN TABLE fdw_dept1
(
  deptno  NUMERIC(10),
  dname   VARCHAR(32)
) SERVER pgfdwsrv OPTIONS (schema_name 'public', table_name 'dept1', use_remote_estimate 'true');

CREATE FOREIGN TABLE fdw_dept2
(
  deptno  VARCHAR(10),
  dname   VARCHAR(32)
) SERVER pgfdwsrv OPTIONS (schema_name 'public', table_name 'dept2', use_remote_estimate 'true');

CREATE FOREIGN TABLE fdw_dept3
(
  deptno  TEXT,
  dname   VARCHAR(32)
) SERVER pgfdwsrv OPTIONS (schema_name 'public', table_name 'dept3', use_remote_estimate 'true');


-- #Case 1 Where condition is included in the Remote Query.
EXPLAIN (ANALYZE,VERBOSE)
  SELECT a.ename, d.dname
  FROM emp1 a,fdw_dept1 d
   WHERE d.deptno = a.deptno
   AND a.empno = 7369;

-- #Case 2 Where condition is not included in the Remote Query
EXPLAIN (ANALYZE,VERBOSE)
  SELECT a.ename, d.dname
  FROM emp2 a,fdw_dept2 d
   WHERE d.deptno = a.deptno
   AND a.empno = 7369;

-- #Case 3 Where condition is not included in the Remote Query
EXPLAIN (ANALYZE,VERBOSE)
  SELECT a.ename, d.dname
  FROM emp3 a,fdw_dept3 d
   WHERE d.deptno = a.deptno
   AND a.empno = 7369;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to