I've a client interested enough in $SUBJECT that they're willing to offer a bounty on it. An example of the pain is (working example attached):

create temp view denorm as
    select f.*, d1.t t1, d2.t t2
    from fact f
    left join dim d1 on f1=d1.id
    left join dim d2 on f2=d2.id
;

-- Fast
explain analyze select count(*) from denorm where 1 in (f1,f2);
explain analyze select count(*) from denorm where '1' in (t1);

-- Slow
explain analyze select count(*) from denorm where '1' in (t1,t2);

They currently work around this by doing a union:

select ... from denorm where t1 = '1'
union
select ... from denorm where t2 = '1'

... or depending on needs using IN instead of =.

AFAICT this can be transformed into a UNION (not all) if dim.id is unique. Does the upper planner pathification make this any easier? There's another transform using arrays that's possible as well (see attached example); I believe that would work regardless of uniqueness.

Just to be clear; the OR by itself is not a problem (as shown by the first fast query); it's the OR with the JOIN that's a problem.
--
Jim Nasby, Data Architect, Blue Treble Consulting, Austin TX
Experts in Analytics, Data Architecture and PostgreSQL
Data in Trouble? Get it in Treble! http://BlueTreble.com
855-TREBLE2 (855-873-2532)
create temp table fact as select generate_series(1,999) f1, 
generate_series(1,999) f2;
insert into fact select s,null from generate_series(1,999) s;
insert into fact select null,s from generate_series(1,999) s;
create index f_f1 on fact(f1);
create index f_f2 on fact(f2);
analyze fact;

create temp table dim as select s, s::text t from generate_series(1,999) s;
alter table dim add primary key(s), add unique (t);
create temp view denorm as
    select f.*, d1.t t1, d2.t t2, array[f1,f2] ident_ids
    from fact f
    left join dim d1 on f1=d1.s
    left join dim d2 on f2=d2.s
;

-- Fast
explain analyze select count(*) from denorm where 1 in (f1,f2);
explain analyze select count(*) from denorm where '1' in (t1);

-- Slow
explain analyze select count(*) from denorm where '1' in (t1,t2);

CREATE FUNCTION ident_ids(
    idents text[]
) RETURNS int[] STABLE LANGUAGE sql AS $$
SELECT array( SELECT s FROM dim WHERE t = ANY(idents) )
$$;
CREATE FUNCTION ident_ids(
    idents text
) RETURNS int[] STABLE LANGUAGE sql AS $$
SELECT ident_ids( ('{' || idents || '}')::text[] )
$$;

explain analyze select count(*) from denorm where ident_ids && 
ident_ids('42,84,128');

\echo ERROR OK here on version >= 10
create index fact__f_array on fact using gin ((array[f1,f2]) _int4_ops); -- pre 
10.0
\echo ERROR OK here on version < 10
create index fact__f_array on fact using gin ((array[f1,f2]) array_ops); -- 10.0

explain analyze select count(*) from denorm where ident_ids && 
ident_ids('42,84,128');
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to