[SQL] Looking for a way to sum integer arrays....

2005-04-22 Thread Tony Wasson
I'd like to be able to sum up an integer array. Like so:

  {3,2,1}
   + {0,2,2}
   ---
  {3,4,3}

The following solution I've been hacking on works, although I think it
is far from "ideal". Is there a built in way to sum up arrays? If not,
is there a better way than my crude method? I have tested this on 7.4
and 8.0. I'd also appreciate any insight on why my aggregate
fails to work when I have an empty initcond. P.S. I have never
written an aggregate and I was lost trying to follow the complex_sum
example in the docs.
-
CREATE OR REPLACE FUNCTION sum_intarray(INTEGER[],INTEGER[]) RETURNS
INTEGER[] LANGUAGE 'plpgsql' AS '
/*
|| Author: Tony Wasson
||
|| Overview: Experiment with arrays and aggregates
||  3,2,1
||+ 0,2,2
|| ---
||  3,4,3
||
|| Revisions: (when, who, what)
||  2005/04/21 -- TW - Create function
*/
DECLARE
inta1   ALIAS FOR $1;
inta2   ALIAS FOR $2;
out_arr INTEGER[];
out_arr_text TEXT := '''';
i   INTEGER;
nextnum INTEGER;
BEGIN
FOR i IN array_lower(inta1, 1)..array_upper(inta1, 1)
LOOP
RAISE NOTICE ''looking at element %'',i;
nextnum := COALESCE(inta1[i],0) + COALESCE(inta2[i],0);
RAISE NOTICE ''nextnum %'',nextnum;
out_arr_text := out_arr_text || nextnum::TEXT || '','';
RAISE NOTICE ''text %'',out_arr_text;
END LOOP;
RAISE NOTICE ''text %'',out_arr_text;
--drop the last comma
IF SUBSTRING(out_arr_text,length(out_arr_text),1) =  '','' THEN
out_arr_text := substring(out_arr_text,1,length(out_arr_text)-1);
END IF;
out_arr_text := ''{'' || out_arr_text || ''}'';
RAISE NOTICE ''text %'',out_arr_text;
out_arr := out_arr_text;
RAISE NOTICE ''out_arr %'',out_arr;
RETURN out_arr;
END
';

SELECT sum_intarray('{1,2}','{2,3}');
SELECT sum_intarray('{3,2,1}','{0,2,2}');
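-- expected results: {3,5} and {3,4,3}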

--- Now I make a table to demonstrate an aggregate on

CREATE TABLE arraytest (
id character varying(10) NOT NULL,
somearr integer[]
);

INSERT INTO arraytest (id, somearr) VALUES ('a', '{1,2,3}');
INSERT INTO arraytest (id, somearr) VALUES ('b', '{0,1,2}');


CREATE AGGREGATE sum_integer_array (
sfunc = sum_intarray,
basetype = INTEGER[],
stype = INTEGER[],
initcond = 
'{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);

--

# SELECT sum_integer_array(somearr) FROM arraytest;
 sum_integer_array
---------------------------------------------------------------------------------
 {1,3,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

Thanks in advance to anyone who reads this far.

Tony Wasson
[EMAIL PROTECTED]

---(end of broadcast)---
TIP 3: if posting/reading through Usenet, please send an appropriate
  subscribe-nomail command to [EMAIL PROTECTED] so that your
  message can get through to the mailing list cleanly


Re: [SQL] Looking for a way to sum integer arrays....

2005-04-29 Thread Tony Wasson
Thank you for the responses! 

To recap: pl/r array support works very well. In my case, I am looking
for a pl/pgsql solution.

I also got this nice function from dennisb on the #postgresql irc
channel, which seems extremely "clean" and works with 7.4/8.0. My
original function didn't handle a blank initcond in the aggregate
gracefully.

CREATE OR REPLACE FUNCTION array_add(int[],int[]) RETURNS int[] AS '
  DECLARE
x ALIAS FOR $1;
y ALIAS FOR $2;
a int;
b int;
i int;
res int[];
  BEGIN
res = x;

a := array_lower (y, 1);
b := array_upper (y, 1);

IF a IS NOT NULL THEN
  FOR i IN a .. b LOOP
res[i] := coalesce(res[i],0) + y[i];
  END LOOP;
END IF;

RETURN res;
  END;
'
LANGUAGE plpgsql STRICT IMMUTABLE;

--- then this aggregate lets me sum integer arrays...

CREATE AGGREGATE sum_integer_array (
sfunc = array_add,
basetype = INTEGER[],
stype = INTEGER[],
initcond = '{}'
);


Here's how my sample table looked, and my new array-summing aggregate
and function in action:

#SELECT * FROM arraytest ;
 id | somearr
----+---------
 a  | {1,2,3}
 b  | {0,1,2}
(2 rows)

#SELECT sum_integer_array(somearr) FROM arraytest ;
 sum_integer_array
-------------------
 {1,3,5}
(1 row)


Tony Wasson

---(end of broadcast)---
TIP 8: explain analyze is your friend


Re: [SQL] default value for select?

2005-05-09 Thread Tony Wasson
On 5/9/05, Mark Fenbers <[EMAIL PROTECTED]> wrote:
>  I want to update a column in myTable.  The value this column is set to
> depends on a nested select statement which sometimes returns 0 rows instead
> of 1.  This is a problem since the column I'm trying to update is set to
> refuse nulls.  Here's a sample:
>  
>  update myTable set myColumn = (Select altColumn from altTable where
> altColumn != 'XXX' limit 1) where myColumn = 'XXX';
>  
>  MyColumn cannot accept nulls, but sometimes "Select altColumn ..." returns
> 0 rows, and thus, the query fails.  
>  
>  Is there a way to set a default value to be inserted into myColumn if and
> when "select altColumn ..." returns zero rows?
>  
>  Mark

Mark, 
You can work around this by using a CASE expression that tests whether
your subquery returns NULL. This is not elegant at all, but it should
do what you want.

update myTable set myColumn = (CASE
  WHEN (Select altColumn from altTable where  altColumn != 'XXX'
limit 1) IS NULL
THEN 'some default value'
  ELSE (Select altColumn from altTable where  altColumn != 'XXX' limit 1)
  END)
where myColumn = 'XXX';
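A more compact equivalent, if you prefer, is to wrap the subquery in
COALESCE so the default kicks in whenever the subquery returns no row:

update myTable set myColumn = COALESCE(
  (Select altColumn from altTable where altColumn != 'XXX' limit 1),
  'some default value')
where myColumn = 'XXX';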

Hope this helps...
Tony

---(end of broadcast)---
TIP 5: Have you checked our extensive FAQ?

   http://www.postgresql.org/docs/faq


Re: [SQL] Help: Function for splitting VARCHAR column and migrating its data to 2 new tables

2005-05-24 Thread Tony Wasson
You can process every row with SELECT build_keyword_table('%');
The input goes to a SIMILAR TO on id - and % means all elements. You
can match a single row by using something like SELECT
build_keyword_table('123');

I also used this as my test data... It worked for me!

CREATE TABLE user_data (
 id   SERIAL,
 user_id      INTEGER,
 keywords VARCHAR(256) NOT NULL,
 add_date TIMESTAMP,
 PRIMARY KEY(id)
);

INSERT INTO user_data (keywords) VALUES ('new york,san francisco,
dallas, food');
INSERT INTO user_data (keywords) VALUES ('phoenix, hot, summer, fun');

CREATE TABLE keyword (
 name VARCHAR(64) NOT NULL,
 id   SERIAL,
 add_date TIMESTAMP,
 PRIMARY KEY(name)
);
-- todo put a UNIQUE INDEX on keyword (id)
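Roughly, the splitting function might look something like this (just a
sketch -- the exact trimming and duplicate handling are assumptions, and
the insert into the user_data/keyword link table is left as a comment):

CREATE OR REPLACE FUNCTION build_keyword_table(TEXT) RETURNS INTEGER AS $$
DECLARE
    match_pattern ALIAS FOR $1;
    rec       RECORD;
    kw_arr    TEXT[];
    kw        TEXT;
    i         INTEGER;
    n_added   INTEGER := 0;
BEGIN
    FOR rec IN SELECT id, keywords FROM user_data
               WHERE id::TEXT SIMILAR TO match_pattern
    LOOP
        kw_arr := string_to_array(rec.keywords, ',');
        FOR i IN array_lower(kw_arr, 1) .. array_upper(kw_arr, 1) LOOP
            kw := trim(both ' ' FROM kw_arr[i]);
            -- only add keywords we have not seen yet
            PERFORM 1 FROM keyword WHERE name = kw;
            IF NOT FOUND THEN
                INSERT INTO keyword (name, add_date) VALUES (kw, now());
                n_added := n_added + 1;
            END IF;
            -- an INSERT into the user_data-to-keyword link table would go here
        END LOOP;
    END LOOP;
    RETURN n_added;
END;
$$ LANGUAGE plpgsql;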

I hope this helps.
Tony Wasson

---(end of broadcast)---
TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]


Re: [SQL] 3 tables, slow count(*), order by Seq Scan in Query Plan

2005-05-27 Thread Tony Wasson
On 5/26/05, [EMAIL PROTECTED] <[EMAIL PROTECTED]> wrote:
> Hello,
> 
> I have 3 tables (2 tables + 1 lookup table that ties them) and running
> a straight-forward aggregate count(*) query with a couple of joins
> takes about 10 seconds (and I need it to be sub-second or so).
> Also, I am wondering if this approach is scalable with my row-counts
> and my hardware (below).
> 
> My slow query is this:
> --
> SELECT keyword.name, count(*)
> FROM user_data, user_data_keyword, keyword
> WHERE (user_data.user_id = 1)
>   AND (user_data.id = user_data_keyword.user_data_id)
>   AND (user_data_keyword.keyword_id = keyword.id)
>   GROUP BY keyword.name
>   ORDER BY COUNT(*) DESC LIMIT 10;



> Is there any way of speeding up my query?
> 
> Also, given the number of rows expected in those tables:
> user_data: 10M
> user_data_keyword: 40M
> keyword:4M

This sounds like a perfect candidate for a summary table. You should
read Jonathan Gardner's writeup about materialized views. Depending on
your requirements, you'll either need to build triggers or run a
periodic summarization job. This sounds like a "top N" report, so a
periodic update ought to work.

http://jonathangardner.net/PostgreSQL/materialized_views/matviews.html
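A minimal sketch of the periodic-summarization approach (the summary
table name and the delete-and-rebuild strategy here are just one way to
do it):

-- summary table, rebuilt periodically (e.g. nightly from cron)
CREATE TABLE keyword_counts (
    user_id  INTEGER NOT NULL,
    name     VARCHAR(64) NOT NULL,
    cnt      INTEGER NOT NULL,
    PRIMARY KEY (user_id, name)
);

BEGIN;
DELETE FROM keyword_counts;
INSERT INTO keyword_counts (user_id, name, cnt)
SELECT user_data.user_id, keyword.name, count(*)
FROM user_data, user_data_keyword, keyword
WHERE user_data.id = user_data_keyword.user_data_id
  AND user_data_keyword.keyword_id = keyword.id
GROUP BY user_data.user_id, keyword.name;
COMMIT;

-- the "top N" report then becomes a cheap lookup
SELECT name, cnt
FROM keyword_counts
WHERE user_id = 1
ORDER BY cnt DESC LIMIT 10;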

Tony Wasson

---(end of broadcast)---
TIP 8: explain analyze is your friend


Re: [SQL] left joins

2005-07-06 Thread Tony Wasson
On 7/6/05, Ragnar Hafstaư <[EMAIL PROTECTED]> wrote:
> On Wed, 2005-07-06 at 11:33 +0100, Nick Stone wrote:
> > I've had exactly yhe same problem - try changing the query to.
> >
> > select count(*)
> >  from  h left join p using (r,pos) and p.r_order=1
> > where h.tn > 20
> > and h.tn < 30
> 
> really ? is this legal SQL ?
> is this a 8.0 feature ?
> I get syntax error at or near "and" at character 41
> 

SQL like this works for me when I write it with an ON clause
instead of USING.

select count(*)
 from  h left join p on (h.r=p.r and h.pos=p.pos and p.r_order=1)
 where h.tn > 20
 and h.tn < 30

Filtering within the join condition is very useful when doing a left outer join.

Here's another example "from the book" doing this type of filter
within the join:
http://www.postgresql.org/docs/8.0/interactive/queries-table-expressions.html

Tony Wasson

---(end of broadcast)---
TIP 2: you can get off all lists at once with the unregister command
(send "unregister YourEmailAddressHere" to [EMAIL PROTECTED])


Re: [SQL] funstions for parsing words

2005-07-19 Thread Tony Wasson
On 7/19/05, John Kopanas <[EMAIL PROTECTED]> wrote:
> I have a table called Phrases that holds the text of a phrase. I want
> write a query that will return all the words found in all the text of
> the Phrases. Like so:
> 
> 
> Phrases:
> 
> "Hello World"
> "Goodbye World"
> "I like candy
> 
> Words (select statement result):
> 
> "Hello"
> "World"
> "Goodbye"
> "I"
> "Like"
> "Candy"
> 
> Is anything like this possible?
> 
> Thanks alot.
> 
> Your Friend,
> 
> John Kopanas

You can do this by using string_to_array with a space as your
delimiter. If you need to strip the quotes, use the trim function as well.

You can also see the split_on_commas example below -- you'd want to
split on a space instead. I would also rewrite it to use string_to_array,
or use pl/perl if you can.

http://archives.postgresql.org/pgsql-sql/2005-05/msg00204.php
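For example, a small set-returning function along those lines (a sketch
only -- it assumes a table named phrases with a text column named phrase,
and it only needs string_to_array, so it should run on 7.4/8.0):

CREATE OR REPLACE FUNCTION split_words(TEXT) RETURNS SETOF TEXT AS '
DECLARE
    words   TEXT[];
    i       INTEGER;
BEGIN
    words := string_to_array($1, '' '');
    IF words IS NULL THEN
        RETURN;
    END IF;
    FOR i IN array_lower(words, 1) .. array_upper(words, 1) LOOP
        -- strip any surrounding double quotes from each word
        RETURN NEXT trim(both ''"'' FROM words[i]);
    END LOOP;
    RETURN;
END;
' LANGUAGE plpgsql STRICT;

SELECT split_words(phrase) FROM phrases;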

Hope this helps.
Tony Wasson

---(end of broadcast)---
TIP 2: Don't 'kill -9' the postmaster


Re: [SQL] echo/printf function in plpgsql

2005-07-19 Thread Tony Wasson
On 7/19/05, Andreas Joseph Krogh <[EMAIL PROTECTED]> wrote:
> On Tuesday 19 July 2005 17:18, Richard Huxton wrote:
> > Andreas Joseph Krogh wrote:
> > > Hi all!
> > >
> > > Is there a way of echo'ing a string(like "raise notice 'this is id%',
> > > id") from plpgsql? I want to echo/print it to STDOUT 'cause the
> > > notice-mechanism produces too much noise IMH.
> >
> > Your function is running in the backend. You don't have a STDOUT
> > (although you might have redirected STDERR for logging).
> 
> I see. Can I make the ouput somehow less verbose? It spits out a lot of noise
> for each "NOTICE":

You can control which message severity levels are sent to your client
by setting client_min_messages.

Try SET client_min_messages = WARNING;

http://www.postgresql.org/docs/8.0/interactive/runtime-config.html

Tony

---(end of broadcast)---
TIP 6: explain analyze is your friend


Re: [SQL] Multi-column returns from pgsql

2005-07-22 Thread Tony Wasson
On 7/22/05, Jim Buttafuoco <[EMAIL PROTECTED]> wrote:
> Mark,
> 
> Instead of  RETURN NEXT rec.txt1; RETURN NEXT rec.txt2; just use RETURN NEXT 
> rec;
> 
> then your select statement would be
> select * from my_func() as (txt1 text,txt2 text);
> 
> Jim

Besides a simple RETURN NEXT, you'll need to declare the function as
returning SETOF some composite type. You can do something like:

CREATE TYPE twotexts_t AS (txt1 TEXT, txt2 TEXT); 

CREATE OR REPLACE FUNCTION my_func() returns SETOF twotexts_t AS '
DECLARE
   rec record;
BEGIN
   FOR rec IN SELECT txt1, txt2 FROM mytable LOOP
      RETURN NEXT rec;
   END LOOP;
   RETURN;
END;' language 'plpgsql';
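
Then you can call it without a column definition list:

SELECT * FROM my_func();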

---(end of broadcast)---
TIP 6: explain analyze is your friend


Re: [SQL] psql client: technique for applying default values to :variables?

2006-01-11 Thread Tony Wasson
On 1/11/06, Jeff Boes <[EMAIL PROTECTED]> wrote:
> Stumped: is there any way to set up default values for psql variables
> within the .SQL file itself? Obviously, I can do something like:
>
> $ psql -f my_script -v MYVAR=${myvar:-mydefault}
>
> but I would prefer to have the value stored with the .SQL file, e.g. (if
> this actually worked):
>
> \set MYVAR COALESCE(:MYVAR,'mydefault')

Stuff like this works for me in a SQL file

\set edate 'CURRENT_DATE::DATE'

SELECT * FROM some_table WHERE update_date = :edate;

---(end of broadcast)---
TIP 1: if posting/reading through Usenet, please send an appropriate
   subscribe-nomail command to [EMAIL PROTECTED] so that your
   message can get through to the mailing list cleanly


Re: [SQL] Function Dependency

2006-02-05 Thread Tony Wasson
On 2/1/06, Padam J Singh <[EMAIL PROTECTED]> wrote:
>  Hello,
>
>  I am maintaining an application that has over 400 procedures and functions
> written in plsql, and around 100 tables.
>  I want to generate a function dependency chart to depict the following:
>
>  1. Inter function/procedure dependencies
>  2. function-tables dependencies
>  3. function-sequences depencies
>
>  Is there a standard method of doing this?
>
>  Thanks in advance,
>  Padam.

I too would be interested in a standardized tool to do this. I had a
similar situation and I ended up writing a perl script to parse my SQL
and make a graphviz dot file. I then used graphviz to make a function
dependency chart. I can't promise it would catch every single case,
but I can provide you with the code if you wish to give it a whirl.

Tony Wasson

---(end of broadcast)---
TIP 4: Have you searched our list archives?

   http://archives.postgresql.org


Re: [SQL] help..postgresql mulyiple return values

2006-05-18 Thread Tony Wasson

On 5/12/06, Michael Joseph Tan <[EMAIL PROTECTED]> wrote:


hi,
im new in postgresql, generally new in databases.

im trying to make a function using PGAdminIII which returns several types,
example, my query is:

"select count(id) as requests, organization from connection_requests group
by organization"

id is of type int8, organization is of type varchar(50).

basically the query would return count(id), and a varchar(50) which is
organization.

i really dont know what to put on the return type.

what would be the best solution?



If you are in 8.1 you can follow this example from the documentation.
http://www.postgresql.org/docs/8.1/interactive/plpgsql-declarations.html

CREATE FUNCTION sum_n_product(x int, y int, OUT sum int, OUT prod int) AS $$
BEGIN
   sum := x + y;
   prod := x * y;
END;
$$ LANGUAGE plpgsql;

Then run it like:

SELECT sum, prod FROM sum_n_product(1,2);

---(end of broadcast)---
TIP 6: explain analyze is your friend


Re: [SQL] How to pass array of values to a stored procedure

2006-07-18 Thread Tony Wasson

On 7/18/06, Curtis Scheer <[EMAIL PROTECTED]> wrote:

Does anyone have any examples of how I would make a stored procedure in
plpgsql that would allow for passing a list or arrays of values to be used
in an sql IN clause?  Like so: select * from table where field1 in (values).



Here's a very simple example. However, I think passing arrays of INTs
around is dirty. I wasn't able to do this without a FOR ... IN EXECUTE
statement.

CREATE TABLE ids
(
   id  INTEGER
   , PRIMARY KEY (id)
);

INSERT INTO ids VALUES (1);
INSERT INTO ids VALUES (2);
INSERT INTO ids VALUES (3);

CREATE OR REPLACE FUNCTION example_array_input(INT[]) RETURNS SETOF ids AS
$BODY$
DECLARE
   in_clause ALIAS FOR $1;
   clause  TEXT;
   rec RECORD;
BEGIN
    -- convert the array to a text string and make it look like an IN clause
   clause := in_clause;
   clause := trim(leading '{' FROM clause);
   clause := trim(trailing '}' FROM clause);

   FOR rec IN EXECUTE 'SELECT id FROM ids WHERE id IN (' || clause || ');'
   LOOP
   RETURN NEXT rec;
   END LOOP;
   -- final return
   RETURN;
END
$BODY$ language plpgsql;

SELECT * FROM example_array_input('{1,2,4,5,6}'::INT[]);

---(end of broadcast)---
TIP 5: don't forget to increase your free space map settings


Re: [SQL] How to pass array of values to a stored procedure

2006-07-18 Thread Tony Wasson

On 7/18/06, Tony Wasson <[EMAIL PROTECTED]> wrote:

On 7/18/06, Curtis Scheer <[EMAIL PROTECTED]> wrote:
> Does anyone have any examples of how I would make a stored procedure in
> plpgsql that would allow for passing a list or arrays of values to be used
> in an sql IN clause?  Like so: select * from table where field1 in (values).
>


Ahhh... Here's an example using Tom's recommended field=ANY (arrayvalue) SQL.


CREATE TABLE ids
(
   id  INTEGER
   , PRIMARY KEY (id)
);

INSERT INTO ids VALUES (1);
INSERT INTO ids VALUES (2);
INSERT INTO ids VALUES (3);

CREATE OR REPLACE FUNCTION example_array_input(INT[]) RETURNS SETOF ids AS
$BODY$
DECLARE
   in_clause ALIAS FOR $1;
   clause  TEXT;
   rec RECORD;
BEGIN
   FOR rec IN SELECT id FROM ids WHERE id = ANY(in_clause)
   LOOP
   RETURN NEXT rec;
   END LOOP;
   -- final return
   RETURN;
END
$BODY$ language plpgsql;

SELECT * FROM example_array_input('{1,2,4,5,6}'::INT[]);
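
Of course, for a one-off query you don't need a function at all, since
= ANY works directly in plain SQL:

SELECT * FROM ids WHERE id = ANY('{1,2,4,5,6}'::INT[]);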

---(end of broadcast)---
TIP 6: explain analyze is your friend


Re: [SQL] Remote monitoring of Postgres w/minimal grants

2010-03-11 Thread Tony Wasson
On Wed, Mar 10, 2010 at 12:26 AM, Bryce Nesbitt wrote:

> I'm setting up remote monitoring of postgres, but running into an
> uncomfortable situation with permissions.
> Basically it seems hard to set up a secure "read only" role, yet also allow
> proper monitoring.
>
> A brief writeup of that is here:
>
> http://help.logicmonitor.com/installation-getting-started/notes-for-monitoring-specific-types-of-hosts/databases/postgresql/postgresql-credentials/
> In order to get accurate server busy stats and max query time, the
> LogicMonitor user needs to be a superuser "alter role logicmonitor
> superuser;". Without the SuperUser privilege, all servers will appear busy,
> and maximum query time will always be 0.
>
> Is there a way to grant the type of permission needed to view stats,
> without superuser?
>

Seems like you could get around most of these cases by making a function
or set-returning function to return the data, marking it SECURITY
DEFINER, and then granting your monitoring user access to that.
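
For example (a rough sketch -- it assumes the function is created by a
superuser and that the monitoring role is named logicmonitor, as in the
post above):

CREATE OR REPLACE FUNCTION monitor_pg_stat_activity()
RETURNS SETOF pg_stat_activity AS $$
    -- runs with the privileges of the (superuser) owner
    SELECT * FROM pg_stat_activity;
$$ LANGUAGE sql SECURITY DEFINER;

-- functions are executable by PUBLIC by default, so lock it down
REVOKE ALL ON FUNCTION monitor_pg_stat_activity() FROM PUBLIC;
GRANT EXECUTE ON FUNCTION monitor_pg_stat_activity() TO logicmonitor;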

Tony


Re: [SQL] accounting schema

2008-02-07 Thread Tony Wasson
On Feb 6, 2008 6:08 PM, Medi Montaseri <[EMAIL PROTECTED]> wrote:
> I am learning my way into Accounting and was wondering how Accounting
> applications are designed. perhaps you could point the way


> As a DBA, (and keeping it simple) I am thinking I need a table for every
> account which might look like
>
> id, description, credit, debit, validated, created_on, created_by,
> modified_on, modified_by
>
> Is that pretty much it?
> Please let me know if you have seen some accounting or DB book that
> addresses this problem domain.

Another codebase to look at is http://www.sql-ledger.org/. It uses PostgreSQL.

Regards,
Tony

---(end of broadcast)---
TIP 5: don't forget to increase your free space map settings


Re: [SQL] Can COPY update or skip existing records?

2008-10-01 Thread Tony Wasson
On Tue, Sep 30, 2008 at 5:16 AM, Glenn Gillen <[EMAIL PROTECTED]> wrote:
> Hey all,
>
> I've got a table with a unique constraint across a few fields which I
> need to regularly import a batch of data into. Is there a way to do it
> with COPY without getting conflicts on the unique constraint? I have no
> way of being certain that some of the data I'm trying to load isn't in
> the table already.
>
> Ideally I'd like it to operate like MySQL's on_duplicate_key_update
> option, but for now I'll suffice with just ignoring existing rows and
> proceeding with everything else.

I ran into a similar problem. I'm using these merge_by_key functions:

http://pgfoundry.org/projects/mbk

Here's a quick example...

CREATE TEMP TABLE foo (LIKE dst INCLUDING DEFAULTS);

COPY foo (c1, c2) FROM STDIN;
(your copy data here)
\.

SELECT * FROM merge_by_key(
'public', -- table schema
'dst', -- table name
'mnew.c2 < mold.c2', -- merge condition
'select c1,c2 FROM foo'
);

Disclaimer: The author is a friend of mine. :-)
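
If you just want to skip rows that already exist rather than merge them,
another option after the COPY into the temp table is an anti-join insert
(a sketch, assuming the unique constraint is on c1):

INSERT INTO dst (c1, c2)
SELECT foo.c1, foo.c2
FROM foo
WHERE NOT EXISTS (SELECT 1 FROM dst WHERE dst.c1 = foo.c1);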

-- 
Sent via pgsql-sql mailing list (pgsql-sql@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-sql


Re: [SQL] grouping/clustering query

2008-10-24 Thread Tony Wasson
On Fri, Oct 24, 2008 at 8:04 AM, Steve Midgley <[EMAIL PROTECTED]> wrote:
> At 11:28 AM 10/23/2008, Joe wrote:
>>
>> Steve Midgley wrote:

 # (invoiceid, txid)
 (A, 1)
 (A, 3)
 (B, 1)
 (B, 2)
 (C, 5)
 (D, 6)
 (D, 7)
 (E, 8)
 (F, 8)

 For journalling, I need to group/cluster this together. Is there a SQL
 query that can generate this output:

 # (journal: invoiceids, txids)
 [A,B] , [1,2,3]
 [C], [5]
 [D], [6,7]
 [E,F], [8]
>>>
>>> Hi Dave,
>>>
>>> I'm not following the logic here. A has 1,3 and B has 1,2. So why does
>>> the first line print:
>>>
 [A,B] , [1,2,3]
>>>
>>> What's the rule that tells the query to output this way? Is it that all
>>> of B's values are between A's values?
>>
>> From a purely accounting standpoint, since transaction 1 was applied to
>> both invoices A and B, you need to group the invoices so that you can
>> compare total invoiced against total paid.
>
> I tinkered around briefly but didn't come up with a good idea, but I bet
> someone on this list can. However, I did create a CREATE script for your
> table design which, in my experience, makes it more likely that a real
> expert will take on your problem..
>
> Hope this helps,
>
> Steve
>
> DROP TABLE IF EXISTS trans;
>
> CREATE TABLE trans
> (
>  id serial NOT NULL,
>  inv_id character varying,
>  tx_id character varying,
>  CONSTRAINT pk_id PRIMARY KEY (id)
> )
> WITH (OIDS=FALSE);
>
> insert into trans (inv_id, tx_id) values('A','1');
> insert into trans (inv_id, tx_id) values('A','3');
> insert into trans (inv_id, tx_id) values('B','1');
> insert into trans (inv_id, tx_id) values('B','2');
> insert into trans (inv_id, tx_id) values('C','5');
> insert into trans (inv_id, tx_id) values('D','6');
> insert into trans (inv_id, tx_id) values('D','7');
> insert into trans (inv_id, tx_id) values('E','8');
> insert into trans (inv_id, tx_id) values('F','8');

This is as close as I can get to the output you want. I think I'd need
a custom array-grouping aggregate to get the results to match
completely. Notice how A and B still show up on their own lines?
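
Note that array_accum is not built in -- it's the example
array-accumulation aggregate from the PostgreSQL documentation:

CREATE AGGREGATE array_accum (
    sfunc = array_append,
    basetype = anyelement,
    stype = anyarray,
    initcond = '{}'
);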

test=# SELECT inv_array, tx_array
FROM (
SELECT tx_id, array_accum(inv_id) AS inv_array
FROM trans
GROUP BY tx_id
ORDER BY tx_id
 ) AS t
JOIN (
SELECT inv_id, array_accum(tx_id) AS tx_array
FROM trans
GROUP BY inv_id
ORDER BY inv_id
 ) AS i ON (t.tx_id = ANY(i.tx_array) OR i.inv_id =ANY(t.inv_array))
GROUP BY tx_array,inv_array
;
 inv_array | tx_array
-----------+----------
 {A,B}     | {1,2}
 {B}       | {1,2}
 {A}       | {1,3}
 {A,B}     | {1,3}
 {C}       | {5}
 {D}       | {6,7}
 {E,F}     | {8}
(7 rows)

-- 
Sent via pgsql-sql mailing list (pgsql-sql@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-sql


Re: [SQL] grouping/clustering query

2008-10-24 Thread Tony Wasson
On Fri, Oct 24, 2008 at 10:24 AM, Tony Wasson <[EMAIL PROTECTED]> wrote:
> On Fri, Oct 24, 2008 at 8:04 AM, Steve Midgley <[EMAIL PROTECTED]> wrote:
>> At 11:28 AM 10/23/2008, Joe wrote:
>>>
>>> Steve Midgley wrote:
>>>>>
>>>>> # (invoiceid, txid)
>>>>> (A, 1)
>>>>> (A, 3)
>>>>> (B, 1)
>>>>> (B, 2)
>>>>> (C, 5)
>>>>> (D, 6)
>>>>> (D, 7)
>>>>> (E, 8)
>>>>> (F, 8)
>>>>>
>>>>> For journalling, I need to group/cluster this together. Is there a SQL
>>>>> query that can generate this output:
>>>>>
>>>>> # (journal: invoiceids, txids)
>>>>> [A,B] , [1,2,3]
>>>>> [C], [5]
>>>>> [D], [6,7]
>>>>> [E,F], [8]
>>>>
>>>> Hi Dave,
>>>>
>>>> I'm not following the logic here. A has 1,3 and B has 1,2. So why does
>>>> the first line print:
>>>>
>>>>> [A,B] , [1,2,3]
>>>>
>>>> What's the rule that tells the query to output this way? Is it that all
>>>> of B's values are between A's values?
>>>
>>> From a purely accounting standpoint, since transaction 1 was applied to
>>> both invoices A and B, you need to group the invoices so that you can
>>> compare total invoiced against total paid.
>>
>> I tinkered around briefly but didn't come up with a good idea, but I bet
>> someone on this list can. However, I did create a CREATE script for your
>> table design which, in my experience, makes it more likely that a real
>> expert will take on your problem..
>>
>> Hope this helps,
>>
>> Steve
>>
>> DROP TABLE IF EXISTS trans;
>>
>> CREATE TABLE trans
>> (
>>  id serial NOT NULL,
>>  inv_id character varying,
>>  tx_id character varying,
>>  CONSTRAINT pk_id PRIMARY KEY (id)
>> )
>> WITH (OIDS=FALSE);
>>
>> insert into trans (inv_id, tx_id) values('A','1');
>> insert into trans (inv_id, tx_id) values('A','3');
>> insert into trans (inv_id, tx_id) values('B','1');
>> insert into trans (inv_id, tx_id) values('B','2');
>> insert into trans (inv_id, tx_id) values('C','5');
>> insert into trans (inv_id, tx_id) values('D','6');
>> insert into trans (inv_id, tx_id) values('D','7');
>> insert into trans (inv_id, tx_id) values('E','8');
>> insert into trans (inv_id, tx_id) values('F','8');


Here's a stab at a custom aggregate, with RAISE NOTICE calls so you can
see what's going on inside.

CREATE OR REPLACE FUNCTION varchar_array_accum_unique (x VARCHAR[], y
VARCHAR) RETURNS VARCHAR[] AS $$
DECLARE
 res VARCHAR[];
BEGIN
 RAISE NOTICE 'input state is %',x;
 RAISE NOTICE 'input variable is %',y;
 IF x = '{}' THEN
RAISE NOTICE 'x is empty, returning input variable %',y;
res[1] := y;
 ELSE
    RAISE NOTICE 'input array is not empty, checking if input variable is a member %', y;
res := x;
IF y = ANY(res) THEN
RAISE NOTICE 'y is already in array %, skipping',res;
ELSE
res := array_append(res, y);
RAISE NOTICE 'appending input variable %',y;
END IF;
 END IF;
 RETURN res;
END
$$ LANGUAGE plpgsql STRICT;

DROP AGGREGATE array_accum_unique(VARCHAR);
CREATE AGGREGATE array_accum_unique
(
basetype = VARCHAR
, sfunc = varchar_array_accum_unique
, stype = VARCHAR[]
, initcond = '{}'
);

SELECT array_accum_unique(inv_id) AS invoiceids
 , array_accum_unique(tx_id) AS transactionids
FROM (
SELECT tx_id, inv_id
FROM trans
WHERE inv_id IN (
SELECT inv_id
FROM trans
WHERE id IN (
SELECT id FROM trans
WHERE tx_id = '1'
)
)
ORDER BY tx_id, inv_id
  ) AS ss
;


Returns this result for transaction id 1.

 invoiceids | transactionids
------------+----------------
 {A,B}      | {1,2,3}
(1 row)

Hope this helps!
Tony

-- 
Sent via pgsql-sql mailing list (pgsql-sql@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-sql