Hi,
I hav a db as specified in nit.sql
flows has 763488 entries.
After dropping/creating/loading the db and running auswert.sh I get
the attached result from query1.txt.
After 'VACUUM ANALYZE' I get the results from query2.txt
As you can see, the indexes are not used any longer.
Why?
Bye/2
---
Michael Reifenberger, Business Unit Manager SAP-Basis, Plaut Consulting
Comp: [EMAIL PROTECTED] | Priv: [EMAIL PROTECTED]
http://www.plaut.de | http://www.Reifenberger.com
CREATE TABLE flows (
date date,
sip inet,
dip inet,
"type" integer,
sport_low integer[],
dport_low integer[],
sport_high integer[],
dport_high integer[],
sdpkgs integer,
dspkgs integer,
sdbytes integer,
dsbytes integer,
"first" timestamp without time zone,
"last" timestamp without time zone
);
CREATE TABLE protos (
pnum integer,
pname text
);
CREATE TABLE networks (
nnet cidr,
nname text
);
CREATE TABLE volumen (
"year" integer,
week integer,
snet inet,
dnet inet
);
CREATE UNIQUE INDEX flows_0 ON flows USING btree (date, sip, dip, "type");
CREATE UNIQUE INDEX networks_0 ON networks USING btree (nnet);
CREATE INDEX flows_1 ON flows USING btree ("type");
CREATE UNIQUE INDEX protos_0 ON protos USING btree (pnum);
CREATE INDEX flows_2 ON flows USING btree (date);
vdat='2003-07-01'
bdat='2003-08-01'
# Auswertung tabelleninhalt
echo "#"
echo "# Anzahl S�tze in der DB, von Datum, bis Datum, Anzahl Tage"
echo "#"
psql -U pgsql nitpicker << EOF1
explain select count(*),
min(date) as Von, max(date) as Bis, max(date)-min(date) as Tage
from flows;
EOF1
# Auswertung Tage, Protokoll, Bytes, Pakete aufsummiert
echo "#"
echo "# Tagesstatistik je Protokoll"
echo "#"
psql -U pgsql nitpicker << EOF2
explain select f.date as datum, p.pname as Protokoll,
sum(f.sdbytes) + sum(f.dsbytes) as Bytes,
sum(f.sdpkgs) + sum(f.dspkgs) as Pakete
from protos p, flows f
where
f.date >= '$vdat' and f.date <= '$bdat' and
f.type = p.pnum
group by datum, protokoll
order by datum, Bytes desc
;
EOF2
echo "#"
echo "# Tagesstatistik gesamt"
echo "#"
psql -U pgsql nitpicker << EOF3
explain select f.date as datum,
sum(f.sdbytes) + sum(f.dsbytes) as Bytes,
sum(f.sdpkgs) + sum(f.dspkgs) as Pakete
from flows f
where
f.date >= '$vdat' and f.date <= '$bdat'
group by datum
order by datum, Bytes desc
;
EOF3
echo "#"
echo "# Top 50: max( Bytes )"
echo "#"
psql -U pgsql nitpicker << EOF4
explain select count(*),
network(set_masklen(sip,16)), network(set_masklen(dip,16)),
sum(sdbytes) + sum(dsbytes) as bytes
from flows f
where
f.date >= '$vdat' and f.date <= '$bdat'
group by network(set_masklen(sip,16)), network(set_masklen(dip,16))
order by bytes desc
limit 50;
EOF4
#psql -U pgsql nitpicker << EOF
# select
# -- n1.nname as src, n2.nname as dst
# count(*)
# from flows f, networks n1, networks n2, protos p
# where
# f.sip << n1.nnet and
# f.dip << n2.nnet
#EOF
#psql -U pgsql nitpicker -c "\
#select \
# f.sip, f.dip, p.pname, \
# f.sdpkgs, f.dspkgs \
# from flows f, protos p where \
# not f.sip << '194.39.177.0/24' and \
# not f.sip << '194.99.75.0/24' and \
# not f.sip = '195.212.179.2' and \
# not f.dip << '194.39.177.0/24' and \
# not f.dip << '194.99.75.0/24' and \
# not f.dip = '195.212.179.2' and \
# f.type = p.pnum \
# order by sip, dip;"
#psql -U pgsql nitpicker << EOFX
# drop table volumen;
# create table volumen (
# year int4,
# week int4,
# snet inet,
# dnet inet
# );
# drop table n_temp;
# create table n_temp (
# year int4,
# week int4,
# snet inet,
# dnet inet,
# sdbytes int8,
# dsbytes int8,
# sdpkgs int8,
# dspkgs int8
# );
#EOFX
#date
#psql -U pgsql nitpicker << EOF
# insert into n_temp
# select
# date_part( 'year', f.date ) ,
# date_part( 'week', f.date ),
# n.nnet, network( set_masklen( f.dip, 16 ) ),
# sum( f.sdbytes ), sum( f.dsbytes ),
# sum( f.sdpkgs ), sum( f.dspkgs )
# from flows f, networks n
# where
# f.sip <<= n.nnet and
# not f.dip <<= n.nnet
# group by f.date, n.nnet, network( set_masklen( f.dip, 16 ) )
# union
# select
# date_part( 'year', f.date ) ,
# date_part( 'week', f.date ),
# n.nnet, network( set_masklen( f.sip, 16 ) ),
# sum( f.dsbytes ), sum( f.sdbytes ),
# sum( f.dspkgs ), sum( f.sdpkgs )
# from flows f, networks n
# where
# f.dip <<= n.nnet and
# not f.sip <<= n.nnet
# group by f.date, n.nnet, network( set_masklen( f.sip, 16 ) )
# union
# select
# date_part( 'year', f.date ) ,
# date_part( 'week', f.date ),
# network( set_masklen( f.sip, 16 ) ), network( set_masklen( f.dip, 16 ) ),
# sum( f.sdbytes ), sum( f.dsbytes ),
# sum( f.sdpkgs ), sum( f.dspkgs )
# from flows f, networks n
# where
# not f.sip <<= n.nnet and
# not f.dip <<= n.nnet
# group by f.date, network( set_masklen( f.sip, 16 ) ), network( set_masklen( f.dip,
16 ) )
#EOF
#date
# f.date >= '2003-03-28' and f.date <= '2003-04-01'
#
# Anzahl S�tze in der DB, von Datum, bis Datum, Anzahl Tage
#
QUERY PLAN
--------------------------------------------------------------------
Aggregate (cost=41037.49..41037.50 rows=1 width=4)
-> Seq Scan on flows (cost=0.00..31493.88 rows=763488 width=4)
(2 rows)
#
# Tagesstatistik je Protokoll
#
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort (cost=14991.34..14991.66 rows=131 width=52)
Sort Key: f.date, (sum(f.sdbytes) + sum(f.dsbytes))
-> HashAggregate (cost=14984.76..14986.73 rows=131 width=52)
-> Hash Join (cost=2.62..14947.53 rows=2482 width=52)
Hash Cond: ("outer"."type" = "inner".pnum)
-> Index Scan using flows_2 on flows f (cost=0.00..14786.46 rows=3818
width=24)
Index Cond: ((date >= '2003-07-01'::date) AND (date <=
'2003-08-01'::date))
-> Hash (cost=2.30..2.30 rows=130 width=36)
-> Seq Scan on protos p (cost=0.00..2.30 rows=130 width=36)
(9 rows)
#
# Tagesstatistik gesamt
#
QUERY PLAN
-------------------------------------------------------------------------------------------
Sort (cost=14834.22..14834.23 rows=2 width=20)
Sort Key: date, (sum(sdbytes) + sum(dsbytes))
-> GroupAggregate (cost=0.00..14834.21 rows=2 width=20)
-> Index Scan using flows_2 on flows f (cost=0.00..14786.46 rows=3818
width=20)
Index Cond: ((date >= '2003-07-01'::date) AND (date <=
'2003-08-01'::date))
(5 rows)
#
# Top 50: max( Bytes )
#
QUERY PLAN
-------------------------------------------------------------------------------------------------
Limit (cost=14884.07..14884.20 rows=50 width=72)
-> Sort (cost=14884.07..14884.58 rows=201 width=72)
Sort Key: (sum(sdbytes) + sum(dsbytes))
-> HashAggregate (cost=14872.36..14876.38 rows=201 width=72)
-> Index Scan using flows_2 on flows f (cost=0.00..14824.64 rows=3818
width=72)
Index Cond: ((date >= '2003-07-01'::date) AND (date <=
'2003-08-01'::date))
(6 rows)
#
# Anzahl S�tze in der DB, von Datum, bis Datum, Anzahl Tage
#
QUERY PLAN
--------------------------------------------------------------------
Aggregate (cost=41037.49..41037.50 rows=1 width=4)
-> Seq Scan on flows (cost=0.00..31493.88 rows=763488 width=4)
(2 rows)
#
# Tagesstatistik je Protokoll
#
QUERY PLAN
---------------------------------------------------------------------------------------------------
Sort (cost=176464.17..176474.37 rows=4078 width=28)
Sort Key: f.date, (sum(f.sdbytes) + sum(f.dsbytes))
-> GroupAggregate (cost=163062.45..176219.62 rows=4078 width=28)
-> Sort (cost=163062.45..164933.31 rows=748343 width=28)
Sort Key: f.date, p.pname
-> Hash Join (cost=2.62..46539.09 rows=748343 width=28)
Hash Cond: ("outer"."type" = "inner".pnum)
-> Seq Scan on flows f (cost=0.00..35311.32 rows=748343
width=24)
Filter: ((date >= '2003-07-01'::date) AND (date <=
'2003-08-01'::date))
-> Hash (cost=2.30..2.30 rows=130 width=12)
-> Seq Scan on protos p (cost=0.00..2.30 rows=130
width=12)
(11 rows)
#
# Tagesstatistik gesamt
#
QUERY PLAN
---------------------------------------------------------------------------------------
Sort (cost=44666.89..44666.97 rows=32 width=20)
Sort Key: date, (sum(sdbytes) + sum(dsbytes))
-> HashAggregate (cost=44665.61..44666.09 rows=32 width=20)
-> Seq Scan on flows f (cost=0.00..35311.32 rows=748343 width=20)
Filter: ((date >= '2003-07-01'::date) AND (date <= '2003-08-01'::date))
(5 rows)
#
# Top 50: max( Bytes )
#
QUERY PLAN
---------------------------------------------------------------------------------------------------
Limit (cost=309333.47..309333.60 rows=50 width=30)
-> Sort (cost=309333.47..311204.33 rows=748343 width=30)
Sort Key: (sum(sdbytes) + sum(dsbytes))
-> GroupAggregate (cost=162968.11..189160.11 rows=748343 width=30)
-> Sort (cost=162968.11..164838.97 rows=748343 width=30)
Sort Key: network(set_masklen(sip, 16)), network(set_masklen(dip,
16))
-> Seq Scan on flows f (cost=0.00..42794.75 rows=748343
width=30)
Filter: ((date >= '2003-07-01'::date) AND (date <=
'2003-08-01'::date))
(8 rows)
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend