Re: Important bug in RAID tables

2001-06-19 Thread Tonu Samuel

On Tue, 19 Jun 2001, Patrizio Bruno wrote:

  BTW, why did you change the chunksize? By default it is 256000 and I
  tested it for speed with many different values. 256000 seemed to be best
  value. If you have any experience showing that I was wrong, please share
  it with me.
 
 I was testing different chunksize looking for the best on SGI's XFS,
 I will let you know some better value than 256000, if one exists :)


I got this result when I first put lower vales like 4kB and 8kB and found
that RAID takes too much of CPU time. When using RAID, instead of single
syscalls open/read/write/seek/tell/close mysql calls then many
times. Syscalls are very expensive in time in many operating systems and I
got no slowdown when I increased raid chunk value to be so high as
256000kBytes. I think this is the good value as such amount of data can be
fitted into hard disk hardware cache and can speed up writing do disk many
times. Because this I do not suggest to extend it also. Default value
should be good for most cases.

-- 
For technical support contracts, goto https://order.mysql.com/
   __  ___ ___   __
  /  |/  /_ __/ __/ __ \/ /Mr. Tonu Samuel [EMAIL PROTECTED]
 / /|_/ / // /\ \/ /_/ / /__   MySQL AB, Security Administrator
/_/  /_/\_, /___/\___\_\___/   Hong Kong, China
   ___/   www.mysql.com


-
Before posting, please check:
   http://www.mysql.com/manual.php   (the manual)
   http://lists.mysql.com/   (the list archive)

To request this thread, e-mail [EMAIL PROTECTED]
To unsubscribe, e-mail [EMAIL PROTECTED]
Trouble unsubscribing? Try: http://lists.mysql.com/php/unsubscribe.php




Re: Important bug in RAID tables

2001-06-19 Thread Patrizio Bruno

I can't reproduce the error using a different database configuration and
I can't use the same database schema to make my test (the machine is now in
production state), so I attached to this mail te sql file used to create
the tables, this database is used by ASPseek 1.2.0, you can download it
at www.aspseek.org, and the problem occurred indexing
http://lanazione.monrif.net (almost 3 millions documents, so there were almost
3 millions records in the table 'urlword')

-
Patrizio Bruno
DADA spa / Special Projects
Viale Giovine Italia
50122 Firenze
Italy
tel +39 055200211
fax +39 0552478143

PGP PublicKey available at: http://www.keyserver.net/en/
-


# This SQL script will create the necessary tables for ASPSeek.
# It should be used for MySQL only.
# To create the tables, use something like
# mysql -uasp -pmypassword aspseek  tables.sql
#
#drop table wordurl;

#drop table wordurl1;

#drop table urlword;

#drop table urlwords00;

#drop table urlwords01;

#drop table urlwords02;

#drop table urlwords03;

#drop table urlwords04;

#drop table urlwords05;

#drop table urlwords06;

#drop table urlwords07;

#drop table urlwords08;

#drop table urlwords09;

#drop table urlwords10;

#drop table urlwords11;

#drop table urlwords12;

#drop table urlwords13;

#drop table urlwords14;

#drop table urlwords15;

#drop table sites;

#drop table stat;

#drop table robots;

#drop table subsets;

#drop table spaces;

#drop table tmpurl;

#drop table wordsite;

#drop table citation;

#drop table countries;

#drop table cache;



create table wordurl(word tinyblob not null,

word_id integer auto_increment primary key, urls blob,

urlcount integer, totalcount integer,

unique index(word(64))) PACK_KEYS=1 DELAY_KEY_WRITE=1;

create table wordurl1(word tinyblob not null,

word_id integer not null,

urls blob,

urlcount integer, totalcount integer,

unique index(word(64)),

unique index(word_id));


create table urlword(url_id integer auto_increment primary key,

site_id integer not null,

deleted tinyint DEFAULT 0 NOT NULL,

url varchar(128) not null,

next_index_time INT NOT NULL,

status int(11) DEFAULT '0' NOT NULL,

crc char(32) DEFAULT '' NOT NULL,

last_modified varchar(32) DEFAULT '' NOT NULL,

etag varchar(48) DEFAULT '' NOT NULL,

last_index_time INT NOT NULL,

referrer int(11) DEFAULT '0' NOT NULL,

tag int(11) DEFAULT '0' NOT NULL,

hops int(11) DEFAULT '0' NOT NULL,

redir integer,

origin integer,
unique index(url),

index(next_index_time),

index(hops,next_index_time),

index crc (site_id,crc(8))) DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHUNKS=8 
RAID_CHUNKSIZE=153600 PACK_KEYS=1;



create table urlwords00(url_id integer NOT NULL,

deleted tinyint DEFAULT 0 NOT NULL,

wordcount integer,

totalcount integer,

content_type varchar(48) DEFAULT '' NOT NULL,

charset varchar(32) DEFAULT '' NOT NULL,

title varbinary(128) DEFAULT '' NOT NULL,

txt varbinary(255) DEFAULT '' NOT NULL,

docsize int(11) DEFAULT '0' NOT NULL,

keywords varchar(255) DEFAULT '' NOT NULL,

description varbinary(100) DEFAULT '' NOT NULL,

lang varchar(2) DEFAULT '' NOT NULL,

words longblob,

hrefs longblob,

unique index(url_id))  DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHUNKS=10 
RAID_CHUNKSIZE=153600;

create table urlwords01(url_id integer NOT NULL,

deleted tinyint DEFAULT 0 NOT NULL,

wordcount integer,

totalcount integer,

content_type varchar(48) DEFAULT '' NOT NULL,

charset varchar(32) DEFAULT '' NOT NULL,

title varbinary(128) DEFAULT '' NOT NULL,

txt varbinary(255) DEFAULT '' NOT NULL,

docsize int(11) DEFAULT '0' NOT NULL,

keywords varchar(255) DEFAULT '' NOT NULL,

description varbinary(100) DEFAULT '' NOT NULL,

lang varchar(2) DEFAULT '' NOT NULL,

words longblob,

hrefs longblob,

unique index(url_id))  DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHUNKS=10 
RAID_CHUNKSIZE=153600;

create table urlwords02(url_id integer NOT NULL,

deleted tinyint DEFAULT 0 NOT NULL,

wordcount integer,

totalcount integer,

content_type varchar(48) DEFAULT '' NOT NULL,

charset varchar(32) DEFAULT '' NOT NULL,

title varbinary(128) DEFAULT '' NOT NULL,

txt varbinary(255) DEFAULT '' NOT NULL,

docsize int(11) DEFAULT '0' NOT NULL,

keywords varchar(255) DEFAULT '' NOT NULL,

description varbinary(100) DEFAULT '' NOT NULL,

   

Re: Important bug in RAID tables

2001-06-19 Thread Patrizio Bruno

I can't reproduce the error using a different database configuration and
I can't use the same database schema to make my test (the machine is now in
production state), so I attached to this mail te sql file used to create
the tables, this database is used by ASPseek 1.2.0, you can download it
at www.aspseek.org, and the problem occurred indexing
http://lanazione.monrif.net (almost 3 millions documents, so there were almost
3 millions records in the table 'urlword')

-
Patrizio Bruno
DADA spa / Special Projects
Viale Giovine Italia
50122 Firenze
Italy
tel +39 055200211
fax +39 0552478143

PGP PublicKey available at: http://www.keyserver.net/en/
-


# This SQL script will create the necessary tables for ASPSeek.
# It should be used for MySQL only.
# To create the tables, use something like
# mysql -uasp -pmypassword aspseek  tables.sql
#
#drop table wordurl;

#drop table wordurl1;

#drop table urlword;

#drop table urlwords00;

#drop table urlwords01;

#drop table urlwords02;

#drop table urlwords03;

#drop table urlwords04;

#drop table urlwords05;

#drop table urlwords06;

#drop table urlwords07;

#drop table urlwords08;

#drop table urlwords09;

#drop table urlwords10;

#drop table urlwords11;

#drop table urlwords12;

#drop table urlwords13;

#drop table urlwords14;

#drop table urlwords15;

#drop table sites;

#drop table stat;

#drop table robots;

#drop table subsets;

#drop table spaces;

#drop table tmpurl;

#drop table wordsite;

#drop table citation;

#drop table countries;

#drop table cache;



create table wordurl(word tinyblob not null,

word_id integer auto_increment primary key, urls blob,

urlcount integer, totalcount integer,

unique index(word(64))) PACK_KEYS=1 DELAY_KEY_WRITE=1;

create table wordurl1(word tinyblob not null,

word_id integer not null,

urls blob,

urlcount integer, totalcount integer,

unique index(word(64)),

unique index(word_id));


create table urlword(url_id integer auto_increment primary key,

site_id integer not null,

deleted tinyint DEFAULT 0 NOT NULL,

url varchar(128) not null,

next_index_time INT NOT NULL,

status int(11) DEFAULT '0' NOT NULL,

crc char(32) DEFAULT '' NOT NULL,

last_modified varchar(32) DEFAULT '' NOT NULL,

etag varchar(48) DEFAULT '' NOT NULL,

last_index_time INT NOT NULL,

referrer int(11) DEFAULT '0' NOT NULL,

tag int(11) DEFAULT '0' NOT NULL,

hops int(11) DEFAULT '0' NOT NULL,

redir integer,

origin integer,
unique index(url),

index(next_index_time),

index(hops,next_index_time),

index crc (site_id,crc(8))) DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHUNKS=8 
RAID_CHUNKSIZE=153600 PACK_KEYS=1;



create table urlwords00(url_id integer NOT NULL,

deleted tinyint DEFAULT 0 NOT NULL,

wordcount integer,

totalcount integer,

content_type varchar(48) DEFAULT '' NOT NULL,

charset varchar(32) DEFAULT '' NOT NULL,

title varbinary(128) DEFAULT '' NOT NULL,

txt varbinary(255) DEFAULT '' NOT NULL,

docsize int(11) DEFAULT '0' NOT NULL,

keywords varchar(255) DEFAULT '' NOT NULL,

description varbinary(100) DEFAULT '' NOT NULL,

lang varchar(2) DEFAULT '' NOT NULL,

words longblob,

hrefs longblob,

unique index(url_id))  DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHUNKS=10 
RAID_CHUNKSIZE=153600;

create table urlwords01(url_id integer NOT NULL,

deleted tinyint DEFAULT 0 NOT NULL,

wordcount integer,

totalcount integer,

content_type varchar(48) DEFAULT '' NOT NULL,

charset varchar(32) DEFAULT '' NOT NULL,

title varbinary(128) DEFAULT '' NOT NULL,

txt varbinary(255) DEFAULT '' NOT NULL,

docsize int(11) DEFAULT '0' NOT NULL,

keywords varchar(255) DEFAULT '' NOT NULL,

description varbinary(100) DEFAULT '' NOT NULL,

lang varchar(2) DEFAULT '' NOT NULL,

words longblob,

hrefs longblob,

unique index(url_id))  DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHUNKS=10 
RAID_CHUNKSIZE=153600;

create table urlwords02(url_id integer NOT NULL,

deleted tinyint DEFAULT 0 NOT NULL,

wordcount integer,

totalcount integer,

content_type varchar(48) DEFAULT '' NOT NULL,

charset varchar(32) DEFAULT '' NOT NULL,

title varbinary(128) DEFAULT '' NOT NULL,

txt varbinary(255) DEFAULT '' NOT NULL,

docsize int(11) DEFAULT '0' NOT NULL,

keywords varchar(255) DEFAULT '' NOT NULL,

description varbinary(100) DEFAULT '' NOT NULL,

   

Re: Important bug in RAID tables

2001-06-18 Thread Tonu Samuel

On Mon, 18 Jun 2001, Patrizio Bruno wrote:

 select max(field) from table returns the greatest field's value in the first
 file of the RAID table (successive files are not considered), adding a where
 clause (for ex. select max(field) from table where field2 = 200) the
 problem seems not to exist. I don't know if the bug is limited to the
 'max()' function, but, for the use I make of mysql, this is a big problem.


I think this is impossible. MySQL-s higher lever doesn't know anything
about RAID and because this cannot return rows based on location in files.


You did not supplied enough information to reproduce the bug or even
understand its nature. From current niformation I believe that you have
index file broken in smoe reason and because this MySQL of course can
return wrong result not depenging if it is raided or not.


Run REPAIR TABLE urlword and see if problem persists. If yes, give me a
repeatable testcase by mysqldump-ing this table and giving clear example
on which exact commands he gave what exact answer and I try to reproduce
it on my own computer. I cannot fix it before I can reproduce it.

 Environment:
 Linux 2.2.19
 MySQL 3.23.38
 mysql's data directory is on a NFS partition
 
 below the create statement I used to create my table:
 
 create table urlword(url_id integer auto_increment primary key,
 site_id integer not null,
 deleted tinyint DEFAULT 0 NOT NULL,
 url varchar(128) not null,
 next_index_time INT NOT NULL,
 status int(11) DEFAULT '0' NOT NULL,
 crc char(32) DEFAULT '' NOT NULL,
 last_modified varchar(32) DEFAULT '' NOT NULL,
 etag varchar(48) DEFAULT '' NOT NULL,
 last_index_time INT NOT NULL,
 referrer int(11) DEFAULT '0' NOT NULL,
 tag int(11) DEFAULT '0' NOT NULL,
 hops int(11) DEFAULT '0' NOT NULL,
 redir integer,
 origin integer,
 unique index(url),
 index(next_index_time),
 index(hops,next_index_time),
 index crc (site_id,crc(8))) DELAY_KEY_WRITE=1 RAID_TYPE=STRIPED RAID_CHU
 NKS=8 RAID_CHUNKSIZE=153600 PACK_KEYS=1 TYPE=MyISAM;
 
 the mysql configuration file is my-huge.cnf taken from the mysql's
 source-distribution.
 

BTW, why did you change the chunksize? By default it is 256000 and I
tested it for speed with many different values. 256000 seemed to be best
value. If you have any experience showing that I was wrong, please share
it with me.

-- 
For technical support contracts, goto https://order.mysql.com/
   __  ___ ___   __
  /  |/  /_ __/ __/ __ \/ /Mr. Tonu Samuel [EMAIL PROTECTED]
 / /|_/ / // /\ \/ /_/ / /__   MySQL AB, Security Administrator
/_/  /_/\_, /___/\___\_\___/   Hong Kong, China
   ___/   www.mysql.com


-
Before posting, please check:
   http://www.mysql.com/manual.php   (the manual)
   http://lists.mysql.com/   (the list archive)

To request this thread, e-mail [EMAIL PROTECTED]
To unsubscribe, e-mail [EMAIL PROTECTED]
Trouble unsubscribing? Try: http://lists.mysql.com/php/unsubscribe.php