On Tue, Feb 2, 2010 at 11:20 AM, Nicolas Williams
<[email protected]> wrote:
> On Tue, Feb 02, 2010 at 09:23:36AM +0100, Sylvain Pointeau wrote:
>> I would be very interested to see some benchmark, just to see.....
>
> Feel free to write the relevant program, schema, SQL statements and run
> benchmarks against it. What performance you get will depend on: what
> the long-running processes are doing (are they CPU bound or I/O bound?)
> and what CPU or I/O resources you have.
Apparently, my bash has a bug with trap as I was unable to get your
code to work, although I like its design.
And while I try to understand how mapreduce works, I thought I would
create a working example of what I am trying to accomplish (see script
below.) It includes data and timings for both a single process and
for 6 parallel processes.
Regards,
- Robert
------
#!/bin/bash
debug() {
1>&2 echo "$@"
}
long_running_process() {
foo=$1
debug "lrp -- $foo"
sleep 10
echo $foo
}
subprocess() {
segment=$1
{
echo "begin transaction ;"
echo "select rowid, item from sample where segment=${segment} ;" |
sqlite3 -separator $'\t' sample.db |
while read rowid item ; do
status=$(long_running_process "${item}" )
echo "update sample set status=${status} where rowid=${rowid} ;"
done
echo "commit transaction ;"
} > update.${segment}.sql
}
multi_update() {
eval $(
sqlite3 sample.db 'select distinct segment from sample ; ' |
while read segment ; do
echo "subprocess "${segment}" &"
done
echo wait
)
}
segment() {
segments=$1
echo "update sample set segment=(rowid-1) % ${segments} ; " |
sqlite3 sample.db
}
cleanup() {
rm update.*.sql >& /dev/null
}
createdb() {
{ cat <<eof
BEGIN TRANSACTION;
CREATE TABLE sample (item int, segment int, status int);
insert into sample (item) select random() >> 50 ;
insert into sample (item) select random() >> 50 ;
insert into sample (item) select random() >> 50 ;
insert into sample (item) select random() >> 50 ;
insert into sample (item) select random() >> 50 ;
insert into sample (item) select random() >> 50 ;
COMMIT;
eof
} | sqlite3 sample.db
}
initialize(){
rm sample.db >& /dev/null
cleanup
createdb
}
timing() {
segments=$1
echo -- initialize
initialize
sqlite3 -header -column sample.db 'select rowid, * from sample ; '
echo -- processing ${segments}
segment ${segments}
time -p multi_update
echo -- updating ${segments}
time -p cat update.*.sql | sqlite3 sample.db
sqlite3 -header -column sample.db 'select rowid, * from sample ; '
cleanup
}
main() {
# parallel process
timing 6
# single process
timing 1
}
main
_______________________________________________
sqlite-users mailing list
[email protected]
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users