Tõnu Samuel wrote: > > I found that there is no mention in the Changelog of a serious speed > improvement in the latest mnogosearch, although it exists in the code. Today I > investigated the MySQL logs made by mnogosearch again and tried to eliminate > "INSERT INTO url ..." commands from there. I implemented it using a global > buffer to keep the URLs to be inserted and flushing it when needed. > > A patch is attached; it worked nicely for me for 10 minutes :) , so there may be > bugs inside. I am not going to test it very carefully anyway, so you can > experiment with it and, if you find it usable, use it. > Sorry, new patch included ;> -- MySQL Development Team __ ___ ___ ____ __ / |/ /_ __/ __/ __ \/ / Tonu Samuel <[EMAIL PROTECTED]> / /|_/ / // /\ \/ /_/ / /__ MySQL AB, http://www.mysql.com/ /_/ /_/\_, /___/\___\_\___/ Tallinn, Estonia <___/
diff -urN mnogosearch-3.1.11/include/udm_db.h mnogosearch-3.1.11-tonu/include/udm_db.h --- mnogosearch-3.1.11/include/udm_db.h Tue Feb 20 13:12:36 2001 +++ mnogosearch-3.1.11-tonu/include/udm_db.h Fri Feb 23 16:48:30 2001 @@ -19,6 +19,8 @@ #define UDM_DB_SAPDB 11 #define UDM_DB_FILES 100 +#define UDM_MAXSQLCOMMANDLENGTH (64*1024) /* This means we TRY to keep command +lengths below this value */ + /* FIXME: hide this function into SQL.c */ extern char * UdmDBEscStr(int dbtype,char * dst,const char *src); @@ -27,6 +29,7 @@ extern int UdmLoadServerTable(UDM_AGENT * Indexer, char * path,int flags); extern int UdmFindURL(UDM_AGENT * Indexer,char *url); extern int UdmAddURL(UDM_AGENT * Indexer,char *url,int referrer,int hops, char * msg_id, UDM_SERVER * Server); +extern int UdmFlushAddURL(UDM_AGENT *); extern int UdmDeleteWordFromURL(UDM_AGENT* Indexer,int url_id); extern int UdmDeleteCrossWordFromURL(UDM_AGENT* Indexer,int url_id,int ref_id); extern int UdmDeleteUrl(UDM_AGENT* Indexer,int url_id); diff -urN mnogosearch-3.1.11/src/hrefs.c mnogosearch-3.1.11-tonu/src/hrefs.c --- mnogosearch-3.1.11/src/hrefs.c Mon Feb 19 14:55:44 2001 +++ mnogosearch-3.1.11-tonu/src/hrefs.c Fri Feb 23 16:48:30 2001 @@ -127,6 +127,10 @@ added++; } } + /* UdmFlushAddURL(Indexer); can be here and can be not. 
Will be faster if not + * but may be wanted to add some safety + */ + /* Remember last stored URL num */ /* Note that it will became 0 */ /* after next sort in AddUrl */ diff -urN mnogosearch-3.1.11/src/parser.c mnogosearch-3.1.11-tonu/src/parser.c --- mnogosearch-3.1.11/src/parser.c Wed Feb 7 14:17:39 2001 +++ mnogosearch-3.1.11-tonu/src/parser.c Fri Feb 23 16:48:30 2001 @@ -250,7 +250,7 @@ arg2pos=strstr(parser->cmd,"$2"); /* Build temp file names and command line */ - tmpnam(fn0);strcpy(fn1,fn0); + mkstemp(fn0);strcpy(fn1,fn0); fnames[0]=strcat(fn0,".in"); fnames[1]=strcat(fn1,".out"); UdmBuildParamStr(cmd,sizeof(cmd),parser->cmd,fnames,2); diff -urN mnogosearch-3.1.11/src/sql.c mnogosearch-3.1.11-tonu/src/sql.c --- mnogosearch-3.1.11/src/sql.c Tue Feb 20 11:16:41 2001 +++ mnogosearch-3.1.11-tonu/src/sql.c Fri Feb 23 16:58:40 2001 @@ -144,6 +144,8 @@ static const char udmver[]= PACKAGE "-" VERSION "/" DBVER; static char emptystr[]=""; +static char * url_qb=NULL,*url_qe=NULL; +static size_t url_len=0,url_recordno=0; __INDLIB__ const char * UdmVersion(void){ return(udmver); @@ -2635,6 +2637,7 @@ char qbuf[UDMSTRSIZ]="AddURL"; const char * tag="", * category=""; + if(Server){ if(Server->tag)tag=Server->tag; if(Server->category)category=Server->category; @@ -2688,7 +2691,42 @@ case UDM_DB_MYSQL: /* MySQL generates itself */ - default: + + /* Initialize INSERT buffer if not done yet */ + if(!url_qb) { + url_qb=(char*)malloc(UDM_MAXSQLCOMMANDLENGTH); +#ifdef NEWS_EXT + sprintf(url_qb,"INSERT INTO url +(url,referrer,hops,crc32,last_index_time,next_index_time,status,msg_id,tag,category) +VALUES "); +#else + sprintf(url_qb,"INSERT INTO url +(url,referrer,hops,crc32,last_index_time,next_index_time,status,tag,category) VALUES +"); +#endif + url_qe=url_qb+strlen(url_qb); + } else { + /* Buffer was initialized already. 
We just add to it */ + if(url_recordno++>0) strcpy(url_qe++,","); +#ifdef NEWS_EXT + +sprintf(url_qe,"('%s',%d,%d,next_url_id.nextval,0,%d,%d,0,'%s','%s','%s')", + e_url,referrer,hops,(int)now(),(int)now(),msg_id,tag,category); +#else + sprintf(url_qe,"('%s',%d,%d,0,%d,%d,0,'%s','%s')", + e_url,referrer,hops,(int)now(),(int)now(),tag,category); +#endif + url_qe=url_qe+strlen(url_qe); + url_len=url_qe-url_qb; + if (UDM_MAXSQLCOMMANDLENGTH < url_len+UDM_MAXWORDSIZE+100) { /* Danger! +We need to flush buffer */ + /* + * Let's flush it when command is 64K long. + * MySQL gives error if we exceed maximum packet size which is + * (if I remember properly) by default is 16Megs ;) + * But having some safety is not bad at all + * + * [EMAIL PROTECTED] + */ + UdmFlushAddURL(Indexer); + } + } + break; + default: #ifdef NEWS_EXT sprintf(qbuf,"INSERT INTO url (url,referrer,hops,crc32,last_index_time,next_index_time,status,msg_id,tag,category) VALUES ('%s',%d,%d,0,%d,%d,0,'%s','%s','%s')",e_url,referrer,hops,(int)now(),(int)now(),msg_id,tag,category); #else @@ -2698,12 +2736,34 @@ } /* Exec INSERT now */ + if(Indexer->Conf->DBType != UDM_DB_MYSQL) { sql_query(Indexer,qbuf); if(UdmDBErrorCode(Indexer->db)) return(IND_ERROR); else return(IND_OK); + } + return(IND_OK); } + +/* + * We call this function if we want to write all URL-s from memory + * to database. 
+ */ + +int UdmFlushAddURL(UDM_AGENT *Indexer) { + if (url_qb) { /* Can be already flushed */ + if (url_recordno) { /* Can be incomplete INSERT statement */ + sql_query(Indexer,url_qb); + if(UdmDBErrorCode(Indexer->db)) return (IND_ERROR); + } + free(url_qb); + url_qe=url_qb=NULL; + url_recordno=0; + url_len=0; + } +} + int UdmDeleteUrl(UDM_AGENT *Indexer,int url_id){ char qbuf[UDMSTRSIZ]; int res; @@ -3107,7 +3167,7 @@ strcat(sortstr,", hops ASC "); } } - + UdmFlushAddURL(Indexer); /* Would be stupid if only URLs to index are remained +in cache */ #ifdef HAVE_SQL_LIMIT sprintf(qbuf, "SELECT url,rec_id,docsize,status,last_index_time,hops,crc32,last_mod_time FROM url WHERE next_index_time<=%d %s%s%s%s%s%s LIMIT %d",