Tõnu Samuel wrote:
> 
> I found that the Changelog does not mention the serious speed
> improvement in the latest mnogosearch, although it exists in the code. Today I
> again investigated the MySQL logs produced by mnogosearch and tried to eliminate
> the "INSERT INTO url ..." commands from them. I implemented this using a global
> buffer that holds the URLs to be inserted and is flushed when needed.
> 
> The attached patch has worked nicely for me for 10 minutes :), so it may
> still contain bugs. I am not going to test it very carefully anyway, so you can
> experiment with it and use it if you find it usable.
> 

Sorry, new patch included ;>

-- 
MySQL Development Team
   __  ___     ___ ____  __
  /  |/  /_ __/ __/ __ \/ /   Tonu Samuel <[EMAIL PROTECTED]>
 / /|_/ / // /\ \/ /_/ / /__  MySQL AB, http://www.mysql.com/
/_/  /_/\_, /___/\___\_\___/  Tallinn, Estonia
       <___/
diff -urN mnogosearch-3.1.11/include/udm_db.h mnogosearch-3.1.11-tonu/include/udm_db.h
--- mnogosearch-3.1.11/include/udm_db.h Tue Feb 20 13:12:36 2001
+++ mnogosearch-3.1.11-tonu/include/udm_db.h    Fri Feb 23 16:48:30 2001
@@ -19,6 +19,8 @@
 #define UDM_DB_SAPDB            11
 #define UDM_DB_FILES           100
 
+#define UDM_MAXSQLCOMMANDLENGTH  (64*1024) /* This means we TRY to keep command 
+lengths below this value */
+
 /* FIXME: hide this function into SQL.c */
 extern char * UdmDBEscStr(int dbtype,char * dst,const char *src);
 
@@ -27,6 +29,7 @@
 extern int UdmLoadServerTable(UDM_AGENT * Indexer, char * path,int flags);
 extern int UdmFindURL(UDM_AGENT * Indexer,char *url);
 extern int UdmAddURL(UDM_AGENT * Indexer,char *url,int referrer,int hops, char * 
msg_id, UDM_SERVER * Server);
+extern int UdmFlushAddURL(UDM_AGENT *);
 extern int UdmDeleteWordFromURL(UDM_AGENT* Indexer,int url_id);
 extern int UdmDeleteCrossWordFromURL(UDM_AGENT* Indexer,int url_id,int ref_id);
 extern int UdmDeleteUrl(UDM_AGENT* Indexer,int url_id);
diff -urN mnogosearch-3.1.11/src/hrefs.c mnogosearch-3.1.11-tonu/src/hrefs.c
--- mnogosearch-3.1.11/src/hrefs.c      Mon Feb 19 14:55:44 2001
+++ mnogosearch-3.1.11-tonu/src/hrefs.c Fri Feb 23 16:48:30 2001
@@ -127,6 +127,10 @@
                        added++;
                }
        }
+   /*   UdmFlushAddURL(Indexer); could be called here, but it is optional.
+    *   Indexing is faster without it; calling it here adds some safety.
+    */
+
        /* Remember last stored URL num */
        /* Note that it will became 0   */
        /* after next sort in AddUrl    */
diff -urN mnogosearch-3.1.11/src/parser.c mnogosearch-3.1.11-tonu/src/parser.c
--- mnogosearch-3.1.11/src/parser.c     Wed Feb  7 14:17:39 2001
+++ mnogosearch-3.1.11-tonu/src/parser.c        Fri Feb 23 16:48:30 2001
@@ -250,7 +250,7 @@
        arg2pos=strstr(parser->cmd,"$2");
 
        /* Build temp file names and command line */
-       tmpnam(fn0);strcpy(fn1,fn0);
+       mkstemp(fn0);strcpy(fn1,fn0);
        fnames[0]=strcat(fn0,".in");
        fnames[1]=strcat(fn1,".out");
        UdmBuildParamStr(cmd,sizeof(cmd),parser->cmd,fnames,2);
diff -urN mnogosearch-3.1.11/src/sql.c mnogosearch-3.1.11-tonu/src/sql.c
--- mnogosearch-3.1.11/src/sql.c        Tue Feb 20 11:16:41 2001
+++ mnogosearch-3.1.11-tonu/src/sql.c   Fri Feb 23 16:58:40 2001
@@ -144,6 +144,8 @@
 static const char udmver[]= PACKAGE "-" VERSION "/" DBVER;
 static char emptystr[]="";
 
+static char * url_qb=NULL,*url_qe=NULL;
+static size_t url_len=0,url_recordno=0;
 
 __INDLIB__ const char * UdmVersion(void){
        return(udmver);
@@ -2635,6 +2637,7 @@
        char qbuf[UDMSTRSIZ]="AddURL";
        const char * tag="", * category="";
 
+
        if(Server){
                if(Server->tag)tag=Server->tag;
                if(Server->category)category=Server->category;
@@ -2688,7 +2691,42 @@
 
                case UDM_DB_MYSQL:
                        /* MySQL generates itself */
-               default:        
+
+         /* Initialize INSERT buffer if not done yet */
+         if(!url_qb) {
+            url_qb=(char*)malloc(UDM_MAXSQLCOMMANDLENGTH);
+#ifdef NEWS_EXT
+                       sprintf(url_qb,"INSERT INTO url 
+(url,referrer,hops,crc32,last_index_time,next_index_time,status,msg_id,tag,category) 
+VALUES ");
+#else  
+                       sprintf(url_qb,"INSERT INTO url 
+(url,referrer,hops,crc32,last_index_time,next_index_time,status,tag,category) VALUES 
+");
+#endif
+            url_qe=url_qb+strlen(url_qb);
+         } else { 
+            /* Buffer was initialized already. We just add to it */
+            if(url_recordno++>0) strcpy(url_qe++,",");
+#ifdef NEWS_EXT
+            
+sprintf(url_qe,"('%s',%d,%d,next_url_id.nextval,0,%d,%d,0,'%s','%s','%s')",
+                       e_url,referrer,hops,(int)now(),(int)now(),msg_id,tag,category);
+#else
+            sprintf(url_qe,"('%s',%d,%d,0,%d,%d,0,'%s','%s')",
+                       e_url,referrer,hops,(int)now(),(int)now(),tag,category);
+#endif
+            url_qe=url_qe+strlen(url_qe);
+            url_len=url_qe-url_qb;
+            if (UDM_MAXSQLCOMMANDLENGTH < url_len+UDM_MAXWORDSIZE+100) { /* Danger! 
+We need to flush buffer */
+               /* 
+                * Let's flush it when command is 64K long. 
+                * MySQL gives error if we exceed maximum packet size which is
+                * (if I remember properly) by default is 16Megs ;) 
+                * But having some safety is not bad at all 
+                *
+                * [EMAIL PROTECTED]
+                */ 
+               UdmFlushAddURL(Indexer);     
+            }
+         }
+         break;
+               default:
 #ifdef NEWS_EXT
                        sprintf(qbuf,"INSERT INTO url 
(url,referrer,hops,crc32,last_index_time,next_index_time,status,msg_id,tag,category) 
VALUES 
('%s',%d,%d,0,%d,%d,0,'%s','%s','%s')",e_url,referrer,hops,(int)now(),(int)now(),msg_id,tag,category);
 #else  
@@ -2698,12 +2736,34 @@
        }
 
        /* Exec INSERT now */
+       if(Indexer->Conf->DBType != UDM_DB_MYSQL) {
        sql_query(Indexer,qbuf);
        if(UdmDBErrorCode(Indexer->db))
                return(IND_ERROR);
        else
                return(IND_OK);
+   }
+   return(IND_OK);
 }
+
+/*
+ * Write all buffered URLs to the database and release the buffer (even on
+ * failure, so it cannot keep growing). Returns IND_OK or IND_ERROR.
+ */
+int UdmFlushAddURL(UDM_AGENT *Indexer) {
+   int rc=IND_OK;          /* Nothing buffered counts as success */
+   if (url_qb) { /* Can be already flushed */
+     if (url_recordno) {     /* Otherwise the INSERT has no VALUES rows yet */
+       sql_query(Indexer,url_qb);
+       if(UdmDBErrorCode(Indexer->db)) rc=IND_ERROR;
+     }
+     free(url_qb);
+     url_qe=url_qb=NULL;
+     url_len=url_recordno=0;
+   }
+   return(rc);
+}
+
 int UdmDeleteUrl(UDM_AGENT *Indexer,int url_id){
        char qbuf[UDMSTRSIZ];
        int res;
@@ -3107,7 +3167,7 @@
                                strcat(sortstr,", hops ASC ");
                        }
                }
-
+      UdmFlushAddURL(Indexer); /* Would be stupid if only URLs to index are remained 
+in cache */
 #ifdef HAVE_SQL_LIMIT
                sprintf(qbuf,
                "SELECT 
url,rec_id,docsize,status,last_index_time,hops,crc32,last_mod_time FROM url WHERE 
next_index_time<=%d %s%s%s%s%s%s LIMIT %d",

Reply via email to