Hi,

    I tested Kannel with a CIMD2 SMSC and found that smsc_cimd2.c uses a single I/O thread (io_thread) for both writing to and reading from the connected socket. The problem with this approach is that when, for example, a 03 operation is submitted to the SMSC, no other operation can be performed until we receive the 53 acknowledgement, leaving a 200ms to 300ms period of protocol inactivity. This results in situations such as the SMSC queueing up MOs while Bearerbox processes a lengthy queue of MTs, and in poor submit-SM performance per socket (we achieved a rate of 3 to 4 MTs per second, depending on the SMSC). I thought performance would improve if we used at least two threads for I/O operations, so I modified the original smsc_cimd2, removing the io_thread and adding an i_thread and an o_thread. The modified bearerbox managed to log in to the CIMD2 SMSC and performed asynchronous operations, as you can see in the protocol dump below, where we receive and handle a 20 (deliver) operation while waiting for the 53 ack of a previous 03 (submit) operation (something not possible with the current implementation):
18:04:58.261648 IP 192.168.10.5.46459 > 10.20.20.2.9971: P 28022:28202(180) ack 9720 win 365 <nop,nop,timestamp 391793217 2644082266>
        0x0000:  4500 00e8 f934 4000 4006 ecc5 0a00 0005  E....4@.@.......
        0x0010:  d402 760e b57b 26f3 65a1 67ae 035a c26f  ..v..{&.e.g..Z.o
        0x0020:  8018 016d 54f0 0000 0101 080a 175a 4a41  ...mT........ZJA
        0x0030:  9d99 7e5a 0230 333a 3032 3509 3032 313a  ..~Z.03:025.021:
        0x0040:  3132 3334 3233 3932 3609 3035 353a 3009  123423926.055:0.
        0x0050:  3034 343a 3009 3033 333a 4a55 5a20 3420  044:0.033:JUZ.4.
    .............
18:04:58.428129 IP 10.20.20.2.9971 > 192.168.10.5.46459: . ack 28202 win 32768 <nop,nop,timestamp 2644082284 391793217>
        0x0000:  4500 0034 7727 4000 3d06 7287 d402 760e  E..4w'@.=.r...v.
        0x0010:  0a00 0005 26f3 b57b 035a c26f 65a1 6862  ....&..{.Z.oe.hb
        0x0020:  8010 8000 b4c9 0000 0101 080a 9d99 7e6c  ..............~l
        0x0030:  175a 4a41                                .ZJA
18:04:58.522263 IP 10.20.20.2.9971 > 192.168.10.5.46459: P 9720:9788(68) ack 28202 win 32768 <nop,nop,timestamp 2644082293 391793217>
        0x0000:  4500 0078 7728 4000 3d06 7242 d402 760e  E..xw(@.=.rB..v.
        0x0010:  0a00 0005 26f3 b57b 035a c26f 65a1 6862  ....&..{.Z.oe.hb
        0x0020:  8018 8000 a82d 0000 0101 080a 9d99 7e75  .....-........~u
        0x0030:  175a 4a41 0232 303a 3131 3009 3032 313a  .ZJA.20:110.021:
        0x0040:  3131 3130 3030 3032 0930 3233 3a31 3233  11100002.023:123
        0x0050:  3031 3036 3432 3932 0930 3630 3a30 3730  01064292.060:070
        0x0060:  3630 3531 3730 3435 3809 3033 333a 0930  605170458.033:.0
        0x0070:  3330 3a30 0944 3003                      30:0.D0.
18:04:58.522391 IP 192.168.10.5.46459 > 10.20.20.2.9971: P 28202:28213(11) ack 9788 win 365 <nop,nop,timestamp 391793282 2644082293>
        0x0000:  4500 003f f935 4000 4006 ed6d 0a00 0005  E..?.5@.@..m....
        0x0010:  d402 760e b57b 26f3 65a1 6862 035a c2b3  ..v..{&.e.hb.Z..
        0x0020:  8018 016d 5447 0000 0101 080a 175a 4a82  ...mTG.......ZJ.
        0x0030:  9d99 7e75 0237 303a 3131 3009 3345 03    ..~u.70:110.3E.
18:04:58.551431 IP 10.20.20.2.9971 > 192.168.10.5.46459: P 9788:9830(42) ack 28202 win 32768 <nop,nop,timestamp 2644082296 391793217>
        0x0000:  4500 005e 7729 4000 3d06 725b d402 760e  E..^w)@.=.r[..v.
        0x0010:  0a00 0005 26f3 b57b 035a c2b3 65a1 6862  ....&..{.Z..e.hb
        0x0020:  8018 8000 e869 0000 0101 080a 9d99 7e78  .....i........~x
        0x0030:  175a 4a41 0235 333a 3032 3509 3032 313a  .ZJA.53:025.021:
        0x0040:  3132 3334 3233 3932 3609 3036 303a 3037  123423926.060:07
        0x0050:  3036 3035 3137 3034 3538 0933 3603       0605170458.36.
The tested modification solved the first problem: we now continue to receive MOs while submitting MTs. However, it didn't improve the MT submission rate (more threads needed?).
I include the diff of the modification we tried. It should *NOT* be considered functional (it might never be), as it stops processing the MT queue after a number of submit operations.
My questions are whether there is another way to enhance CIMD2 performance, and whether changing the smsc_cimd2 logic this way will hurt the system elsewhere.

Regards,
Mike Zervakis

--- c:\diff\smsc_cimd2_orig.c   2007-06-06 00:38:37.090412200 +0300
+++ c:\diff\smsc_cimd2_test.c   2007-06-06 01:27:21.645373200 +0300
@@ -116,7 +116,8 @@
 
     List *outgoing_queue;
     SMSCConn *conn;
-    int io_thread;
+    int i_thread;
+    int o_thread;
     int quitting;
     List *stopped; /* list-trick for suspend/isolate */
 
@@ -2233,6 +2234,125 @@
     }
 }
 
+/*  Output Thread
+*
+ */
+static void o_thread (void *arg)
+{
+    Msg       *msg;
+    SMSCConn  *conn = arg;
+    PrivData *pdata = conn->data;
+    double    sleep = 0.0001;
+
+    /* Make sure we log into our own log-file if defined */
+    log_thread_to(conn->log_idx);
+
+    /* remove messages from SMSC until we are killed */
+    while (!pdata->quitting) {
+    
+        gwlist_consume(pdata->stopped); /* block here if suspended/isolated */
+      
+        /* send messages */
+        do {
+            msg = gwlist_extract_first(pdata->outgoing_queue);
+            if (msg) {
+                sleep = 0;
+                if (cimd2_submit_msg(conn,msg) != 0) break;
+            }
+        } while (msg);
+ 
+        if (sleep > 0) {
+
+            /* note that this implementations means that we sleep even
+             * when we fail connection.. but time is very short, anyway
+             */
+            gwthread_sleep(sleep);
+            /* gradually sleep longer and longer times until something starts to
+             * happen - this of course reduces response time, but that's better than
+             * extensive CPU usage when it is not used
+             */
+            sleep *= 2;
+            if (sleep >= 2.0)
+                sleep = 1.999999;
+        }
+        else {
+            sleep = 0.0001;
+        }
+    }
+}
+
+
+/* Input Thread .
+ * 
+ */
+static void i_thread (void *arg)
+{
+    Msg       *msg;
+    SMSCConn  *conn = arg;
+    PrivData *pdata = conn->data;
+    double    sleep = 0.0001;
+    int conn_active = 0;
+
+    /* Make sure we log into our own log-file if defined */
+    log_thread_to(conn->log_idx);
+
+    /* remove messages from SMSC until we are killed */
+    while (!pdata->quitting) {
+    
+        gwlist_consume(pdata->stopped); /* block here if suspended/isolated */
+      
+        /* check that connection is active */
+        
+        if (conn->status != SMSCCONN_ACTIVE) {
+            if (cimd2_login(conn) != 0) { 
+                error(0, "CIMD2[%s]: Couldn't connect to SMSC (retrying in %ld seconds).",
+                      octstr_get_cstr(conn->id), 
+                      conn->reconnect_delay);
+                gwthread_sleep(conn->reconnect_delay);
+                mutex_lock(conn->flow_mutex);
+                conn->status = SMSCCONN_RECONNECTING; 
+                mutex_unlock(conn->flow_mutex);
+                continue; 
+            } 
+            mutex_lock(conn->flow_mutex);
+            conn->status = SMSCCONN_ACTIVE;
+            conn->connect_time = time(NULL);
+            bb_smscconn_connected(conn);
+            mutex_unlock(conn->flow_mutex);
+            
+                       pdata->o_thread = gwthread_create(o_thread, conn);
+        }
+
+        /* receive messages */
+        do { 
+            msg = sms_receive(conn);
+            if (msg) {
+                sleep = 0;
+                debug("bb.sms.cimd2", 0, "CIMD2[%s]: new message received",
+                      octstr_get_cstr(conn->id));
+                bb_smscconn_receive(conn, msg);
+            }
+        } while (msg);
+ 
+        if (sleep > 0) {
+
+            /* note that this implementations means that we sleep even
+             * when we fail connection.. but time is very short, anyway
+             */
+            gwthread_sleep(sleep);
+            /* gradually sleep longer and longer times until something starts to
+             * happen - this of course reduces response time, but that's better than
+             * extensive CPU usage when it is not used
+             */
+            sleep *= 2;
+            if (sleep >= 2.0)
+                sleep = 1.999999;
+        }
+        else {
+            sleep = 0.0001;
+        }
+    }
+}
 
 static int cimd2_add_msg_cb (SMSCConn *conn, Msg *sms)
 {
@@ -2241,7 +2361,11 @@
 
     copy = msg_duplicate(sms);
     gwlist_produce(pdata->outgoing_queue, copy);
-    gwthread_wakeup(pdata->io_thread);
+    
+    /* Modification by Thodoris Kondilis */
+    gwthread_wakeup(pdata->i_thread);
+    gwthread_wakeup(pdata->o_thread);
+    /* End of Modification */
 
     return 0;
 }
@@ -2273,11 +2397,19 @@
         gwlist_remove_producer(pdata->stopped);
         conn->is_stopped = 0;
     }
-
-    if (pdata->io_thread != -1) {
-        gwthread_wakeup(pdata->io_thread);
-        gwthread_join(pdata->io_thread);
+  
+    
+    /* Modification by Thodoris Kondilis */
+    if (pdata->i_thread != -1) {
+        gwthread_wakeup(pdata->i_thread);
+        gwthread_join(pdata->i_thread);
+    }
+    
+    if (pdata->o_thread != -1) {
+        gwthread_wakeup(pdata->o_thread);
+        gwthread_join(pdata->o_thread);
     }
+    /* End of Modification */
 
     cimd2_close_socket(pdata);
     cimd2_destroy(pdata); 
@@ -2295,7 +2427,12 @@
 
     gwlist_remove_producer(pdata->stopped);
     /* in case there are messages in the buffer already */
-    gwthread_wakeup(pdata->io_thread);
+    
+    /* Modification by Thodoris Kondilis */ 
+    gwthread_wakeup(pdata->i_thread);
+    gwthread_wakeup(pdata->o_thread);
+    /* End of Modification */
+    
     debug("bb.sms", 0, "SMSCConn CIMD2 %s, start called",
           octstr_get_cstr(conn->id));
 }
@@ -2410,18 +2547,37 @@
                 maxlen);
     }
 
-    pdata->io_thread = gwthread_create(io_thread, conn);
-
-    if (pdata->io_thread == -1) {  
+    
+    /* Modification by Thodoris Kondilis */
+    pdata->i_thread = gwthread_create(i_thread, conn);
+    
+   
+    if (pdata->i_thread == -1) {  
 
-        error(0,"CIMD2[%s]: Couldn't start I/O thread.",
+        error(0,"CIMD2[%s]: Couldn't start Input thread.",
               octstr_get_cstr(conn->id));
         pdata->quitting = 1;
-        gwthread_wakeup(pdata->io_thread);
-        gwthread_join(pdata->io_thread);
+        gwthread_wakeup(pdata->i_thread);
+        gwthread_join(pdata->i_thread);
+        gwthread_wakeup(pdata->o_thread);
+        gwthread_join(pdata->o_thread);
         cimd2_destroy(pdata);
         return -1;  
     } 
+    
+    if (pdata->o_thread == -1) {  
+
+        error(0,"CIMD2[%s]: Couldn't start Output thread.",
+              octstr_get_cstr(conn->id));
+        pdata->quitting = 1;
+        gwthread_wakeup(pdata->i_thread);
+        gwthread_join(pdata->i_thread);
+        gwthread_wakeup(pdata->o_thread);
+        gwthread_join(pdata->o_thread);
+        cimd2_destroy(pdata);
+        return -1;  
+    }
+    /* End of Modification */
 
     conn->send_msg = cimd2_add_msg_cb;
     conn->shutdown = cimd2_shutdown_cb;

Reply via email to