Walt,
I made the following changes to get the kmod and client-core working with
the WALT3 branch.
While it works now, I do not know if these are the right sets of fixes..
I did a few stress tests and all tests through the vfs do seem to pass
now..
thanks,
Murali

>>>
OK, I think I fixed the small-io problem and the mkdir problem.
That only leaves the mounting problem.  I've never attempted to build
the kernel interface or mount the file system (being the old goat that I
am) so that might take a bit.

I'll commit the changes I made and you can run them in the next nightly
to see if anything new pops up.

Walt
Index: src/client/sysint/client-state-machine.c
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/client/sysint/client-state-machine.c,v
retrieving revision 1.79.4.11
diff -u -r1.79.4.11 client-state-machine.c
--- src/client/sysint/client-state-machine.c    1 Aug 2006 15:51:59 -0000       
1.79.4.11
+++ src/client/sysint/client-state-machine.c    8 Sep 2006 01:26:17 -0000
@@ -56,7 +56,11 @@
 {
     gen_mutex_lock(&s_completion_list_mutex);
     assert(s_completion_list_index < MAX_RETURNED_JOBS);
+    if (smcb->op_completed)
+        goto out;
+    smcb->op_completed = 1;
     s_completion_list[s_completion_list_index++] = smcb;
+out:
     gen_mutex_unlock(&s_completion_list_mutex);
     return 0;
 }
@@ -146,7 +150,7 @@
             tmp_completion_list[new_list_index++] = smcb;
         }
     }
-    *out_count = i;
+    *out_count = PVFS_util_min(i, limit);
 
     /* clean up and adjust the list and it's book keeping */
     s_completion_list_index = new_list_index;
@@ -294,6 +298,8 @@
             (PINT_smcb_cancelled(smcb)) &&
             (cancelled_io_jobs_are_pending(smcb))))
     {
+        gossip_debug(GOSSIP_CLIENT_DEBUG, 
+                "client_state_machine_terminate smcb %p\n", smcb);
         ret = add_sm_to_completion_list(smcb);
         assert(ret == 0);
     }
@@ -378,8 +384,8 @@
     if (PINT_smcb_complete(smcb))
     {
         gossip_debug(
-            GOSSIP_CLIENT_DEBUG, "Posted %s (immediate completion)\n",
-            PINT_client_get_name_str(pvfs_sys_op));
+            GOSSIP_CLIENT_DEBUG, "Posted %s (%lld) (immediate completion)\n",
+            PINT_client_get_name_str(pvfs_sys_op), (op_id ? *op_id : -1));
 
         ret = add_sm_to_completion_list(smcb);
         assert(ret == 0);
@@ -387,8 +393,8 @@
     else
     {
         gossip_debug(
-            GOSSIP_CLIENT_DEBUG, "Posted %s (waiting for test)\n",
-            PINT_client_get_name_str(pvfs_sys_op));
+            GOSSIP_CLIENT_DEBUG, "Posted %s (%lld) (waiting for test)\n",
+            PINT_client_get_name_str(pvfs_sys_op), (op_id ? *op_id : -1));
     }
     return ret;
 }
@@ -631,13 +637,22 @@
         {
             PINT_smcb_set_complete(tmp_smcb);
         }
+        if (PINT_smcb_invalid_op(tmp_smcb))
+        {
+            gossip_err("Invalid sm control block op %d\n", 
PINT_smcb_op(tmp_smcb));
+            continue;
+        }
+        gossip_debug(GOSSIP_CLIENT_DEBUG, "sm control op %d\n", 
PINT_smcb_op(tmp_smcb));
 
         if (!PINT_smcb_complete(tmp_smcb))
         {
             ret = PINT_state_machine_next(tmp_smcb, &job_status_array[i]);
 
-            assert(ret == SM_ACTION_DEFERRED ||
-                    ret == SM_ACTION_TERMINATE); /* ret == 0 */
+            if (ret != SM_ACTION_DEFERRED &&
+                    ret != SM_ACTION_TERMINATE); /* ret == 0 */
+            {
+                continue;
+            }
         }
 
         /* make sure we don't return internally cancelled I/O jobs */
@@ -655,13 +670,14 @@
           being tested here, we add it to our local completion list so
           that later calls to the sysint test/testsome can find it
         */
-        /*  This is handled in terminate fn now
+        /* add_sm_to_completion_list() does the right thing in determining 
whether
+         * smcb has already been added to completion Q or not 
+         */
         if ((tmp_smcb != smcb) && (PINT_smcb_complete(tmp_smcb)))
         {
             ret = add_sm_to_completion_list(tmp_smcb);
             assert(ret == 0);
         }
-        */
     }
 
     if (PINT_smcb_complete(smcb))
@@ -692,9 +708,6 @@
     job_status_s job_status_array[MAX_RETURNED_JOBS];
     void *client_sm_p_array[MAX_RETURNED_JOBS] = {NULL};
 
-    gossip_debug(GOSSIP_CLIENT_DEBUG,
-                 "PINT_client_state_machine_testsome\n");
-
     CLIENT_SM_ASSERT_INITIALIZED();
 
     if (!op_id_array || !op_count || !error_code_array)
@@ -753,8 +766,11 @@
              * itself; the return value of the underlying operation is
              * kept in the job status structure.
              */
-            assert(ret == SM_ACTION_DEFERRED ||
-                    ret == SM_ACTION_TERMINATE);
+            if (ret != SM_ACTION_DEFERRED &&
+                    ret != SM_ACTION_TERMINATE)
+            {
+                continue;
+            }
         }
 
         /* make sure we don't return internally cancelled I/O jobs */
@@ -773,13 +789,15 @@
           grab all completed operations when we're finished
           (i.e. outside of this loop).
         */
-        /* now done in terminate function
+
+        /* add_sm_to_completion_list(smcb) does the right thing if an smcb
+         *  was already added to the completion list
+         */
         if (PINT_smcb_complete(smcb))
         {
             ret = add_sm_to_completion_list(smcb);
             assert(ret == 0);
         }
-        */
     }
 
     return completion_list_retrieve_completed(
@@ -833,48 +851,69 @@
  */
 void PVFS_sys_release(PVFS_sys_op_id op_id)
 {
-    PINT_smcb *smcb = PINT_id_gen_safe_lookup(op_id);
-    PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
-    PVFS_credentials *cred_p = sm_p->cred_p;
+    PINT_smcb *smcb; 
+    PINT_client_sm *sm_p; 
+    PVFS_credentials *cred_p; 
 
     gossip_debug(GOSSIP_CLIENT_DEBUG,
               "PVFS_sys_release id %lld\n",op_id);
-
-    if (smcb)
+    smcb = PINT_id_gen_safe_lookup(op_id);
+    if (smcb == NULL) 
     {
-        PINT_id_gen_safe_unregister(op_id);
-
-        if (PINT_smcb_op(smcb) && cred_p)
-        {
-            PVFS_util_release_credentials(cred_p);
-            sm_p->cred_p = NULL;
-        }
+        return;
+    }
+    sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
+    if (sm_p == NULL) 
+    {
+        cred_p = NULL;
+    }
+    else 
+    {
+        cred_p = sm_p->cred_p;
+    }
+    PINT_id_gen_safe_unregister(op_id);
 
-        PINT_smcb_free(&smcb);
+    if (PINT_smcb_op(smcb) && cred_p)
+    {
+        PVFS_util_release_credentials(cred_p);
+        if (sm_p) sm_p->cred_p = NULL;
     }
+
+    PINT_smcb_free(&smcb);
 }
 
 /* why is this different??? */
 void PVFS_mgmt_release(PVFS_mgmt_op_id op_id)
 {
-    PINT_smcb *smcb = PINT_id_gen_safe_lookup(op_id);
-    PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
+    PINT_smcb *smcb; 
+    PINT_client_sm *sm_p; 
+    PVFS_credentials *cred_p; 
 
     gossip_debug(GOSSIP_CLIENT_DEBUG,
               "PVFS_mgmt_release id %lld\n",op_id);
-
-    if (smcb)
+    smcb = PINT_id_gen_safe_lookup(op_id);
+    if (smcb == NULL)
     {
-        PINT_id_gen_safe_unregister(op_id);
-
-        if (PINT_smcb_op(smcb) && sm_p->cred_p)
-        {
-            PVFS_util_release_credentials(sm_p->cred_p);
-            sm_p->cred_p = NULL;
-        }
+        return;
+    }
+    sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
+    if (sm_p == NULL)
+    {
+        cred_p = NULL;
+    }
+    else
+    {
+        cred_p = sm_p->cred_p;
+    }
+    PINT_id_gen_safe_unregister(op_id);
 
-        PINT_smcb_free(&smcb);
+    if (PINT_smcb_op(smcb) && cred_p)
+    {
+        PVFS_util_release_credentials(cred_p);
+        if (sm_p) sm_p->cred_p = NULL;
     }
+
+    PINT_smcb_free(&smcb);
 }
 
 const char *PINT_client_get_name_str(int op_type)
@@ -920,6 +959,7 @@
         { PVFS_CLIENT_JOB_TIMER, "PVFS_CLIENT_JOB_TIMER" },
         { PVFS_DEV_UNEXPECTED, "PVFS_DEV_UNEXPECTED" },
         { PVFS_SYS_FS_ADD, "PVFS_SYS_FS_ADD" },
+        { PVFS_SYS_STATFS, "PVFS_SYS_STATFS" },
         { 0, "UNKNOWN" }
     };
 
Index: src/client/sysint/client-state-machine.h
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/client/sysint/client-state-machine.h,v
retrieving revision 1.162.2.8
diff -u -r1.162.2.8 client-state-machine.h
--- src/client/sysint/client-state-machine.h    1 Aug 2006 15:51:59 -0000       
1.162.2.8
+++ src/client/sysint/client-state-machine.h    8 Sep 2006 01:26:17 -0000
@@ -613,7 +613,9 @@
     PVFS_DEV_UNEXPECTED            = 400
 };
 
+#define PVFS_OP_SYS_MAXVALID  20
 #define PVFS_OP_SYS_MAXVAL 69
+#define PVFS_OP_MGMT_MAXVALID 81
 #define PVFS_OP_MGMT_MAXVAL 199
 
 int PINT_client_io_cancel(job_id_t id);
Index: src/client/sysint/mgmt-statfs-list.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/client/sysint/mgmt-statfs-list.sm,v
retrieving revision 1.39.4.10
diff -u -r1.39.4.10 mgmt-statfs-list.sm
--- src/client/sysint/mgmt-statfs-list.sm       1 Aug 2006 15:52:00 -0000       
1.39.4.10
+++ src/client/sysint/mgmt-statfs-list.sm       8 Sep 2006 01:26:17 -0000
@@ -257,7 +257,6 @@
     }
 
     sm_p->error_code  = error;
-    PINT_SET_OP_COMPLETE;
 
     return SM_ACTION_COMPLETE;
 }
Index: src/common/misc/state-machine-fns.c
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/common/misc/Attic/state-machine-fns.c,v
retrieving revision 1.1.2.11
diff -u -r1.1.2.11 state-machine-fns.c
--- src/common/misc/state-machine-fns.c 29 Aug 2006 20:44:02 -0000      1.1.2.11
+++ src/common/misc/state-machine-fns.c 8 Sep 2006 01:26:17 -0000
@@ -14,6 +14,7 @@
 #include "gossip.h"
 #include "pvfs2-debug.h"
 #include "state-machine.h"
+#include "client-state-machine.h"
 
 /* STATE-MACHINE-FNS.C
  *
@@ -223,6 +224,11 @@
     do {
         /* loop while returning from nested SM */
         do {
+            if (!smcb->current_state || !smcb->current_state->trtbl)
+            {
+                gossip_err("SM current state or trtbl is invalid\n");
+                return -1;
+            }
             transtbl = smcb->current_state->trtbl;
     
            /* for each entry in the transition table there is a return
@@ -346,6 +352,35 @@
 int PINT_smcb_op(struct PINT_smcb *smcb)
 {
     return smcb->op;
+}
+
+static int PINT_smcb_sys_op(struct PINT_smcb *smcb)
+{
+    if (smcb->op > 0 && smcb->op < PVFS_OP_SYS_MAXVALID)
+        return 1;
+    return 0;
+}
+
+static int PINT_smcb_mgmt_op(struct PINT_smcb *smcb)
+{
+    if (smcb->op > PVFS_OP_SYS_MAXVAL && smcb->op < PVFS_OP_MGMT_MAXVALID)
+        return 1;
+    return 0;
+}
+
+static int PINT_smcb_misc_op(struct PINT_smcb *smcb)
+{
+    return smcb->op == PVFS_SERVER_GET_CONFIG 
+        || smcb->op == PVFS_CLIENT_JOB_TIMER 
+        || smcb->op == PVFS_CLIENT_PERF_COUNT_TIMER 
+        || smcb->op == PVFS_DEV_UNEXPECTED;
+}
+
+int PINT_smcb_invalid_op(struct PINT_smcb *smcb)
+{
+    if (!PINT_smcb_sys_op(smcb) && !PINT_smcb_mgmt_op(smcb) && 
!PINT_smcb_misc_op(smcb))
+        return 1;
+    return 0;
 }
 
 /* Function: PINT_smcb_set_complete
Index: src/common/misc/state-machine.h
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/common/misc/state-machine.h,v
retrieving revision 1.12.4.14
diff -u -r1.12.4.14 state-machine.h
--- src/common/misc/state-machine.h     30 Aug 2006 18:14:21 -0000      
1.12.4.14
+++ src/common/misc/state-machine.h     8 Sep 2006 01:26:17 -0000
@@ -66,6 +66,7 @@
     int op_terminate; /* indicates SM is ready to terminate */
     int op_cancelled; /* indicates SM operation was cancelled */
     int children_running; /* the number of child SMs running */
+    int op_completed;  /* indicates SM operation was added to completion Q */
     /* add a lock here */
     job_context_id context; /* job context when waiting for children */
     int (*terminate_fn)(struct PINT_smcb *, job_status_s *);
@@ -190,6 +191,7 @@
 int PINT_smcb_set_op(struct PINT_smcb *smcb, int op);
 int PINT_smcb_op(struct PINT_smcb *smcb);
 void PINT_smcb_set_complete(struct PINT_smcb *smcb);
+int PINT_smcb_invalid_op(struct PINT_smcb *smcb);
 int PINT_smcb_complete(struct PINT_smcb *smcb);
 void PINT_smcb_set_cancelled(struct PINT_smcb *smcb);
 int PINT_smcb_cancelled(struct PINT_smcb *smcb);
_______________________________________________
Pvfs2-developers mailing list
Pvfs2-developers@beowulf-underground.org
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to