All of the patches listed in this email are limited scope- they
just fix specific bugs and don't make any protocol or storage
format changes:
pvfs2-kernel-permissions.patch:
-----------
The pvfs2_permission() function is significantly different (due to
#ifdefs) based on whether or not the kernel provides a
generic_permission() function which was introduced around 2.6.10.
The code path that gets triggered for kernel versions < 2.6.10 has
to duplicate a good bit of permission checking functionality, but
it got a few details wrong. This patch corrects that, based on
examples from the old vfs_permission() function and how NFS
implemented ACL support before generic_permission() was available.
There are a set of about 5 or so LTP file system tests that will
not pass on 2.6.9 with PVFS2 unless this patch is applied.
sys-remove-crdirent.patch:
-----------
There is a corner case in sys-remove in which if an operation
fails, sys-remove has to replace a directory entry. The msgpair
setup for this operation was using the wrong handle to determine
which server to send the request to, however, so the request is
sent to the server holding the removed object rather than the
server holding the parent directory. The doesn't trigger a problem
unless a) you use multiple meta servers and b) you have the
misfortune of triggering this sys-remove error case.
balance-io-servers.patch:
-----------
The PINT_cached_config_next_io() function uses a round robin
algorithm to pick the next set of I/O servers to use when creating
files. The logic had a subtle flaw though. If you use all I/O
servers (as most people do), then the I/O server slection is
balanced. However, if you configure the file system to use only
one datafile, it would result in every other server being skipped.
If the number of total number of servers is even, then the wrap-
around lines up every time to make it so that half of the servers
pretty much stay overloaded in terms of I/O traffic. This patch
corrects the round robin algorithm to make sure that every I/O
server is used fairly, so that we are balanced regardless of how
many datafiles are used. I can give a more detailed explanation of
what went wrong here if needed (I think I am the one that
introduced the problem).
env-shmem-cleanup.patch:
-----------
This patch makes 3 improvements to the berkeley db cache usage:
- use the port number of the server's host id as a component of the
shmem key to use (if shmem style caching is in effect). This
insures that if 2 servers are started on the same server, they will
not use the same shmem region.
- on startup, always forcibly remove any old environment remnants
before opening the environment. This makes sure that if the
previous server instance died unexpectedly, the leftover __db.*
files won't interfere with anything. These old files don't serve
any purpose for the next pvfs2-server run, especially without
transactions.
- look for a particular error code on env open that could be an
indicator that the cache size was too big for the RAM size of the
machine, and print a warning message.
test-chown-sudo.patch
-----------
This fixes a minor bug in how sudo is handled in test programs
related to the chown() operation.
test-sgid-error-msg.patch
-----------
This patch adds a clearer error message if test-setgid fails
because of bad arguments.
-Phil
Index: pvfs2_src/src/common/misc/pint-cached-config.c
===================================================================
--- pvfs2_src/src/common/misc/pint-cached-config.c (revision 1923)
+++ pvfs2_src/src/common/misc/pint-cached-config.c (revision 1924)
@@ -277,6 +277,7 @@
}
else
{
+ /* we let the jitter loop below increment the
cursor by one */
jitter = 0;
}
while(jitter-- > -1)
@@ -338,6 +339,7 @@
struct qlist_head *hash_link = NULL;
struct config_fs_cache_s *cur_config_cache = NULL;
int jitter = 0, num_io_servers = 0;
+ PINT_llist* old_data_server_cursor = NULL;
if (config && num_servers && io_handle_extent_array)
{
@@ -353,6 +355,7 @@
num_io_servers = PINT_llist_count(
cur_config_cache->fs->data_handle_ranges);
+
/* pick random starting point, then round robin */
if(!io_randomized)
{
@@ -378,6 +381,7 @@
cur_config_cache->data_server_cursor =
PINT_llist_next(
cur_config_cache->data_server_cursor);
}
+ old_data_server_cursor = cur_config_cache-
>data_server_cursor;
while(num_servers)
{
@@ -418,6 +422,10 @@
io_addr_array++;
}
ret = ((num_servers == 0) ? 0 : ret);
+ /* reset data server cursor to point to the old
cursor; the
+ * jitter on the next iteration will increment it by one
+ */
+ cur_config_cache->data_server_cursor =
old_data_server_cursor;
}
}
return ret;
diff -Naur pvfs2-old/src/io/trove/trove-dbpf/dbpf-mgmt.c pvfs2/src/
io/trove/trove-dbpf/dbpf-mgmt.c
--- pvfs2-old/src/io/trove/trove-dbpf/dbpf-mgmt.c 2006-06-05
21:57:27.000000000 +0200
+++ pvfs2/src/io/trove/trove-dbpf/dbpf-mgmt.c 2006-06-07
05:26:42.000000000 +0200
@@ -45,6 +45,7 @@
char dbpf_method_name[] = "dbpf";
extern int TROVE_db_cache_size_bytes;
+extern int TROVE_shm_key_hint;
struct dbpf_storage *my_storage_p = NULL;
static int db_open_count, db_close_count;
@@ -63,12 +64,22 @@
*error = -EINVAL;
return NULL;
}
+
+ /* we start by making sure any old environment remnants are
cleaned up */
+ ret = db_env_create(&dbenv, 0);
+ if (ret != 0)
+ {
+ gossip_err("dbpf_putdb_env: could not create any
environment handle: %s\n", db_strerror(ret));
+ return 0;
+ }
+ /* don't check return code here; we don't care if it fails */
+ dbenv->remove(dbenv, path, DB_FORCE);
retry:
ret = db_env_create(&dbenv, 0);
if (ret != 0)
{
- gossip_lerr("dbpf_getdb_env: %s\n", db_strerror(ret));
+ gossip_err("dbpf_getdb_env: %s\n", db_strerror(ret));
*error = ret;
return NULL;
}
@@ -99,7 +110,7 @@
DB_INIT_MPOOL|DB_CREATE|DB_THREAD, 0);
if(ret != 0)
{
- gossip_lerr("dbpf_getdb_env(%s): %s\n", path,
db_strerror(ret));
+ gossip_err("dbpf_getdb_env(%s): %s\n", path,
db_strerror(ret));
*error = ret;
return NULL;
}
@@ -107,11 +118,12 @@
else
{
/* default to using shm style cache */
-
- ret = dbenv->set_shm_key(dbenv, 646567223);
+ gossip_debug(GOSSIP_TROVE_DEBUG, "dbpf using shm key: %d\n",
+ (646567223+TROVE_shm_key_hint));
+ ret = dbenv->set_shm_key(dbenv, (646567223
+TROVE_shm_key_hint));
if(ret != 0)
{
- gossip_lerr("dbenv->set_shm_key(%s): %s\n",
+ gossip_err("dbenv->set_shm_key(%s): %s\n",
path, db_strerror(ret));
*error = ret;
return NULL;
@@ -144,9 +156,18 @@
goto retry;
}
+ if(ret == DB_RUNRECOVERY)
+ {
+ gossip_err("dbpf_getdb_env(): DB_RUNRECOVERY on
environment open.\n");
+ gossip_err(
+ "\n\n"
+ " Please make sure that you have not chosen a
DBCacheSizeBytes\n"
+ " configuration file value that is too large for
your machine.\n\n");
+ }
+
if(ret != 0)
{
- gossip_lerr("dbpf_getdb_env(%s): %s\n", path,
db_strerror(ret));
+ gossip_err("dbpf_getdb_env(%s): %s\n", path,
db_strerror(ret));
*error = ret;
return NULL;
}
diff -Naur pvfs2-old/src/io/trove/trove.c pvfs2/src/io/trove/trove.c
--- pvfs2-old/src/io/trove/trove.c 2006-05-26 00:17:19.000000000 +0200
+++ pvfs2/src/io/trove/trove.c 2006-06-07 05:23:53.000000000 +0200
@@ -30,6 +30,7 @@
struct PINT_perf_counter* PINT_server_pc = NULL;
int TROVE_db_cache_size_bytes = 0;
+int TROVE_shm_key_hint = 0;
/** Initiate reading from a contiguous region in a bstream into a
* contiguous region in memory.
@@ -930,6 +931,11 @@
TROVE_db_cache_size_bytes = *((int *)parameter);
return 0;
}
+ if(option == TROVE_SHM_KEY_HINT)
+ {
+ TROVE_shm_key_hint = *((int*)parameter);
+ return(0);
+ }
method_id = map_coll_id_to_method(coll_id);
if (method_id < 0) {
diff -Naur pvfs2-old/src/io/trove/trove.h pvfs2/src/io/trove/trove.h
--- pvfs2-old/src/io/trove/trove.h 2006-06-05 21:57:26.000000000 +0200
+++ pvfs2/src/io/trove/trove.h 2006-06-07 05:24:29.000000000 +0200
@@ -75,7 +75,8 @@
TROVE_COLLECTION_ATTR_CACHE_INITIALIZE,
TROVE_DB_CACHE_SIZE_BYTES,
TROVE_COLLECTION_COALESCING_HIGH_WATERMARK,
- TROVE_COLLECTION_COALESCING_LOW_WATERMARK
+ TROVE_COLLECTION_COALESCING_LOW_WATERMARK,
+ TROVE_SHM_KEY_HINT
};
/** Initializes the Trove layer. Must be called before any other
Trove
diff -Naur pvfs2-old/src/server/pvfs2-server.c pvfs2/src/server/
pvfs2-server.c
--- pvfs2-old/src/server/pvfs2-server.c 2006-06-05
21:57:28.000000000 +0200
+++ pvfs2/src/server/pvfs2-server.c 2006-06-07 05:23:19.000000000
+0200
@@ -155,6 +155,7 @@
static int create_pidfile(char *pidfile);
static void write_pidfile(int fd);
static void remove_pidfile(void);
+static int parse_port_from_host_id(char* host_id);
/* table of incoming request types and associated parameters */
struct PINT_server_req_params PINT_server_req_table[] =
@@ -901,6 +902,7 @@
char buf[16] = {0};
PVFS_fs_id orig_fsid;
PVFS_ds_flags init_flags = 0;
+ int port_num = 0;
/* Initialize distributions */
ret = PINT_dist_initialize(0);
@@ -946,6 +948,16 @@
/* this should never fail */
assert(ret == 0);
+ /* parse port number and allow trove to use it to help
differentiate
+ * shmem regions if needed
+ */
+ port_num = parse_port_from_host_id(server_config.host_id);
+ if(port_num > 0)
+ {
+ ret = trove_collection_setinfo(0, 0, TROVE_SHM_KEY_HINT,
&port_num);
+ assert(ret == 0);
+ }
+
if(server_config.db_cache_type && (!strcmp
(server_config.db_cache_type,
"mmap")))
{
@@ -1850,6 +1862,37 @@
return &server_config;
}
+/* parse_port_from_host_id()
+ *
+ * attempts to parse the port number from a BMI address. Returns
port number
+ * on success, -1 on failure
+ */
+static int parse_port_from_host_id(char* host_id)
+{
+ int ret = -1;
+ int port_num;
+ char* port_index;
+ char* colon_index;
+
+ /* see if we have a <proto>://<hostname>:<port> format */
+ port_index = rindex(host_id, ':');
+ colon_index = index(host_id, ':');
+ /* if so, parse the port number */
+ if(port_index && (port_index != colon_index))
+ {
+ port_index++;
+ ret = sscanf(port_index, "%d", &port_num);
+ }
+
+ /* report error if we don't find a valid port number in the
string */
+ if(ret != 1)
+ {
+ return(-1);
+ }
+
+ return(port_num);
+}
+
/*
* Local variables:
* c-indent-level: 4
Index: pvfs2_src/src/kernel/linux-2.6/acl.c
===================================================================
--- pvfs2_src/src/kernel/linux-2.6/acl.c (revision 1653)
+++ pvfs2_src/src/kernel/linux-2.6/acl.c (revision 1654)
@@ -567,9 +567,12 @@
return -EACCES;
}
/* Do permission checks only for lookups and opens */
- if (!nd || !(nd->flags & (LOOKUP_OPEN | LOOKUP_ACCESS)))
- {
- return 0;
+ if ((mask & MAY_EXEC) == 0) {
+ if (!nd || !(nd->flags & (LOOKUP_OPEN | LOOKUP_ACCESS)))
+ {
+ pvfs2_print("pvfs2_permission: returning OK when
checking for lookups and opens access!\n");
+ return 0;
+ }
}
if (current->fsuid == inode->i_uid)
{
@@ -608,18 +611,25 @@
}
pvfs2_print("pvfs2_permission: mode & mask & S_IRWXO = %d,
mask = %d\n", mode & mask & S_IRWXO, mask);
if ((mode & mask & S_IRWXO) == mask)
+ {
+ pvfs2_print("pvfs2_permission: returning zero after
node&mask&S_IRWXO check!\n");
return 0;
+ }
check_capabilities:
/* Are we allowed to override DAC */
if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
{
if (capable(CAP_DAC_OVERRIDE))
+ {
+ pvfs2_print("pvfs2_permission: returning zero after
mask & MAY_READ|MAYWRITE %d OR inode->i_mode & S_IXUGO %d !\n",mask
& (MAY_READ|MAY_WRITE), inode->i_mode & S_IXUGO);
return 0;
+ }
}
/* Allow read/search if CAP_DAC_READ_SEARCH */
if (capable(CAP_DAC_READ_SEARCH) &&
((mask == MAY_READ) || (S_ISDIR(inode->i_mode) && !
(mask && MAY_WRITE))))
{
+ pvfs2_print("pvfs2_permission: returningg zero after
capable(CAP_DAC_READ_SEARCH) %d && (mask == MAY_READ) %d || (S_ISDIR
(inode->i_mode) %d && !(mask && MAY_WRITE) %d\n",capable
(CAP_DAC_READ_SEARCH),mask == MAY_READ,S_ISDIR(inode->i_mode),!
(mask && MAY_WRITE));
return 0;
}
pvfs2_print("pvfs2_permission: disallowing access\n");
Index: pvfs2_src/src/kernel/linux-2.6/acl.c
===================================================================
--- pvfs2_src/src/kernel/linux-2.6/acl.c (revision 1718)
+++ pvfs2_src/src/kernel/linux-2.6/acl.c (revision 1719)
@@ -570,7 +570,6 @@
if ((mask & MAY_EXEC) == 0) {
if (!nd || !(nd->flags & (LOOKUP_OPEN | LOOKUP_ACCESS)))
{
- pvfs2_print("pvfs2_permission: returning OK when
checking for lookups and opens access!\n");
return 0;
}
}
@@ -609,29 +608,20 @@
if (in_group_p(inode->i_gid))
mode >>= 3;
}
- pvfs2_print("pvfs2_permission: mode & mask & S_IRWXO = %d,
mask = %d\n", mode & mask & S_IRWXO, mask);
if ((mode & mask & S_IRWXO) == mask)
{
- pvfs2_print("pvfs2_permission: returning zero after
node&mask&S_IRWXO check!\n");
return 0;
}
check_capabilities:
/* Are we allowed to override DAC */
- if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
+ if(!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO) || S_ISDIR
(inode->i_mode))
{
- if (capable(CAP_DAC_OVERRIDE))
+ if(capable(CAP_DAC_OVERRIDE))
{
- pvfs2_print("pvfs2_permission: returning zero after
mask & MAY_READ|MAYWRITE %d OR inode->i_mode & S_IXUGO %d !\n",mask
& (MAY_READ|MAY_WRITE), inode->i_mode & S_IXUGO);
return 0;
}
}
- /* Allow read/search if CAP_DAC_READ_SEARCH */
- if (capable(CAP_DAC_READ_SEARCH) &&
- ((mask == MAY_READ) || (S_ISDIR(inode->i_mode) && !
(mask && MAY_WRITE))))
- {
- pvfs2_print("pvfs2_permission: returningg zero after
capable(CAP_DAC_READ_SEARCH) %d && (mask == MAY_READ) %d || (S_ISDIR
(inode->i_mode) %d && !(mask && MAY_WRITE) %d\n",capable
(CAP_DAC_READ_SEARCH),mask == MAY_READ,S_ISDIR(inode->i_mode),!
(mask && MAY_WRITE));
- return 0;
- }
+
pvfs2_print("pvfs2_permission: disallowing access\n");
return -EACCES;
#endif
Index: pvfs2_src/src/kernel/linux-2.6/acl.c
===================================================================
--- pvfs2_src/src/kernel/linux-2.6/acl.c (revision 1728)
+++ pvfs2_src/src/kernel/linux-2.6/acl.c (revision 1729)
@@ -567,12 +567,14 @@
return -EACCES;
}
/* Do permission checks only for lookups and opens */
+/*
if ((mask & MAY_EXEC) == 0) {
if (!nd || !(nd->flags & (LOOKUP_OPEN | LOOKUP_ACCESS)))
{
return 0;
}
}
+*/
if (current->fsuid == inode->i_uid)
{
mode >>= 6;
Index: pvfs2_src/src/client/sysint/sys-remove.sm
===================================================================
--- pvfs2_src/src/client/sysint/sys-remove.sm (revision 1727)
+++ pvfs2_src/src/client/sysint/sys-remove.sm (revision 1728)
@@ -356,7 +356,7 @@
sm_p->parent_ref.fs_id);
msg_p->fs_id = sm_p->parent_ref.fs_id;
- msg_p->handle = sm_p->object_ref.handle;
+ msg_p->handle = sm_p->parent_ref.handle;
msg_p->retry_flag = PVFS_MSGPAIR_NO_RETRY;
msg_p->comp_fn = remove_crdirent_comp_fn;
@@ -547,7 +547,7 @@
sm_p->u.remove.object_name);
PRINT_REMOVE_WARNING();
- js_p->error_code = 0;
+ js_p->error_code = -PVFS_ENOENT;
return 1;
}
Index: pvfs2_src/test/shared/test-common.c
===================================================================
--- pvfs2_src/test/shared/test-common.c (revision 1729)
+++ pvfs2_src/test/shared/test-common.c (revision 1730)
@@ -819,8 +819,9 @@
}
else
{
+
/* Determine if we need to use sudo to change owner/group */
- if(geteuid() != owner_id ||
+ if(geteuid() != owner_id &&
geteuid() != group_id)
{
if(verbose)
Index: pvfs2_src/test/client/vfs/test-setgid.c
===================================================================
--- pvfs2_src/test/client/vfs/test-setgid.c (revision 1830)
+++ pvfs2_src/test/client/vfs/test-setgid.c (revision 1831)
@@ -464,7 +464,6 @@
if(ret != TEST_COMMON_SUCCESS)
{
- print_error("Unable to process arguments\n");
usage();
return(-1);
}
_______________________________________________
Pvfs2-developers mailing list
Pvfs2-developers@beowulf-underground.org
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers