date:20071201

[PATCH 0/10] sysfs network namespace support

2007-12-01 Thread Eric W. Biederman


Now that we have network namespace support merged it is time to
revisit the sysfs support so we can remove the dependency on !SYSFS.

I'm not even trying to base this on any of Tejun's very interesting 
work on sysfs to remove the coupling between kobjects and
sysfs_dirents.  For my objective that just means I would need to
spend several more weeks staring at sysfs trying to figure out
how to get where I am going and iterating several times from yet
another new starting place.  I want to get something working before I
try for any more perfection.

I don't expect the userspace side of this to ever change which is
close enough to perfect for me.

The bulk of the patches are the changes to allow multiple sysfs
superblocks.

Then comes the tagged directory sysfs support which uses information
captured at mount time to decide which object with which tag will
appear in a directory.

Then the support for renaming and deleting objects where the source
may be ambiguous because of tagging.

Then finally the network namespace support so it is clear how all
of this is tied together.

Greg, the last patch that enables tagged directory support seems
to make most sense living in your tree, as it lives half in
fs/sysfs/mount.c, and half in net/core/net-sysfs.c and all of
its dependencies are in Linus' tree except for this patchset.

Eric
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 03/10] sysfs: sysfs_get_dentry add a sb parameter

2007-12-01 Thread Eric W. Biederman


In preparation for multiple mounts of sysfs add a superblock parameter to
sysfs_get_dentry.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/dir.c   |   11 ++-
 fs/sysfs/file.c  |2 +-
 fs/sysfs/sysfs.h |2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..cff2b12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -84,6 +84,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 
 /**
  * sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
  * @sd: sysfs_dirent of interest
  *
  * Get dentry for @sd.  Dentry is looked up if currently not
@@ -96,9 +97,9 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
  * RETURNS:
  * Pointer to found dentry on success, ERR_PTR() value on error.
  */
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent 
*sd)
 {
-   struct dentry *dentry = dget(sysfs_sb-s_root);
+   struct dentry *dentry = dget(sb-s_root);
 
while (dentry-d_fsdata != sd) {
struct sysfs_dirent *cur;
@@ -778,7 +779,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char 
*new_name)
goto out;   /* nothing to rename */
 
/* get the original dentry */
-   old_dentry = sysfs_get_dentry(sd);
+   old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
goto out;
@@ -845,14 +846,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject 
*new_parent_kobj)
goto out;   /* nothing to move */
 
/* get dentries */
-   old_dentry = sysfs_get_dentry(sd);
+   old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
goto out;
}
old_parent = old_dentry-d_parent;
 
-   new_parent = sysfs_get_dentry(new_parent_sd);
+   new_parent = sysfs_get_dentry(sysfs_sb, new_parent_sd);
if (IS_ERR(new_parent)) {
error = PTR_ERR(new_parent);
goto out;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index ad13151..8c7bba0 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -569,7 +569,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute 
*attr, mode_t mode)
goto out;
 
mutex_lock(sysfs_rename_mutex);
-   victim = sysfs_get_dentry(victim_sd);
+   victim = sysfs_get_dentry(sysfs_sb, victim_sd);
mutex_unlock(sysfs_rename_mutex);
if (IS_ERR(victim)) {
rc = PTR_ERR(victim);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3308759..d4269ba 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -112,7 +112,7 @@ extern spinlock_t sysfs_assoc_lock;
 extern const struct file_operations sysfs_dir_operations;
 extern const struct inode_operations sysfs_dir_inode_operations;
 
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
+struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent 
*sd);
 struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
 void sysfs_put_active_two(struct sysfs_dirent *sd);
 void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
-- 
1.5.3.rc6.17.g1911

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 04/10] sysfs: Implement __sysfs_get_dentry

2007-12-01 Thread Eric W. Biederman


This function is similar to sysfs_get_dentry but much simpler: it
returns a sysfs dentry only if one currently exists.

This requires less locking than sysfs_get_dentry, which
makes it preferable in some contexts.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/dir.c |   38 ++
 1 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index cff2b12..3ec9040 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -764,6 +764,44 @@ void sysfs_remove_dir(struct kobject * kobj)
__sysfs_remove_dir(sd);
 }
 
+/**
+ * __sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
+ * @sd: sysfs_dirent of interest
+ *
+ * Get dentry for @sd.  Only return a dentry if one currently
+ * exists.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * Pointer to found dentry on success, NULL on failure.
+ */
+static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct 
sysfs_dirent *sd)
+{
+   struct inode *inode;
+   struct dentry *dentry = NULL;
+
+   inode = ilookup5_nowait(sysfs_sb, sd-s_ino, sysfs_ilookup_test, sd);
+   if (inode  !(inode-i_state  I_NEW)) {
+   struct dentry *alias;
+   spin_lock(dcache_lock);
+   list_for_each_entry(alias, inode-i_dentry, d_alias) {
+   if (!IS_ROOT(alias)  d_unhashed(alias))
+   continue;
+   if (alias-d_sb != sb)
+   continue;
+   dentry = alias;
+   dget_locked(dentry);
+   break;
+   }
+   spin_unlock(dcache_lock);
+   }
+   iput(inode);
+   return dentry;
+}
+
 int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 {
struct sysfs_dirent *sd = kobj-sd;
-- 
1.5.3.rc6.17.g1911

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 05/10] sysfs: Rename Support multiple superblocks

2007-12-01 Thread Eric W. Biederman


This patch modifies the sysfs_rename_dir and sysfs_move_dir
to support multiple sysfs dentry trees rooted in different
sysfs superblocks.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/dir.c |  190 +++
 1 files changed, 135 insertions(+), 55 deletions(-)

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3ec9040..0d0c87e 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -802,42 +802,112 @@ static struct dentry *__sysfs_get_dentry(struct 
super_block *sb, struct sysfs_di
return dentry;
 }
 
+struct sysfs_rename_struct {
+   struct list_head list;
+   struct dentry *old_dentry;
+   struct dentry *new_dentry;
+   struct dentry *old_parent;
+   struct dentry *new_parent;
+};
+
+static void post_rename(struct list_head *head)
+{
+   struct sysfs_rename_struct *srs;
+   while (!list_empty(head)) {
+   srs = list_entry(head-next, struct sysfs_rename_struct, list);
+   dput(srs-old_dentry);
+   dput(srs-new_dentry);
+   dput(srs-old_parent);
+   dput(srs-new_parent);
+   list_del(srs-list);
+   kfree(srs);
+   }
+}
+
+static int prep_rename(struct list_head *head,
+   struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
+   const char *name)
+{
+   struct sysfs_rename_struct *srs;
+   struct super_block *sb;
+   struct dentry *dentry;
+   int error;
+
+   list_for_each_entry(sb, sysfs_fs_type.fs_supers, s_instances) {
+   dentry = sysfs_get_dentry(sb, sd);
+   if (dentry == ERR_PTR(-EXDEV))
+   continue;
+   if (IS_ERR(dentry)) {
+   error = PTR_ERR(dentry);
+   goto err_out;
+   }
+
+   srs = kzalloc(sizeof(*srs), GFP_KERNEL);
+   if (!srs) {
+   dput(dentry);
+   goto err_out;
+   }
+
+   INIT_LIST_HEAD(srs-list);
+   list_add(head, srs-list);
+   srs-old_dentry = dentry;
+   srs-old_parent = dget(dentry-d_parent);
+
+   dentry = sysfs_get_dentry(sb, new_parent_sd);
+   if (IS_ERR(dentry)) {
+   error = PTR_ERR(dentry);
+   goto err_out;
+   }
+   srs-new_parent = dentry;
+
+   error = -ENOMEM;
+   dentry = d_alloc_name(srs-new_parent, name);
+   if (!dentry)
+   goto err_out;
+   srs-new_dentry = dentry;
+   }
+   return 0;
+
+err_out:
+   post_rename(head);
+   return error;
+}
+
 int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 {
struct sysfs_dirent *sd = kobj-sd;
-   struct dentry *parent = NULL;
-   struct dentry *old_dentry = NULL, *new_dentry = NULL;
+   struct list_head todo;
+   struct sysfs_rename_struct *srs;
+   struct inode *parent_inode = NULL;
const char *dup_name = NULL;
int error;
 
+   INIT_LIST_HEAD(todo);
mutex_lock(sysfs_rename_mutex);
 
error = 0;
if (strcmp(sd-s_name, new_name) == 0)
goto out;   /* nothing to rename */
 
-   /* get the original dentry */
-   old_dentry = sysfs_get_dentry(sysfs_sb, sd);
-   if (IS_ERR(old_dentry)) {
-   error = PTR_ERR(old_dentry);
-   goto out;
-   }
+   sysfs_grab_supers();
+   error = prep_rename(todo, sd, sd-s_parent, new_name);
+   if (error)
+   goto out_release;
 
-   parent = old_dentry-d_parent;
+   error = -ENOMEM;
+   mutex_lock(sysfs_mutex);
+   parent_inode = sysfs_get_inode(sd-s_parent);
+   mutex_unlock(sysfs_mutex);
+   if (!parent_inode)
+   goto out_release;
 
-   /* lock parent and get dentry for new name */
-   mutex_lock(parent-d_inode-i_mutex);
+   mutex_lock(parent_inode-i_mutex);
mutex_lock(sysfs_mutex);
 
error = -EEXIST;
if (sysfs_find_dirent(sd-s_parent, new_name))
goto out_unlock;
 
-   error = -ENOMEM;
-   new_dentry = d_alloc_name(parent, new_name);
-   if (!new_dentry)
-   goto out_unlock;
-
/* rename kobject and sysfs_dirent */
error = -ENOMEM;
new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
@@ -852,17 +922,21 @@ int sysfs_rename_dir(struct kobject * kobj, const char 
*new_name)
sd-s_name = new_name;
 
/* rename */
-   d_add(new_dentry, NULL);
-   d_move(old_dentry, new_dentry);
+   list_for_each_entry(srs, todo, list) {
+   d_add(srs-new_dentry, NULL);
+   d_move(srs-old_dentry, srs-new_dentry);
+   }
 
error = 0;
- out_unlock:
+out_unlock:
mutex_unlock(sysfs_mutex);
-   mutex_unlock(parent-d_inode-i_mutex);
+

[PATCH 06/10] sysfs: sysfs_chmod_file handle multiple superblocks

2007-12-01 Thread Eric W. Biederman


Teach sysfs_chmod_file how to handle multiple sysfs superblocks.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/file.c |   51 ---
 1 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 8c7bba0..ade6140 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -558,7 +558,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
 int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 {
struct sysfs_dirent *victim_sd = NULL;
-   struct dentry *victim = NULL;
+   struct super_block *sb;
+   struct dentry *victim;
struct inode * inode;
struct iattr newattrs;
int rc;
@@ -569,31 +570,35 @@ int sysfs_chmod_file(struct kobject *kobj, struct 
attribute *attr, mode_t mode)
goto out;
 
mutex_lock(sysfs_rename_mutex);
-   victim = sysfs_get_dentry(sysfs_sb, victim_sd);
-   mutex_unlock(sysfs_rename_mutex);
-   if (IS_ERR(victim)) {
-   rc = PTR_ERR(victim);
-   victim = NULL;
-   goto out;
-   }
-
-   inode = victim-d_inode;
-
-   mutex_lock(inode-i_mutex);
+   sysfs_grab_supers();
+   list_for_each_entry(sb, sysfs_fs_type.fs_supers, s_instances) {
+   victim = sysfs_get_dentry(sb, victim_sd);
+   if (victim == ERR_PTR(-EXDEV))
+   continue;
+   if (IS_ERR(victim)) {
+   rc = PTR_ERR(victim);
+   victim = NULL;
+   goto out_unlock;
+   }
 
-   newattrs.ia_mode = (mode  S_IALLUGO) | (inode-i_mode  ~S_IALLUGO);
-   newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-   rc = notify_change(victim, newattrs);
+   inode = victim-d_inode;
+   mutex_lock(inode-i_mutex);
+   newattrs.ia_mode = (mode  S_IALLUGO) | (inode-i_mode  
~S_IALLUGO);
+   newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+   rc = notify_change(victim, newattrs);
+   if (rc == 0) {
+   mutex_lock(sysfs_mutex);
+   victim_sd-s_mode = newattrs.ia_mode;
+   mutex_unlock(sysfs_mutex);
+   }
+   mutex_unlock(inode-i_mutex);
 
-   if (rc == 0) {
-   mutex_lock(sysfs_mutex);
-   victim_sd-s_mode = newattrs.ia_mode;
-   mutex_unlock(sysfs_mutex);
+   dput(victim);
}
-
-   mutex_unlock(inode-i_mutex);
- out:
-   dput(victim);
+out_unlock:
+   sysfs_release_supers();
+   mutex_unlock(sysfs_rename_mutex);
+out:
sysfs_put(victim_sd);
return rc;
 }
-- 
1.5.3.rc6.17.g1911

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 07/10] sysfs: Implement sysfs tagged directory support.

2007-12-01 Thread Eric W. Biederman


The problem.  When implementing a network namespace I need to be able
to have multiple network devices with the same name.  Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.

What this patch does is to add an additional tag field to the
sysfs dirent structure.  For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible.  Effectively
this is the same as creating multiple distinct directories with
the same name, but internally to sysfs the result is nicer.

I am calling the concept of a single directory that looks like multiple
directories all at the same path in the filesystem tagged directories.

For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded.  Which means I need
a simple race free way to setup those directories as tagged.

To achieve a race free design all tagged directories are created
and managed by sysfs itself.  The upper level code that knows what
tagged directories we need provides just two methods that enable
this:
  sb_tag() - that returns a void * tag that identifies the context of
the process that mounted sysfs.
  kobject_tag(kobj) - that returns a void * tag that identifies the context
a kobject should be in.
Everything else is left up to sysfs.

For the network namespace sb_tag and kobject_tag are essentially
one line functions, and look to remain that.

The work needed in sysfs is more extensive.  At each directory
or symlink creation I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent.  Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.

Currently only directories which hold kobjects, and
symlinks are supported.  There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on which makes it useless, and there are
no potential users which makes it an uninteresting problem
to solve.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/bin.c|2 +-
 fs/sysfs/dir.c|  182 +
 fs/sysfs/file.c   |8 +-
 fs/sysfs/group.c  |   12 ++--
 fs/sysfs/inode.c  |6 +-
 fs/sysfs/mount.c  |   44 +++-
 fs/sysfs/symlink.c|2 +-
 fs/sysfs/sysfs.h  |   16 -
 include/linux/sysfs.h |   16 
 9 files changed, 255 insertions(+), 33 deletions(-)

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 006fc64..86e1128 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -252,7 +252,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct 
bin_attribute * attr)
 
 void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-   sysfs_hash_and_remove(kobj-sd, attr-attr.name);
+   sysfs_hash_and_remove(kobj, kobj-sd, attr-attr.name);
 }
 
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 0d0c87e..f4bd41a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -99,8 +99,17 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
  */
 struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent 
*sd)
 {
-   struct dentry *dentry = dget(sb-s_root);
+   struct dentry *dentry;
+
+   /* Bail if this sd won't show up in this superblock */
+   if (sd-s_parent  sd-s_parent-s_flags  SYSFS_FLAG_TAGGED) {
+   const void *tag;
+   tag = sysfs_lookup_tag(sd-s_parent, sb);
+   if (sd-s_tag.tag != tag)
+   return ERR_PTR(-EXDEV);
+   }
 
+   dentry = dget(sb-s_root);
while (dentry-d_fsdata != sd) {
struct sysfs_dirent *cur;
struct dentry *parent;
@@ -419,7 +428,11 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
  */
 int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
-   if (sysfs_find_dirent(acxt-parent_sd, sd-s_name)) {
+   const void *tag = NULL;
+
+   tag = sysfs_creation_tag(acxt-parent_sd, sd);
+
+   if (sysfs_find_dirent(acxt-parent_sd, tag, sd-s_name)) {
printk(KERN_WARNING sysfs: duplicate filename '%s' 
   can not be created\n, sd-s_name);
WARN_ON(1);
@@ -428,6 +441,9 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct 
sysfs_dirent *sd)
 
sd-s_parent = sysfs_get(acxt-parent_sd);
 
+   if (sd-s_parent-s_flags  SYSFS_FLAG_TAGGED)
+   sd-s_tag.tag = tag;
+
if

[PATCH 08/10] sysfs: Implement sysfs_delete_link and sysfs_rename_link

2007-12-01 Thread Eric W. Biederman


When removing a symlink sysfs_remove_link does not provide enough
information to figure out which tagged directory the symlink falls in.
So I need sysfs_delete_link which is passed the target of the symlink
to delete.

Further half the time when we are removing a symlink the code is
actually renaming the symlink but not doing so explicitly because we
don't have a symlink rename method.  So I have added sysfs_rename_link
as well.

Both of these functions now have enough information to find a symlink
in a tagged directory.  The only restriction is that they must be
called before the target kobject is renamed or deleted.  If they are
called later I lose track of which tag the target kobject was marked
with and can no longer find the old symlink to remove it.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/symlink.c|   31 +++
 include/linux/sysfs.h |   17 +
 2 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index b0f8070..89c98cb 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -80,6 +80,21 @@ int sysfs_create_link(struct kobject * kobj, struct kobject 
* target, const char
 }
 
 /**
+ * sysfs_delete_link - remove symlink in object's directory.
+ * @kobj:  object we're acting for.
+ * @targ:  object we're pointing to.
+ * @name:  name of the symlink to remove.
+ *
+ * Unlike sysfs_remove_link sysfs_delete_link has enough information
+ * to successfully delete symlinks in tagged directories.
+ */
+void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
+   const char *name)
+{
+   sysfs_hash_and_remove(targ, kobj-sd, name);
+}
+
+/**
  * sysfs_remove_link - remove symlink in object's directory.
  * @kobj:  object we're acting for.
  * @name:  name of the symlink to remove.
@@ -90,6 +105,22 @@ void sysfs_remove_link(struct kobject * kobj, const char * 
name)
sysfs_hash_and_remove(kobj, kobj-sd, name);
 }
 
+/**
+ * sysfs_rename_link - rename symlink in object's directory.
+ * @kobj:  object we're acting for.
+ * @targ:  object we're pointing to.
+ * @old:   previous name of the symlink.
+ * @new:   new name of the symlink.
+ *
+ * A helper function for the common rename symlink idiom.
+ */
+int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
+   const char *old, const char *new)
+{
+   sysfs_delete_link(kobj, targ, old);
+   return sysfs_create_link(kobj, targ, new);
+}
+
 static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
 struct sysfs_dirent *target_sd, char *path)
 {
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c8d7a69..c2e8b0d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -109,6 +109,12 @@ int __must_check sysfs_create_link(struct kobject *kobj, 
struct kobject *target,
   const char *name);
 void sysfs_remove_link(struct kobject *kobj, const char *name);
 
+int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
+   const char *old_name, const char *new_name);
+
+void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
+   const char *name);
+
 int __must_check sysfs_create_group(struct kobject *kobj,
const struct attribute_group *grp);
 void sysfs_remove_group(struct kobject *kobj,
@@ -195,6 +201,17 @@ static inline void sysfs_remove_link(struct kobject *kobj, 
const char *name)
;
 }
 
+static inline int sysfs_rename_link(struct kobject * k, struct kobject *t,
+   const char *old_name, const char * new_name)
+{
+   return 0;
+}
+
+static inline void sysfs_delete_link(struct kobject *k, struct kobject *t,
+const char *name)
+{
+}
+
 static inline int sysfs_create_group(struct kobject *kobj,
 const struct attribute_group *grp)
 {
-- 
1.5.3.rc6.17.g1911

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 09/10] driver core: Implement tagged directory support for device classes.

2007-12-01 Thread Eric W. Biederman


This patch enables tagging on every class directory if struct class
has tag_ops.

In addition device_del and device_rename were modified to use
sysfs_delete_link and sysfs_rename_link respectively to ensure when
these operations happen on devices whose classes have tag_ops that they
work properly.


Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 drivers/base/class.c   |   30 ---
 drivers/base/core.c|   51 +--
 include/linux/device.h |2 +
 3 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/drivers/base/class.c b/drivers/base/class.c
index c4f8843..ed9393d 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -135,6 +135,17 @@ static void remove_class_attrs(struct class * cls)
}
 }
 
+static int class_setup_tagging(struct class *cls)
+{
+   const struct sysfs_tagged_dir_operations *tag_ops;
+
+   tag_ops = cls-tag_ops;
+   if (!tag_ops)
+   return 0;
+
+   return sysfs_enable_tagging(cls-subsys.kobj, tag_ops);
+}
+
 int class_register(struct class * cls)
 {
int error;
@@ -160,11 +171,22 @@ int class_register(struct class * cls)
cls-subsys.kobj.ktype = class_ktype;
 
error = kset_register(cls-subsys);
-   if (!error) {
-   error = add_class_attrs(class_get(cls));
-   class_put(cls);
-   }
+   if (error)
+   goto out;
+
+   error = class_setup_tagging(cls);
+   if (error)
+   goto out_unregister;
+
+   error = add_class_attrs(cls);
+   if (error)
+   goto out_unregister;
+
+out:
return error;
+out_unregister:
+   kset_unregister(cls-subsys);
+   goto out;
 }
 
 void class_unregister(struct class * cls)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index a2c3d4e..f9d3fcf 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -600,16 +600,20 @@ static struct kobject *get_device_parent(struct device 
*dev,
return kobj;
 
/* or create a new class-directory at the parent device */
-   k = kobject_create(dev-class-name, parent_kobj);
-   if (!k)
+   kobj = kobject_create(dev-class-name, parent_kobj);
+   if (!kobj)
return NULL;
-   k-kset = dev-class-class_dirs;
-   retval = kobject_register(k);
+   kobj-kset = dev-class-class_dirs;
+   retval = kobject_register(kobj);
if (retval  0) {
-   kfree(k);
+   kfree(kobj);
return NULL;
}
-   return k;
+   /* If we created a new class-directory setup tagging */
+   if (kobj  dev-class-tag_ops)
+   sysfs_enable_tagging(k, dev-class-tag_ops);
+
+   return kobj;
}
 
if (parent)
@@ -758,7 +762,8 @@ static void device_remove_class_symlinks(struct device *dev)
 
if (dev-kobj.parent != dev-class-subsys.kobj 
dev-type != part_type)
-   sysfs_remove_link(dev-class-subsys.kobj, dev-bus_id);
+   sysfs_delete_link(dev-class-subsys.kobj,
+ dev-kobj, dev-bus_id);
 #else
if (dev-parent  dev-type != part_type)
sysfs_remove_link(dev-kobj, device);
@@ -1223,6 +1228,15 @@ int device_rename(struct device *dev, char *new_name)
strlcpy(old_device_name, dev-bus_id, BUS_ID_SIZE);
strlcpy(dev-bus_id, new_name, BUS_ID_SIZE);
 
+#ifndef CONFIG_SYSFS_DEPRECATED
+   if (dev-class  (dev-kobj.parent != dev-class-subsys.kobj)) {
+   error = sysfs_rename_link(dev-class-subsys.kobj,
+   dev-kobj, old_device_name, new_name);
+   if (error)
+   goto out;
+   }
+#endif
+
error = kobject_rename(dev-kobj, new_name);
if (error) {
strlcpy(dev-bus_id, old_device_name, BUS_ID_SIZE);
@@ -1231,24 +1245,13 @@ int device_rename(struct device *dev, char *new_name)
 
 #ifdef CONFIG_SYSFS_DEPRECATED
if (old_class_name) {
+   error = -ENOMEM;
new_class_name = make_class_name(dev-class-name, dev-kobj);
-   if (new_class_name) {
-   error = sysfs_create_link(dev-parent-kobj,
- dev-kobj, new_class_name);
-   if (error)
-   goto out;
-   sysfs_remove_link(dev-parent-kobj, old_class_name);
-   }
-   }
-#else
-   if (dev-class) {
-   sysfs_remove_link(dev-class-subsys.kobj, old_device_name);
-   error = sysfs_create_link(dev-class-subsys.kobj, dev-kobj,
- dev-bus_id);
-   if (error) {
-   dev_err(dev,

[PATCH 10/10] net: Enable tagging for net_class directories in sysfs

2007-12-01 Thread Eric W. Biederman


The problem.  Network devices show up in sysfs and with the network
namespace active multiple devices with the same name can show up in
the same directory, ouch!

To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.

By using the network namespace pointers as tags to separate out the
sysfs directory entries we ensure that we don't have conflicts in the
directories and applications only see a limited set of the network
devices.

Signed-off-by: Eric W. Biederman [EMAIL PROTECTED]
---
 fs/sysfs/mount.c  |   36 
 include/linux/sysfs.h |2 ++
 net/Kconfig   |2 +-
 net/core/net-sysfs.c  |   20 
 4 files changed, 59 insertions(+), 1 deletions(-)

diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f6e49d9..ed47133 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -16,6 +16,8 @@
 #include linux/mount.h
 #include linux/pagemap.h
 #include linux/init.h
+#include linux/nsproxy.h
+#include net/net_namespace.h
 
 #include sysfs.h
 
@@ -78,6 +80,7 @@ static int sysfs_fill_super(struct super_block *sb, void 
*data, int silent)
root-d_sb = sb;
sb-s_root = root;
sb-s_fs_info = info;
+   info-tag.net_ns = hold_net(current-nsproxy-net_ns);
return 0;
 
 out_err:
@@ -95,6 +98,9 @@ static int sysfs_test_super(struct super_block *sb, void *ptr)
struct sysfs_super_info *info = sysfs_info(sb);
int found = 1;
 
+   if (task-nsproxy-net_ns != info-tag.net_ns)
+   found = 0;
+
return found;
 }
 
@@ -131,6 +137,8 @@ static void sysfs_kill_sb(struct super_block *sb)
struct sysfs_super_info *info = sysfs_info(sb);
 
kill_anon_super(sb);
+   if (info-tag.net_ns)
+   release_net(info-tag.net_ns);
kfree(info);
 }
 
@@ -181,6 +189,31 @@ restart:
spin_unlock(sb_lock);
 }
 
+#ifdef CONFIG_NET
+static void sysfs_net_exit(struct net *net)
+{
+   /* Allow the net namespace to go away while sysfs is still mounted. */
+   struct super_block *sb;
+   mutex_lock(sysfs_rename_mutex);
+   sysfs_grab_supers();
+   mutex_lock(sysfs_mutex);
+   list_for_each_entry(sb, sysfs_fs_type.fs_supers, s_instances) {
+   struct sysfs_super_info *info = sysfs_info(sb);
+   if (info-tag.net_ns != net)
+   continue;
+   release_net(info-tag.net_ns);
+   info-tag.net_ns = NULL;
+   }
+   mutex_unlock(sysfs_mutex);
+   sysfs_release_supers();
+   mutex_unlock(sysfs_rename_mutex);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+   .exit = sysfs_net_exit,
+};
+#endif
+
 int __init sysfs_init(void)
 {
int err = -ENOMEM;
@@ -205,6 +238,9 @@ int __init sysfs_init(void)
unregister_filesystem(sysfs_fs_type);
goto out_err;
}
+#ifdef CONFIG_NET
+   register_pernet_subsys(sysfs_net_ops);
+#endif
} else
goto out_err;
 out:
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c2e8b0d..2c93278 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -19,6 +19,7 @@
 
 struct kobject;
 struct module;
+struct net;
 
 /* FIXME
  * The *owner field is no longer used, but leave around
@@ -77,6 +78,7 @@ struct sysfs_ops {
 };
 
 struct sysfs_tag_info {
+   struct net *net_ns;
 };
 
 struct sysfs_tagged_dir_operations {
diff --git a/net/Kconfig b/net/Kconfig
index ab4e6da..250585e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -30,7 +30,7 @@ menu Networking options
 config NET_NS
bool Network namespace support
default n
-   depends on EXPERIMENTAL  !SYSFS
+   depends on EXPERIMENTAL
help
  Allow user space to create what appear to be multiple instances
  of the network stack.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 61ead1d..2aa64d0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -13,7 +13,9 @@
 #include linux/kernel.h
 #include linux/netdevice.h
 #include linux/if_arp.h
+#include linux/nsproxy.h
 #include net/sock.h
+#include net/net_namespace.h
 #include linux/rtnetlink.h
 #include linux/wireless.h
 #include net/iw_handler.h
@@ -431,6 +433,23 @@ static void netdev_release(struct device *d)
kfree((char *)dev - dev-padded);
 }
 
+static const void *net_sb_tag(struct sysfs_tag_info *info)
+{
+   return info-net_ns;
+}
+
+static const void *net_kobject_tag(struct kobject *kobj)
+{
+   struct net_device *dev;
+   dev = container_of(kobj, struct net_device, dev.kobj);
+   return dev-nd_net;
+}
+
+static const struct sysfs_tagged_dir_operations net_tagged_dir_operations = {
+   .sb_tag = net_sb_tag,
+   .kobject_tag = net_kobject_tag,
+};
+
 static struct class net_class = {

[PATCH] zd1211rw: Fix alignment problems

2007-12-01 Thread Ulrich Kunitz

Shaddy Baddah found an alignment problem with zd1211rw driver at
2007-11-19. This patch fixes it, it is based on the patch proposed by
Herbert Xu. The alignment 4 has been the agreed value on the
linux-wireless mailing list.

Note that the problem only affects the old zd1211rw softmac
driver and not the zd1211rw-mac80211 driver. Daniel Drake has
already provided a patch for the replacement of the softmac
driver, which this patch will break.

Signed-off-by: Ulrich Kunitz [EMAIL PROTECTED]
---
 drivers/net/wireless/zd1211rw/zd_mac.c |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c 
b/drivers/net/wireless/zd1211rw/zd_mac.c
index a903645..5298a8b 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -1130,6 +1130,8 @@ static void zd_mac_rx(struct zd_mac *mac, struct sk_buff 
*skb)
__skb_trim(skb, skb-len -
(IEEE80211_FCS_LEN + sizeof(struct rx_status)));
 
+   ZD_ASSERT(IS_ALIGNED((unsigned long)skb-data, 4));
+
update_qual_rssi(mac, skb-data, skb-len, stats.signal,
 status-signal_strength);
 
@@ -1166,15 +1168,19 @@ static void do_rx(unsigned long mac_ptr)
 int zd_mac_rx_irq(struct zd_mac *mac, const u8 *buffer, unsigned int length)
 {
struct sk_buff *skb;
+   unsigned int reserved =
+   ALIGN(max_t(unsigned int,
+   sizeof(struct zd_rt_hdr), ZD_PLCP_HEADER_SIZE), 4) -
+   ZD_PLCP_HEADER_SIZE;
 
-   skb = dev_alloc_skb(sizeof(struct zd_rt_hdr) + length);
+   skb = dev_alloc_skb(reserved + length);
if (!skb) {
struct ieee80211_device *ieee = zd_mac_to_ieee80211(mac);
dev_warn(zd_mac_dev(mac), Could not allocate skb.\n);
ieee-stats.rx_dropped++;
return -ENOMEM;
}
-   skb_reserve(skb, sizeof(struct zd_rt_hdr));
+   skb_reserve(skb, reserved);
memcpy(__skb_put(skb, length), buffer, length);
skb_queue_tail(mac-rx_queue, skb);
tasklet_schedule(mac-rx_tasklet);
-- 
1.5.3.6
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: ZD1211RW unaligned accesses...

2007-12-01 Thread Ulrich Kunitz

John W. Linville wrote:

 So, did the patch below fix the problem?  Should I apply it?
 
 John

John,

the patch would have worked, but I have sent a second one to the
list, which is based on Herbert's and has an assert to be able to test
the patch on x86.

Please note that the mac80211 driver doesn't suffer
from the problem and Daniel has already provided a patch to
replace zd1211rw by the mac80211 driver. Daniel's patch will of
course be broken by the new patch.

-- 
Uli Kunitz
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/4] datagram: mem_schedule functions

2007-12-01 Thread Herbert Xu

On Wed, Nov 28, 2007 at 01:52:59PM -0500, Hideo AOKI wrote:

 +static inline int sk_wmem_schedule(struct sock *sk, int size)
 +{
 + if (sk-sk_type == SOCK_STREAM)
 + return sk_stream_wmem_schedule(sk, size);
 + else if (sk-sk_type == SOCK_DGRAM)
 + return sk_datagram_wmem_schedule(sk, size);
 + else
 + return 1;
 +}

Why do we need this function? As far as I can see we always know
whether it's a stream or datagram socket at compile time so doing
a run-time test is pointless.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [net] xfrm depends on crypto

2007-12-01 Thread Sebastian Siewior

net/built-in.o: In function `xfrm_find_algo':
xfrm_algo.c:(.text+0x61c82): undefined reference to `crypto_has_alg'

Signed-off-by: Sebastian Siewior [EMAIL PROTECTED]
---
 net/xfrm/Kconfig |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 577a4f8..35ed342 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -4,6 +4,7 @@
 config XFRM
bool
depends on NET
+   select CRYPTO
 
 config XFRM_USER
tristate Transformation user configuration interface
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [net] RxRPC session sockets depend on crypto

2007-12-01 Thread Sebastian Siewior

net/built-in.o: In function `rxrpc_destroy_s':
ar-key.c:(.text+0x653cd): undefined reference to `crypto_free_tfm'
net/built-in.o: In function `rxrpc_instantiate_s':
ar-key.c:(.text+0x65496): undefined reference to `crypto_alloc_base'

Signed-off-by: Sebastian Siewior [EMAIL PROTECTED]
---
 net/rxrpc/Kconfig |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index e662f1d..60c4738 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -6,6 +6,7 @@ config AF_RXRPC
tristate RxRPC session sockets
depends on INET  EXPERIMENTAL
select KEYS
+   select CRYPTO
help
  Say Y or M here to include support for RxRPC session sockets (just
  the transport part, not the presentation part: (un)marshalling is
@@ -31,7 +32,6 @@ config AF_RXRPC_DEBUG
 config RXKAD
tristate RxRPC Kerberos security
depends on AF_RXRPC
-   select CRYPTO
select CRYPTO_MANAGER
select CRYPTO_BLKCIPHER
select CRYPTO_PCBC
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4] udp: memory accounting in IPv4

2007-12-01 Thread Herbert Xu

On Wed, Nov 28, 2007 at 01:53:36PM -0500, Hideo AOKI wrote:

 +/**
 + *   __skb_queue_purge_and_sub_memory_allocated
 + *   - empty a list and subtruct memory allocation counter
 + *   @sk:   sk
 + *   @list: list to empty
 + *   Delete all buffers on an sk_buff list and subtruct the
 + *   truesize of the sk_buff for memory accounting. Each buffer
 + *   is removed from the list and one reference dropped. This
 + *   function does not take the list lock and the caller must
 + *   hold the relevant locks to use it.
 + */
 +static inline void __skb_queue_purge_and_sub_memory_allocated(struct sock 
 *sk,
 + struct sk_buff_head *list)
 +{
 + struct sk_buff *skb;
 + int purged_skb_size = 0;
 + while ((skb = __skb_dequeue(list)) != NULL) {
 + purged_skb_size += sk_datagram_pages(skb-truesize);
 + kfree_skb(skb);
 + }
 + atomic_sub(purged_skb_size, sk-sk_prot-memory_allocated);
 +}

Thanks, this is a lot better than before!

However, I'm still a little concerned about the effect of two more
atomic op's per packet that we're adding here.  Hang on a sec, that
should've been Dave's line since atomic ops are cheap on x86 :)

But seriously, it's not so much that we have two more atomic op's
per packet, but we have two more writes to a single global counter
for each packet.  This is going to really suck on SMP.

So what I'd like to see is a scheme that's similar to sk_forward_alloc.
The idea is that each socket allocates memory using mem_schedule and
then stores it in sk_forward_alloc.  Each packet then only has to
add to/subtract from sk_forward_alloc.

There is one big problem with this though, UDP is not serialised like
TCP.  So you can't just use sk_forward_alloc since it's not an atomic_t.

We'll need to think about this one a bit more.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] XFRM: SPD auditing fix to include the netmask/prefix-length

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 09:51:48AM -0500, Paul Moore wrote:

 Steve and/or Joy, could we get a verdict on this issue?  The lack of a 
 netmask 
 in the SPD audit messages is pretty serious so I'd like to see this fixed as 
 soon as possible.

I'll take the resounding silence as an indication of approval :)

Patch applied to net-2.6.25.  Thanks Paul.
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] remove extra memset from dn_fib_check_nh

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 06:54:01PM +0300, Denis V. Lunev wrote:
 [PATCH] remove extra memset from dn_fib_check_nh
 
 Signed-off-by: Denis V. Lunev [EMAIL PROTECTED]

Applied to net-2.6.25.  Thanks Denis!
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 1/6][CORE] Remove unneeded ifdefs from sysctl_net_core.c

2007-12-01 Thread Pavel Emelyanov

They include the whole file, but it is already compiled
out when SYSCTL=n, since it is obj-$(CONFIG_SYSCTL) target
in the Makefile.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 113cc72..277c8fa 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -13,8 +13,6 @@
 #include net/sock.h
 #include net/xfrm.h
 
-#ifdef CONFIG_SYSCTL
-
 ctl_table core_table[] = {
 #ifdef CONFIG_NET
{
@@ -151,5 +149,3 @@ ctl_table core_table[] = {
},
{ .ctl_name = 0 }
 };
-
-#endif
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 07:37:28PM +0300, Pavel Emelyanov wrote:
 
 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

All applied to net-2.6.25.

 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
 index b0cf075..f97b2a4 100644
 --- a/include/net/net_namespace.h
 +++ b/include/net/net_namespace.h
 @@ -41,6 +43,7 @@ struct net {
  
   /* unix sockets */
   int sysctl_unix_max_dgram_qlen;
 + struct ctl_table_header *unix_ctl;
  };

But I gotta say this struct/file is going to be enormous.  It's also
one of those files that causes everything to get recompiled.  Maybe
we ought to make a rule that each subsystem only gets to have at most
one entry in it :)

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 2/6][CORE] Isolate the net/core/ sysctl table

2007-12-01 Thread Pavel Emelyanov

Using ctl paths we can put all the stuff, related to net/core/
sysctl table, into one file and remove all the references on it.

As a good side effect this hides the core_table name from
the global name space :)

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/include/net/sock.h b/include/net/sock.h
index 43e3cd9..8f32a71 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1325,10 +1325,6 @@ extern __u32 sysctl_rmem_max;
 
 extern void sk_init(void);
 
-#ifdef CONFIG_SYSCTL
-extern struct ctl_table core_table[];
-#endif
-
 extern int sysctl_optmem_max;
 
 extern __u32 sysctl_wmem_default;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 277c8fa..e322713 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,10 +10,11 @@
 #include linux/module.h
 #include linux/socket.h
 #include linux/netdevice.h
+#include linux/init.h
 #include net/sock.h
 #include net/xfrm.h
 
-ctl_table core_table[] = {
+static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
{
.ctl_name   = NET_CORE_WMEM_MAX,
@@ -149,3 +150,19 @@ ctl_table core_table[] = {
},
{ .ctl_name = 0 }
 };
+
+static __initdata struct ctl_path net_core_path[] = {
+   { .procname = net, .ctl_name = CTL_NET, },
+   { .procname = core, .ctl_name = NET_CORE, },
+   { },
+};
+
+static __init int sysctl_core_init(void)
+{
+   struct ctl_table_header *hdr;
+
+   hdr = register_sysctl_paths(net_core_path, net_core_table);
+   return hdr == NULL ? -ENOMEM : 0;
+}
+
+__initcall(sysctl_core_init);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index c50c793..747fc55 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -31,12 +31,6 @@
 #endif
 
 struct ctl_table net_table[] = {
-   {
-   .ctl_name   = NET_CORE,
-   .procname   = core,
-   .mode   = 0555,
-   .child  = core_table,
-   },
 #ifdef CONFIG_INET
{
.ctl_name   = NET_IPV4,
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 3/6][IPv4] Cleanup the sysctl_net_ipv4.c file

2007-12-01 Thread Pavel Emelyanov

This includes several cleanups:

 * move the ipv4_config to af_inet.c;
 * tune Makefile to compile out this file when needed;
 * remove additional sysctl_ip_nonlocal_bind declaration
   (it is already declared in net/ip.h);
 * remove no longer needed ifdefs from this file.

This is a preparation for using ctl paths for net/ipv4/
sysctl table.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 93fe396..ad40ef3 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,9 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
 tcp_minisocks.o tcp_cong.o \
 datagram.o raw.o udp.o udplite.o \
 arp.o icmp.o devinet.o af_inet.o  igmp.o \
-sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+fib_frontend.o fib_semantics.o \
 inet_fragment.o
 
+obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c75f20b..0e4b6eb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -126,6 +126,10 @@ extern void ip_mc_drop_socket(struct sock *sk);
 static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);
 
+struct ipv4_config ipv4_config;
+
+EXPORT_SYMBOL(ipv4_config);
+
 /* New destruction routine */
 
 void inet_sock_destruct(struct sock *sk)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index bec6fe8..3546424 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -21,19 +21,10 @@
 #include net/cipso_ipv4.h
 #include net/inet_frag.h
 
-/* From af_inet.c */
-extern int sysctl_ip_nonlocal_bind;
-
-#ifdef CONFIG_SYSCTL
 static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
-#endif
-
-struct ipv4_config ipv4_config;
-
-#ifdef CONFIG_SYSCTL
 
 static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
@@ -887,7 +878,3 @@ ctl_table ipv4_table[] = {
},
{ .ctl_name = 0 }
 };
-
-#endif /* CONFIG_SYSCTL */
-
-EXPORT_SYMBOL(ipv4_config);
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 4/6][IPV4] Use ctl paths to register net/ipv4/ table

2007-12-01 Thread Pavel Emelyanov

This is the same as I did for the net/core/ table in the
second patch in this series: use the paths and isolate
the whole table in the .c file.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/include/net/ip.h b/include/net/ip.h
index 83fb9f1..7e1dd67 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -393,6 +393,4 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int 
__user *name, int nlen,
 extern int ip_misc_proc_init(void);
 #endif
 
-extern struct ctl_table ipv4_table[];
-
 #endif /* _IP_H */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3546424..bfd0dec 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -13,6 +13,7 @@
 #include linux/igmp.h
 #include linux/inetdevice.h
 #include linux/seqlock.h
+#include linux/init.h
 #include net/snmp.h
 #include net/icmp.h
 #include net/ip.h
@@ -247,7 +248,7 @@ static int strategy_allowed_congestion_control(ctl_table 
*table, int __user *nam
 
 }
 
-ctl_table ipv4_table[] = {
+static struct ctl_table ipv4_table[] = {
{
.ctl_name   = NET_IPV4_TCP_TIMESTAMPS,
.procname   = tcp_timestamps,
@@ -878,3 +879,19 @@ ctl_table ipv4_table[] = {
},
{ .ctl_name = 0 }
 };
+
+static __initdata struct ctl_path net_ipv4_path[] = {
+   { .procname = net, .ctl_name = CTL_NET, },
+   { .procname = ipv4, .ctl_name = NET_IPV4, },
+   { },
+};
+
+static __init int sysctl_ipv4_init(void)
+{
+   struct ctl_table_header *hdr;
+
+   hdr = register_sysctl_paths(net_ipv4_path, ipv4_table);
+   return hdr == NULL ? -ENOMEM : 0;
+}
+
+__initcall(sysctl_ipv4_init);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 747fc55..a4f0ed8 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -31,14 +31,6 @@
 #endif
 
 struct ctl_table net_table[] = {
-#ifdef CONFIG_INET
-   {
-   .ctl_name   = NET_IPV4,
-   .procname   = ipv4,
-   .mode   = 0555,
-   .child  = ipv4_table
-   },
-#endif
 #ifdef CONFIG_TR
{
.ctl_name   = NET_TR,
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 2/2][NEIGH] Use the ctl paths to create neighbours sysctls

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 08:29:16PM +0300, Pavel Emelyanov wrote:
 Since the path is modified, it is put on the stack, to avoid
 possible races with multiple calls to neigh_sysctl_register() : it
 is called by protocols and I didn't find any protection in this
 case. Did I overlook the rtnl lock?

I think the only caller that can be a module is IPv6 :)

 The stack growth of the neigh_sysctl_register() is 40 bytes. I
 believe this is OK, since this is not that much and this function 
 is not called with the deep stack (device/protocols register).

Yes it's fine.

 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

Both applied to net-2.6.25.  Thanks Pavel!
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 6/6][CORE] Remove the empty net_table

2007-12-01 Thread Pavel Emelyanov

I have removed all the entries from this table (core_table,
ipv4_table and tr_table), so now we can safely drop it.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/include/linux/net.h b/include/linux/net.h
index f95f12c..c414d90 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -337,7 +337,6 @@ static const struct proto_ops name##_ops = {
\
 
 #ifdef CONFIG_SYSCTL
 #include linux/sysctl.h
-extern ctl_table net_table[];
 extern int net_msg_cost;
 extern int net_msg_burst;
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 946a01c..894a177 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -199,14 +199,6 @@ static struct ctl_table root_table[] = {
.mode   = 0555,
.child  = vm_table,
},
-#ifdef CONFIG_NET
-   {
-   .ctl_name   = CTL_NET,
-   .procname   = net,
-   .mode   = 0555,
-   .child  = net_table,
-   },
-#endif
{
.ctl_name   = CTL_FS,
.procname   = fs,
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 16ad14b..665e856 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -30,10 +30,6 @@
 #include linux/if_tr.h
 #endif
 
-struct ctl_table net_table[] = {
-   { 0 },
-};
-
 static struct list_head *
 net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
 {
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4] udp: memory accounting in IPv4

2007-12-01 Thread Eric Dumazet


Herbert Xu a écrit :

On Wed, Nov 28, 2007 at 01:53:36PM -0500, Hideo AOKI wrote:

+/**
+ * __skb_queue_purge_and_sub_memory_allocated
+ * - empty a list and subtruct memory allocation counter
+ * @sk:   sk
+ * @list: list to empty
+ * Delete all buffers on an sk_buff list and subtruct the
+ * truesize of the sk_buff for memory accounting. Each buffer
+ * is removed from the list and one reference dropped. This
+ * function does not take the list lock and the caller must
+ * hold the relevant locks to use it.
+ */
+static inline void __skb_queue_purge_and_sub_memory_allocated(struct sock *sk,
+   struct sk_buff_head *list)
+{
+   struct sk_buff *skb;
+   int purged_skb_size = 0;
+   while ((skb = __skb_dequeue(list)) != NULL) {
+   purged_skb_size += sk_datagram_pages(skb-truesize);
+   kfree_skb(skb);
+   }
+   atomic_sub(purged_skb_size, sk-sk_prot-memory_allocated);
+}


Thanks, this is a lot better than before!

However, I'm still a little concerned about the effect of two more
atomic op's per packet that we're adding here.  Hang on a sec, that
should've been Dave's line since atomic ops are cheap on x86 :)

But seriously, it's not so much that we have two more atomic op's
per packet, but we have two more writes to a single global counter
for each packet.  This is going to really suck on SMP.

So what I'd like to see is a scheme that's similar to sk_forward_alloc.
The idea is that each socket allocates memory using mem_schedule and
then stores it in sk_forward_alloc.  Each packet then only has to
add to/subtract from sk_forward_alloc.

There is one big problem with this though, UDP is not serialised like
TCP.  So you can't just use sk_forward_alloc since it's not an atomic_t.

We'll need to think about this one a bit more.


I agree that adding yet more atomic ops is a big problem.

Another idea, coupled with recent work on percpu storage done by Christoph 
Lameter, would be to use kind of a percpu_counter :


We don't really need strong and precise memory accounting (for UDP, but for
TCP as well), just some kind of limit to keep too much memory from being used.


That is, updating a percpu variable, and doing some updates to a global 
counter only when this percpu variable escapes from a given range.


A lot of contended cache lines could benefit from this relaxation (count of
sockets...)


I would first wait until Christoph's work is done, so that we don't need atomic
ops on local cpu storage (and don't need to disable preemption either).


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Mark Lord


Mark Lord wrote:

Now that we have network namespace support merged it is time to
revisit the sysfs support so we can remove the dependency on !SYSFS.

...
Now that the namespace updates are part of 2.6.24,
there is a major inconsistency in network EXPORT_SYMBOLs.

It used to be that an external network module could get away without
having to add a MODULE_LICENSE(GPL*) line to the source.

In support of that, common networking functions (still) use EXPORT_SYMBOL()
rather than the more restrictive EXPORT_SYMBOL_GPL().

Eg.  register_netdev(), sk_alloc(), __dev_get_by_name().

But now, none of those three are actually usable by default,
because they all require init_net, which is EXPORT_SYMBOL_GPL().

So.. It appears that one of three things should really happen next:

1) Change the other exports to also be EXPORT_SYMBOL_GPL.

2) Have register_netdev, sk_alloc, and __dev_get_by_name default
to using init_net when NULL is specified in the namespace field.

or
3) Change init_net to be EXPORT_SYMBOL_GPL.

..

Obviously that should instead say:

3) Change init_net to be EXPORT_SYMBOL instead of EXPORT_SYMBOL_GPL.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4] udp: memory accounting in IPv4

2007-12-01 Thread Herbert Xu

On Sat, Dec 01, 2007 at 02:08:31PM +0100, Eric Dumazet wrote:
 
 I agree adding yet another atomics ops is a big problem.
 
 Another idea, coupled with recent work on percpu storage done by Christoph 
 Lameter, would be to use kind of a percpu_counter :

Yes that's an interesting idea.

 We dont really need strong and precise memory accounting (UDP , but TCP as 
 well), just some kind of limit to avoid memory to be too much used.

BTW it's no big deal for TCP because it's completely serialised so it
doesn't use atomic ops for the accounting.  More importantly, it uses
sk_forward_alloc so not every packet needs to touch the global counter.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 2/3][IPV6] Unify and cleanup calls to addrconf_sysctl_register

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 09:54:51PM +0300, Pavel Emelyanov wrote:

 +static void addrconf_sysctl_register(struct inet6_dev *idev,
 + struct ipv6_devconf *p)

Due to your simplification you no longer need the second argument
as it can now be derived from the first as is the case for IPv4.

So let's get rid of that while we're at it.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register

2007-12-01 Thread Pavel Emelyanov

Herbert Xu wrote:
 On Fri, Nov 30, 2007 at 09:26:58PM +0300, Pavel Emelyanov wrote:
 Besides, the inet_device is passed to this function, but
 it is not needed there at all - just the device name and
 ifindex are required.
 
 But it is called devinet_* so an in_dev kind of makes sense :)
 
  #ifdef CONFIG_SYSCTL
 -devinet_sysctl_register(in_dev, in_dev-cnf);
 +devinet_sysctl_register(dev, in_dev-cnf);
 
 How about just giving it in_dev instead?

Hmm... Makes sense. Should I recreate the whole set or
just make the incremental one?

 Thanks,

Thanks,
Pavel
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register

2007-12-01 Thread Herbert Xu

On Sat, Dec 01, 2007 at 04:25:21PM +0300, Pavel Emelyanov wrote:

  How about just giving it in_dev instead?
 
 Hmm... Makes sense. Should I recreate the whole set or
 just make the incremental one?

I've applied 1/3 for both cases so please just resend 2/3 and 3/3.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace

2007-12-01 Thread Pavel Emelyanov

Denis V. Lunev wrote:
 Herbert Xu wrote:
 On Fri, Nov 30, 2007 at 07:37:28PM +0300, Pavel Emelyanov wrote:
 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]
 All applied to net-2.6.25.

 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
 index b0cf075..f97b2a4 100644
 --- a/include/net/net_namespace.h
 +++ b/include/net/net_namespace.h
 @@ -41,6 +43,7 @@ struct net {
  
 /* unix sockets */
 int sysctl_unix_max_dgram_qlen;
 +   struct ctl_table_header *unix_ctl;
  };
 But I gotta say this struct/file is going to be enormous.  It's also
 one of those files that causes everything to get recompiled.  Maybe
 we ought to make a rule that each subsystem only gets to have at most
 one entry in it :)

 Thanks,
 
 Good point, thanks. We'll start thinking in that direction. Right now it
 is not finally cursed with all staff around.

Agree, the point is good :) but it has one pitfall :(

Look, now we make _one_ dereference to get any net->xxx variable
(sysctl, list head, lock, etc). If we force each subsystem
to have its own private pointer here, we'll make them take _two_
dereferences. Before the whole net namespace stuff started we
made _zero_ dereferences :) This may affect performance.

I'm not claiming that this is the major case against this idea,
but when developing this idea, I think we should keep that fact
in mind and pay close attention to performance regressions.

 Regards,
   Den

Thanks,
Pavel

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

namespace support requires network modules to say GPL

2007-12-01 Thread Mark Lord


Now that we have network namespace support merged it is time to
revisit the sysfs support so we can remove the dependency on !SYSFS.

...

Now that the namespace updates are part of 2.6.24,
there is a major inconsistency in network EXPORT_SYMBOLs.

It used to be that an external network module could get away without
having to add a MODULE_LICENSE(GPL*) line to the source.

In support of that, common networking functions (still) use EXPORT_SYMBOL()
rather than the more restrictive EXPORT_SYMBOL_GPL().

Eg.  register_netdev(), sk_alloc(), __dev_get_by_name().

But now, none of those three are actually usable by default,
because they all require init_net, which is EXPORT_SYMBOL_GPL().

So.. It appears that one of three things should really happen next:

1) Change the other exports to also be EXPORT_SYMBOL_GPL.

2) Have register_netdev, sk_alloc, and __dev_get_by_name default
to using init_net when NULL is specified in the namespace field.

or 


3) Change init_net to be EXPORT_SYMBOL_GPL.

Right now, things are just a bit inconsistent, and it's not clear
whether the namespace changes intended this consequence or not.

Cheers

(as for me, I think all kernel modules are GPL, whether they have
the MODULE_LICENSE line or not, so flames to /dev/null on that).
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 1/3][IPV4] Cleanup the devinet_sysctl_register

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 09:21:00PM +0300, Pavel Emelyanov wrote:
 I moved the call to kmalloc() from the *t declaration into 
 the code (this is confusing when a variable is initialized
 with the result of some call) and removed unneeded comment 
 near the error path. Just like I did with the neigh ctl-s.
 
 Besides, I fixed the goto's and the labels - they were indented
 with spaces :(
 
 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

Applied to net-2.6.25.  Thanks.
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 09:26:58PM +0300, Pavel Emelyanov wrote:
 
 Besides, the inet_device is passed to this function, but
 it is not needed there at all - just the device name and
 ifindex are required.

But it is called devinet_* so an in_dev kind of makes sense :)

  #ifdef CONFIG_SYSCTL
 - devinet_sysctl_register(in_dev, in_dev-cnf);
 + devinet_sysctl_register(dev, in_dev-cnf);

How about just giving it in_dev instead?

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 5/6][TR] Use ctl paths to register net/token-ring/ table

2007-12-01 Thread Pavel Emelyanov

The same thing for token-ring - use ctl paths and get
rid of external references on the tr_table.

Unfortunately, I couldn't split this patch into cleanup and 
use-the-paths parts.

As a lame excuse I can say, that the cleanup is just moving
the tr_table from one file to another. Since the source 
file is empty after the move, I remove it.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h
index 046e9d9..5bcec8b 100644
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -49,9 +49,6 @@ static inline struct trh_hdr *tr_hdr(const struct sk_buff 
*skb)
 {
return (struct trh_hdr *)skb_mac_header(skb);
 }
-#ifdef CONFIG_SYSCTL
-extern struct ctl_table tr_table[];
-#endif
 #endif
 
 /* This is an Token-Ring LLC structure */
diff --git a/net/802/Makefile b/net/802/Makefile
index 977704a..68569ff 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -3,9 +3,8 @@
 #
 
 # Check the p8022 selections against net/core/Makefile.
-obj-$(CONFIG_SYSCTL)   += sysctl_net_802.o
 obj-$(CONFIG_LLC)  += p8022.o psnap.o
-obj-$(CONFIG_TR)   += p8022.o psnap.o tr.o sysctl_net_802.o
+obj-$(CONFIG_TR)   += p8022.o psnap.o tr.o
 obj-$(CONFIG_NET_FC)   += fc.o
 obj-$(CONFIG_FDDI) += fddi.o
 obj-$(CONFIG_HIPPI)+= hippi.o
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
deleted file mode 100644
index ead5603..000
--- a/net/802/sysctl_net_802.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* -*- linux-c -*-
- * sysctl_net_802.c: sysctl interface to net 802 subsystem.
- *
- * Begun April 1, 1996, Mike Shaver.
- * Added /proc/sys/net/802 directory entry (empty =) ). [MS]
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include linux/mm.h
-#include linux/if_tr.h
-#include linux/sysctl.h
-
-#ifdef CONFIG_TR
-extern int sysctl_tr_rif_timeout;
-#endif
-
-struct ctl_table tr_table[] = {
-#ifdef CONFIG_TR
-   {
-   .ctl_name   = NET_TR_RIF_TIMEOUT,
-   .procname   = rif_timeout,
-   .data   = sysctl_tr_rif_timeout,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec
-   },
-#endif /* CONFIG_TR */
-   { 0 },
-};
diff --git a/net/802/tr.c b/net/802/tr.c
index d8a5386..23fa151 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -35,6 +35,7 @@
 #include linux/proc_fs.h
 #include linux/seq_file.h
 #include linux/init.h
+#include linux/sysctl.h
 #include net/arp.h
 #include net/net_namespace.h
 
@@ -634,6 +635,26 @@ struct net_device *alloc_trdev(int sizeof_priv)
return alloc_netdev(sizeof_priv, tr%d, tr_setup);
 }
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tr_table[] = {
+   {
+   .ctl_name   = NET_TR_RIF_TIMEOUT,
+   .procname   = rif_timeout,
+   .data   = sysctl_tr_rif_timeout,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
+   { 0 },
+};
+
+static __initdata struct ctl_path tr_path[] = {
+   { .procname = net, .ctl_name = CTL_NET, },
+   { .procname = token-ring, .ctl_name = NET_TR, },
+   { }
+};
+#endif
+
 /*
  * Called during bootup.  We don't actually have to initialise
  * too much for this.
@@ -644,7 +665,9 @@ static int __init rif_init(void)
rif_timer.expires  = sysctl_tr_rif_timeout;
setup_timer(rif_timer, rif_check_expire, 0);
add_timer(rif_timer);
-
+#ifdef CONFIG_SYSCTL
+   register_sysctl_paths(tr_path, tr_table);
+#endif
proc_net_fops_create(init_net, tr_rif, S_IRUGO, rif_seq_fops);
return 0;
 }
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index a4f0ed8..16ad14b 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -31,14 +31,6 @@
 #endif
 
 struct ctl_table net_table[] = {
-#ifdef CONFIG_TR
-   {
-   .ctl_name   = NET_TR,
-   .procname   = token-ring,
-   .mode   = 0555,
-   .child  = tr_table,
-   },
-#endif
{ 0 },
 };
 
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 (resend) 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register

2007-12-01 Thread Pavel Emelyanov

Currently this call is used to register sysctls for devices
and for the "default" confs. The "all" sysctls are registered
separately.

Besides, the inet_device is passed to this function, but it is
not needed there at all - just the device name and ifindex are 
required. 

Thanks to Herbert, who noticed, that this call doesn't even 
require the devconf pointer (the last argument) - all we need 
we can take from the in_device itself.

The fix is to make a __devinet_sysctl_register(), which registers
sysctls for all devices we need, including "default" and "all" :)

The original devinet_sysctl_register() works with struct net_device,
not the inet_device, and calls the introduced function, passing 
the device name and ifindex (to be used as procname and ctl_name)
into it.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 34c34c6..385896f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -98,8 +98,7 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 int destroy);
 #ifdef CONFIG_SYSCTL
-static void devinet_sysctl_register(struct in_device *in_dev,
-   struct ipv4_devconf *p);
+static void devinet_sysctl_register(struct in_device *idev);
 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
 #endif
 
@@ -173,7 +172,7 @@ static struct in_device *inetdev_init(struct net_device 
*dev)
in_dev_hold(in_dev);
 
 #ifdef CONFIG_SYSCTL
-   devinet_sysctl_register(in_dev, in_dev-cnf);
+   devinet_sysctl_register(in_dev);
 #endif
ip_mc_init_dev(in_dev);
if (dev-flags  IFF_UP)
@@ -1120,7 +1119,7 @@ static int inetdev_event(struct notifier_block *this, 
unsigned long event,
neigh_sysctl_unregister(in_dev-arp_parms);
neigh_sysctl_register(dev, in_dev-arp_parms, NET_IPV4,
  NET_IPV4_NEIGH, ipv4, NULL, NULL);
-   devinet_sysctl_register(in_dev, in_dev-cnf);
+   devinet_sysctl_register(in_dev);
 #endif
break;
}
@@ -1502,13 +1501,11 @@ static struct devinet_sysctl_table {
},
 };
 
-static void devinet_sysctl_register(struct in_device *in_dev,
-   struct ipv4_devconf *p)
+static void __devinet_sysctl_register(char *dev_name, int ctl_name,
+   struct ipv4_devconf *p)
 {
int i;
-   struct net_device *dev = in_dev ? in_dev-dev : NULL;
struct devinet_sysctl_table *t;
-   char *dev_name = NULL;
 
t = kmemdup(devinet_sysctl, sizeof(*t), GFP_KERNEL);
if (!t)
@@ -1519,13 +1516,7 @@ static void devinet_sysctl_register(struct in_device 
*in_dev,
t-devinet_vars[i].extra1 = p;
}
 
-   if (dev) {
-   dev_name = dev-name;
-   t-devinet_dev[0].ctl_name = dev-ifindex;
-   } else {
-   dev_name = default;
-   t-devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-   }
+   t-devinet_dev[0].ctl_name = ctl_name;
 
/*
 * Make a copy of dev_name, because '.procname' is regarded as const
@@ -1557,6 +1548,12 @@ out:
return;
 }
 
+static void devinet_sysctl_register(struct in_device *idev)
+{
+   return __devinet_sysctl_register(idev-dev-name, idev-dev-ifindex,
+   idev-cnf);
+}
+
 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
 {
if (p-sysctl) {
@@ -1578,9 +1575,10 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
 #ifdef CONFIG_SYSCTL
-   devinet_sysctl.sysctl_header =
-   register_sysctl_table(devinet_sysctl.devinet_root_dir);
-   devinet_sysctl_register(NULL, ipv4_devconf_dflt);
+   __devinet_sysctl_register(all, NET_PROTO_CONF_ALL,
+   ipv4_devconf);
+   __devinet_sysctl_register(default, NET_PROTO_CONF_DEFAULT,
+   ipv4_devconf_dflt);
 #endif
 }
 
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 (resend) 3/3][IPV4] Use ctl paths to register devinet sysctls

2007-12-01 Thread Pavel Emelyanov

This looks very much like the patch for neighbors.

The path is also located on the stack and is prepared
inside the function. This time, the call to the registering
function is guarded with the RTNL lock, but I decided
to keep it on the stack not to litter the devinet.c file
with unneeded names and to make it look similar to the
neighbors code.

This is also intended to help us with the net namespaces
and saves the vmlinux size as well - this time by more 
than 670 bytes.

The difference from the first version is just the patch
offsets, that changed due to changes in the patch #2.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 385896f..c19c8db 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1431,11 +1431,8 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int 
__user *name, int nlen,
 
 static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
-   ctl_table   devinet_vars[__NET_IPV4_CONF_MAX];
-   ctl_table   devinet_dev[2];
-   ctl_table   devinet_conf_dir[2];
-   ctl_table   devinet_proto_dir[2];
-   ctl_table   devinet_root_dir[2];
+   struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
+   char *dev_name;
 } devinet_sysctl = {
.devinet_vars = {
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, forwarding,
@@ -1467,38 +1464,6 @@ static struct devinet_sysctl_table {
DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
  promote_secondaries),
},
-   .devinet_dev = {
-   {
-   .ctl_name   = NET_PROTO_CONF_ALL,
-   .procname   = all,
-   .mode   = 0555,
-   .child  = devinet_sysctl.devinet_vars,
-   },
-   },
-   .devinet_conf_dir = {
-   {
-   .ctl_name   = NET_IPV4_CONF,
-   .procname   = conf,
-   .mode   = 0555,
-   .child  = devinet_sysctl.devinet_dev,
-   },
-   },
-   .devinet_proto_dir = {
-   {
-   .ctl_name   = NET_IPV4,
-   .procname   = ipv4,
-   .mode   = 0555,
-   .child  = devinet_sysctl.devinet_conf_dir,
-   },
-   },
-   .devinet_root_dir = {
-   {
-   .ctl_name   = CTL_NET,
-   .procname   = net,
-   .mode   = 0555,
-   .child  = devinet_sysctl.devinet_proto_dir,
-   },
-   },
 };
 
 static void __devinet_sysctl_register(char *dev_name, int ctl_name,
@@ -1507,6 +1472,16 @@ static void __devinet_sysctl_register(char *dev_name, 
int ctl_name,
int i;
struct devinet_sysctl_table *t;
 
+#define DEVINET_CTL_PATH_DEV   3
+
+   struct ctl_path devinet_ctl_path[] = {
+   { .procname = net, .ctl_name = CTL_NET, },
+   { .procname = ipv4, .ctl_name = NET_IPV4, },
+   { .procname = conf, .ctl_name = NET_IPV4_CONF, },
+   { /* to be set */ },
+   { },
+   };
+
t = kmemdup(devinet_sysctl, sizeof(*t), GFP_KERNEL);
if (!t)
goto out;
@@ -1516,24 +1491,20 @@ static void __devinet_sysctl_register(char *dev_name, 
int ctl_name,
t-devinet_vars[i].extra1 = p;
}
 
-   t-devinet_dev[0].ctl_name = ctl_name;
-
/*
 * Make a copy of dev_name, because '.procname' is regarded as const
 * by sysctl and we wouldn't want anyone to change it under our feet
 * (see SIOCSIFNAME).
 */
-   dev_name = kstrdup(dev_name, GFP_KERNEL);
-   if (!dev_name)
+   t-dev_name = kstrdup(dev_name, GFP_KERNEL);
+   if (!t-dev_name)
goto free;
 
-   t-devinet_dev[0].procname= dev_name;
-   t-devinet_dev[0].child   = t-devinet_vars;
-   t-devinet_conf_dir[0].child  = t-devinet_dev;
-   t-devinet_proto_dir[0].child = t-devinet_conf_dir;
-   t-devinet_root_dir[0].child  = t-devinet_proto_dir;
+   devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t-dev_name;
+   devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
 
-   t-sysctl_header = register_sysctl_table(t-devinet_root_dir);
+   t-sysctl_header = register_sysctl_paths(devinet_ctl_path,
+   t-devinet_vars);
if (!t-sysctl_header)
goto free_procname;
 
@@ -1541,7 +1512,7 @@ static void __devinet_sysctl_register(char *dev_name, int 
ctl_name,
return;
 
 free_procname:
-   kfree(dev_name);
+   kfree(t-dev_name);
 free:
kfree(t);
 out:
@@ -1560,7 +1531,7 @@

[PATCH net-2.6.25 (resend) 2/3][IPV6] Unify and cleanup calls to addrconf_sysctl_register

2007-12-01 Thread Pavel Emelyanov

Currently this call is (ab)used similarly to the devinet one - it
registers sysctls for devices and for the "default" confs, while
the "all" sysctls are registered separately. But unlike its
devinet brother, the passed inet6_device is needed.

The fix is to make a __addrconf_sysctl_register(), which registers
sysctls for all devices we need, including "default" and "all" :)

The original addrconf_sysctl_register() calls the introduced 
function, passing the inet6_device, device name and ifindex (to 
be used as procname and ctl_name) into it. 

Thanks to Herbert again for pointing out, that we can shrink the 
argument list to 1 :)

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2d2886a..ea1673d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -101,7 +101,7 @@
 #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
 
 #ifdef CONFIG_SYSCTL
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct 
ipv6_devconf *p);
+static void addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
 #endif
 
@@ -400,7 +400,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device 
*dev)
  NET_IPV6_NEIGH, ipv6,
  ndisc_ifinfo_sysctl_change,
  NULL);
-   addrconf_sysctl_register(ndev, ndev-cnf);
+   addrconf_sysctl_register(ndev);
 #endif
/* protected by rtnl_lock */
rcu_assign_pointer(dev-ip6_ptr, ndev);
@@ -2386,7 +2386,7 @@ static int addrconf_notify(struct notifier_block *this, 
unsigned long event,
  NET_IPV6, NET_IPV6_NEIGH, ipv6,
  ndisc_ifinfo_sysctl_change,
  NULL);
-   addrconf_sysctl_register(idev, idev-cnf);
+   addrconf_sysctl_register(idev);
 #endif
err = snmp6_register_dev(idev);
if (err)
@@ -4118,12 +4118,11 @@ static struct addrconf_sysctl_table
},
 };
 
-static void addrconf_sysctl_register(struct inet6_dev *idev, struct 
ipv6_devconf *p)
+static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
+   struct inet6_dev *idev, struct ipv6_devconf *p)
 {
int i;
-   struct net_device *dev = idev ? idev-dev : NULL;
struct addrconf_sysctl_table *t;
-   char *dev_name = NULL;
 
t = kmemdup(addrconf_sysctl, sizeof(*t), GFP_KERNEL);
if (t == NULL)
@@ -4133,13 +4132,6 @@ static void addrconf_sysctl_register(struct inet6_dev 
*idev, struct ipv6_devconf
t-addrconf_vars[i].data += (char*)p - (char*)ipv6_devconf;
t-addrconf_vars[i].extra1 = idev; /* embedded; no ref */
}
-   if (dev) {
-   dev_name = dev-name;
-   t-addrconf_dev[0].ctl_name = dev-ifindex;
-   } else {
-   dev_name = default;
-   t-addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-   }
 
/*
 * Make a copy of dev_name, because '.procname' is regarded as const
@@ -4150,6 +4142,7 @@ static void addrconf_sysctl_register(struct inet6_dev 
*idev, struct ipv6_devconf
if (!dev_name)
goto free;
 
+   t-addrconf_dev[0].ctl_name = ctl_name;
t-addrconf_dev[0].procname = dev_name;
 
t-addrconf_dev[0].child = t-addrconf_vars;
@@ -4172,6 +4165,12 @@ out:
return;
 }
 
+static void addrconf_sysctl_register(struct inet6_dev *idev)
+{
+   __addrconf_sysctl_register(idev-dev-name, idev-dev-ifindex,
+   idev, idev-cnf);
+}
+
 static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
 {
if (p-sysctl) {
@@ -4270,9 +4269,10 @@ int __init addrconf_init(void)
ipv6_addr_label_rtnl_register();
 
 #ifdef CONFIG_SYSCTL
-   addrconf_sysctl.sysctl_header =
-   register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
-   addrconf_sysctl_register(NULL, ipv6_devconf_dflt);
+   __addrconf_sysctl_register(all, NET_PROTO_CONF_ALL,
+   NULL, ipv6_devconf);
+   __addrconf_sysctl_register(default, NET_PROTO_CONF_DEFAULT,
+   NULL, ipv6_devconf_dflt);
 #endif
 
return 0;
-- 
1.5.3.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality

2007-12-01 Thread Jochen Friedrich


Hi Vitaly,


With that patch fixed.c now fully emulates MDIO bus, thus no need
to duplicate PHY layer functionality. That, in turn, drastically
simplifies the code, and drops down line count.

As an additional bonus, now there is no need to register MDIO bus
for each PHY, all emulated PHYs placed on the platform fixed MDIO bus.
There is also no more need to pre-allocate PHYs via .config option,
this is all now handled dynamically.

p.s. Don't even try to understand patch content! Better: apply patch
and look into resulting drivers/net/phy/fixed.c.
  
If I understand your code correctly, you seem to rely on the fact 
that fixed_phy_add() is called before the fixed MDIO bus is scanned for 
devices. How is this supposed to work for modules or for the 
PPC_CPM_NEW_BINDING mode where the device tree is no longer scanned 
during fs_soc initialization but during device initialization?


I tried to add fixed-phy support to fs_enet, but the fixed phy is not 
found this way.


--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -36,6 +36,7 @@
#include linux/fs.h
#include linux/platform_device.h
#include linux/phy.h
+#include linux/phy_fixed.h

#include linux/vmalloc.h
#include asm/pgtable.h
@@ -1174,8 +1175,24 @@ static int __devinit find_phy(struct device_node *np,
   struct device_node *phynode, *mdionode;
   struct resource res;
   int ret = 0, len;
+   const u32 *data;
+   struct fixed_phy_status status = {};
+
+   data  = of_get_property(np, fixed-link, NULL);
+   if (data) {
+   status.link = 1;
+   status.duplex = data[1];
+   status.speed  = data[2];
+
+   ret = fixed_phy_add(PHY_POLL, data[0], status);
+   if (ret)
+   return ret;
+
+   snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data);
+   return 0;
+   }

-   const u32 *data = of_get_property(np, phy-handle, len);
+   data = of_get_property(np, phy-handle, len);
   if (!data || len != 4)
   return -EINVAL;

Thanks,
Jochen
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Please pull 'upstream-davem' branch of wireless-2.6

2007-12-01 Thread Herbert Xu

On Fri, Nov 30, 2007 at 09:23:06PM -0500, John W. Linville wrote:
 Dave/Herbert,
 
 Here are a few intended for 2.6.25.  The bulk of them are the beginnings
 of support for 802.11n in mac80211.  There is also a rework of the
 support for devices which can run scans in hardware, and a couple of
 additions to feature-removal-schedule.txt heralding the end of softmac.

All applied to net-2.6.25.  Thanks John!
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH net-2.6.25 (resend) 3/3][IPV6] Use ctl paths to register addrconf sysctls

2007-12-01 Thread Herbert Xu

On Sat, Dec 01, 2007 at 04:46:41PM +0300, Pavel Emelyanov wrote:
 This looks very much like the patch for ipv4's devinet.
 
 This is also intended to help us with the net namespaces
 and saves the ipv6.ko size by ~320 bytes.
 
 The difference from the first version is just the patch
 offsets, that changed due to changes in the patch #2.
 
 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

Both applied.  Thanks!
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace

2007-12-01 Thread Denis V. Lunev

Herbert Xu wrote:
 On Fri, Nov 30, 2007 at 07:37:28PM +0300, Pavel Emelyanov wrote:
 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]
 
 All applied to net-2.6.25.
 
 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
 index b0cf075..f97b2a4 100644
 --- a/include/net/net_namespace.h
 +++ b/include/net/net_namespace.h
 @@ -41,6 +43,7 @@ struct net {
  
  /* unix sockets */
  int sysctl_unix_max_dgram_qlen;
 +struct ctl_table_header *unix_ctl;
  };
 
 But I gotta say this struct/file is going to be enormous.  It's also
 one of those files that causes everything to get recompiled.  Maybe
 we ought to make a rule that each subsystem only gets to have at most
 one entry in it :)
 
 Thanks,

Good point, thanks. We'll start thinking in that direction. Right now it
is not finally cursed with all stuff around.

Regards,
Den
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH net-2.6.25 (resend) 3/3][IPV6] Use ctl paths to register addrconf sysctls

2007-12-01 Thread Pavel Emelyanov

This looks very much like the patch for ipv4's devinet.

This is also intended to help us with the net namespaces
and saves the ipv6.ko size by ~320 bytes.

The difference from the first version is just the patch
offsets, that changed due to changes in the patch #2.

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ea1673d..dbff389 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3848,10 +3848,7 @@ static struct addrconf_sysctl_table
 {
struct ctl_table_header *sysctl_header;
ctl_table addrconf_vars[__NET_IPV6_MAX];
-   ctl_table addrconf_dev[2];
-   ctl_table addrconf_conf_dir[2];
-   ctl_table addrconf_proto_dir[2];
-   ctl_table addrconf_root_dir[2];
+   char *dev_name;
 } addrconf_sysctl __read_mostly = {
.sysctl_header = NULL,
.addrconf_vars = {
@@ -4072,50 +4069,6 @@ static struct addrconf_sysctl_table
.ctl_name   =   0,  /* sentinel */
}
},
-   .addrconf_dev = {
-   {
-   .ctl_name   =   NET_PROTO_CONF_ALL,
-   .procname   =   all,
-   .mode   =   0555,
-   .child  =   addrconf_sysctl.addrconf_vars,
-   },
-   {
-   .ctl_name   =   0,  /* sentinel */
-   }
-   },
-   .addrconf_conf_dir = {
-   {
-   .ctl_name   =   NET_IPV6_CONF,
-   .procname   =   conf,
-   .mode   =   0555,
-   .child  =   addrconf_sysctl.addrconf_dev,
-   },
-   {
-   .ctl_name   =   0,  /* sentinel */
-   }
-   },
-   .addrconf_proto_dir = {
-   {
-   .ctl_name   =   NET_IPV6,
-   .procname   =   ipv6,
-   .mode   =   0555,
-   .child  =   
addrconf_sysctl.addrconf_conf_dir,
-   },
-   {
-   .ctl_name   =   0,  /* sentinel */
-   }
-   },
-   .addrconf_root_dir = {
-   {
-   .ctl_name   =   CTL_NET,
-   .procname   =   net,
-   .mode   =   0555,
-   .child  =   
addrconf_sysctl.addrconf_proto_dir,
-   },
-   {
-   .ctl_name   =   0,  /* sentinel */
-   }
-   },
 };
 
 static void __addrconf_sysctl_register(char *dev_name, int ctl_name,
@@ -4124,6 +4077,17 @@ static void __addrconf_sysctl_register(char *dev_name, 
int ctl_name,
int i;
struct addrconf_sysctl_table *t;
 
+#define ADDRCONF_CTL_PATH_DEV  3
+
+   struct ctl_path addrconf_ctl_path[] = {
+   { .procname = net, .ctl_name = CTL_NET, },
+   { .procname = ipv6, .ctl_name = NET_IPV6, },
+   { .procname = conf, .ctl_name = NET_IPV6_CONF, },
+   { /* to be set */ },
+   { },
+   };
+
+
t = kmemdup(addrconf_sysctl, sizeof(*t), GFP_KERNEL);
if (t == NULL)
goto out;
@@ -4138,19 +4102,15 @@ static void __addrconf_sysctl_register(char *dev_name, 
int ctl_name,
 * by sysctl and we wouldn't want anyone to change it under our feet
 * (see SIOCSIFNAME).
 */
-   dev_name = kstrdup(dev_name, GFP_KERNEL);
-   if (!dev_name)
+   t-dev_name = kstrdup(dev_name, GFP_KERNEL);
+   if (!t-dev_name)
goto free;
 
-   t-addrconf_dev[0].ctl_name = ctl_name;
-   t-addrconf_dev[0].procname = dev_name;
-
-   t-addrconf_dev[0].child = t-addrconf_vars;
-   t-addrconf_conf_dir[0].child = t-addrconf_dev;
-   t-addrconf_proto_dir[0].child = t-addrconf_conf_dir;
-   t-addrconf_root_dir[0].child = t-addrconf_proto_dir;
+   addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t-dev_name;
+   addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name;
 
-   t-sysctl_header = register_sysctl_table(t-addrconf_root_dir);
+   t-sysctl_header = register_sysctl_paths(addrconf_ctl_path,
+   t-addrconf_vars);
if (t-sysctl_header == NULL)
goto free_procname;
 
@@ -4158,7 +4118,7 @@ static void __addrconf_sysctl_register(char *dev_name, 
int ctl_name,
return;
 
 free_procname:
-   kfree(dev_name);
+   kfree(t-dev_name);
 free:
kfree(t);
 out:
@@ -4177,7 +4137,7 @@ static void addrconf_sysctl_unregister(struct 
ipv6_devconf *p)
struct addrconf_sysctl_table *t = p-sysctl;
p-sysctl = NULL;

Re: [PATCH net-2.6.25 (resend) 3/3][IPV4] Use ctl paths to register devinet sysctls

2007-12-01 Thread Herbert Xu

On Sat, Dec 01, 2007 at 04:39:58PM +0300, Pavel Emelyanov wrote:

 The difference from the first version is just the patch
 offsets, that changed due to changes in the patch #2.
 
 Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

All applied to net-2.6.25.  Thanks Pavel.
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

SSB: No is not an answer

2007-12-01 Thread Arnaldo Carvalho de Melo

Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n

Support for the Sonics Silicon Backplane bus.
You only need to enable this option, if you are
configuring a kernel for an embedded system with
this bus.
It will be auto-selected if needed in other
environments.

The module will be called ssb.

If unsure, say N.

Sonics Silicon Backplane support (SSB) [M/y/?] (NEW)
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: SSB: No is not an answer

2007-12-01 Thread John W. Linville

On Sat, Dec 01, 2007 at 03:17:44PM -0200, Arnaldo Carvalho de Melo wrote:
 Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n
 
 Support for the Sonics Silicon Backplane bus.
 You only need to enable this option, if you are
 configuring a kernel for an embedded system with
 this bus.
 It will be auto-selected if needed in other
 environments.
 
 The module will be called ssb.
 
 If unsure, say N.
 
 Sonics Silicon Backplane support (SSB) [M/y/?] (NEW)

I think this is OK -- it isn't really offering the choice to say
no anyway.  You must have turned-on B44 or B43(LEGACY) already?

So, your choice is merely whether to have it built-in or as a module.

John
-- 
John W. Linville
[EMAIL PROTECTED]
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: SSB: No is not an answer

2007-12-01 Thread Arnaldo Carvalho de Melo

Em Sat, Dec 01, 2007 at 12:45:32PM -0500, John W. Linville escreveu:
 On Sat, Dec 01, 2007 at 03:17:44PM -0200, Arnaldo Carvalho de Melo wrote:
  Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n
  
  Support for the Sonics Silicon Backplane bus.
  You only need to enable this option, if you are
  configuring a kernel for an embedded system with
  this bus.
  It will be auto-selected if needed in other
  environments.
  
  The module will be called ssb.
  
  If unsure, say N.
  
  Sonics Silicon Backplane support (SSB) [M/y/?] (NEW)
 
 I think this is OK -- it isn't really offering the choice to say
 no anyway.  You must have turned-on B44 or B43(LEGACY) already?
 
 So, your choice is merely whether to have it built-in or as a module.

Ok, so the comment on being unsure is wrong as we can't say N as
suggested :-)

- Arnaldo
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality

2007-12-01 Thread Vitaly Bordug

On Sat, 01 Dec 2007 14:48:54 +0100
Jochen Friedrich wrote:

 Hi Vitaly,
 
  With that patch fixed.c now fully emulates MDIO bus, thus no need
  to duplicate PHY layer functionality. That, in turn, drastically
  simplifies the code, and drops down line count.
 
  As an additional bonus, now there is no need to register MDIO bus
  for each PHY, all emulated PHYs placed on the platform fixed MDIO
  bus. There is also no more need to pre-allocate PHYs via .config
  option, this is all now handled dynamically.
 
  p.s. Don't even try to understand patch content! Better: apply patch
  and look into resulting drivers/net/phy/fixed.c.

 If i understand your code correctly, you seem to rely on the fact 
 that fixed_phy_add() is called before the fixed MDIO bus is scanned
 for devices. How is this supposed to work for modules or for the 
 PPC_CPM_NEW_BINDING mode where the device tree is no longer scanned 
 during fs_soc initialization but during device initialization?

Well, this is kind of a known issue - to work around it for now, place PHY lib
after fs_enet in the Makefile. This way it works for me for _NEW_BINDING and
mpc866ads.

 I tried to add fixed-phy support to fs_enet, but the fixed phy is not 
 found this way.
 
The point is I have the code and it works now (for fs_enet etc.), but I need to
find a way for the fixed phy pinning to work in either order with phylib. If
you have ideas, please go ahead :)


 --- a/drivers/net/fs_enet/fs_enet-main.c
 +++ b/drivers/net/fs_enet/fs_enet-main.c
 @@ -36,6 +36,7 @@
  #include linux/fs.h
  #include linux/platform_device.h
  #include linux/phy.h
 +#include linux/phy_fixed.h
  
  #include linux/vmalloc.h
  #include asm/pgtable.h
 @@ -1174,8 +1175,24 @@ static int __devinit find_phy(struct
 device_node *np, struct device_node *phynode, *mdionode;
 struct resource res;
 int ret = 0, len;
 +   const u32 *data;
 +   struct fixed_phy_status status = {};
 +
 +   data  = of_get_property(np, fixed-link, NULL);
 +   if (data) {
 +   status.link = 1;
 +   status.duplex = data[1];
 +   status.speed  = data[2];
 +
 +   ret = fixed_phy_add(PHY_POLL, data[0], status);
 +   if (ret)
 +   return ret;
 +
 +   snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data);
 +   return 0;
 +   }
  
 -   const u32 *data = of_get_property(np, phy-handle, len);
 +   data = of_get_property(np, phy-handle, len);
 if (!data || len != 4)
 return -EINVAL;
 
 Thanks,
 Jochen


-- 
Sincerely, Vitaly
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Stephen Hemminger

On Sat, 01 Dec 2007 08:10:17 -0500
Mark Lord [EMAIL PROTECTED] wrote:

  Now that we have network namespace support merged it is time to
  revisit the sysfs support so we can remove the dependency on !SYSFS.
 ...
 
 Now that the namespace updates are part of 2.6.24,
 there is a major inconsistency in network EXPORT_SYMBOLs.
 
 It used to be that an external network module could get away without
 having to add a MODULE_LICENSE(GPL*) line to the source.
 
 In support of that, common networking functions (still) use EXPORT_SYMBOL()
 rather than the more restrictive EXPORT_SYMBOL_GPL().
 
 Eg.  register_netdev(), sk_alloc(), __dev_get_by_name().
 
 But now, none of those three are actually usable by default,
 because they all require init_net, which is EXPORT_SYMBOL_GPL().
 

Then init_net needs to be not GPL limited. Sorry, we need to allow
non GPL network drivers.  There is a fine line between keeping the
binary seething masses from accessing random kernel functions, and allowing
reasonable (but still non GPL) things like ndiswrapper to use network
device interface.

-- 
Stephen Hemminger [EMAIL PROTECTED]
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Mcast packet loss 2.6.8.1 kernel

2007-12-01 Thread Bernd Eckenfels

On Fri, Nov 16, 2007 at 12:38:22PM +0100, Eric Dumazet wrote:
 Hello Bernd
 
 I did some investigations on the netstat -s problem and one
  fix is to change the size of char buf1[1024], buf2[1024];

I changed it now to 2048, and included your page-aligned I/O buffer patch. It
is available in net-tools HEAD (net-tools.berlios.de)

It would be good if I can get a few tests on different architectures.

Gruss
Bernd
-- 
  (OO) -- [EMAIL PROTECTED] --
 ( .. )[EMAIL PROTECTED],linux.de,debian.org}  http://www.eckes.org/
  o--o   1024D/E383CD7E  [EMAIL PROTECTED]  v:+497211603874  f:+49721151516129
(OO)  When cryptography is outlawed, bayl bhgynjf jvyy unir cevinpl!
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Alan Cox

 Then init_net needs to be not GPL limited. Sorry, we need to allow
 non GPL network drivers.  There is a fine line between keeping the

Why - they aren't exactly likely to be permissible by law

 binary seething masses from accessing random kernel functions, and allowing
 reasonable (but still non GPL) things like ndiswrapper to use network
 device interface.

It's up to the ndiswrapper authors how they licence their code, but they
should respect how we licence ours.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace

2007-12-01 Thread Eric W. Biederman

Pavel Emelyanov [EMAIL PROTECTED] writes:

 But I gotta say this struct/file is going to be enormous.  It's also
 one of those files that causes everything to get recompiled.  Maybe
 we ought to make a rule that each subsystem only gets to have at most
 one entry in it :)

 Thanks,
 
 Good point, thanks. We'll start thinking in that direction. Right now it
 is not finally cursed with all staff around.

 Agree, the point is good :) but it has one pitfall :(

 Look, now we make _one_ dereference to get any net-xxx variable 
 (sysctl, list head, lock, etc). When we force each subsystem 
 has it's private pointer on this, we'll make them take _two_ 
 dereferences. Before the whole net namespace stuff started we
 made _zero_ dereferences :) This may tell upon the performance.

 I'm not claiming that this is the major case against this idea,
 but when developing this idea, I think we should keep that fact
 in mind and pay good attention to performance regressions.

Currently in my proof of concept tree I am at 65 variables and 648 bytes.
This includes patches that are largely complete for ipv4.  In number
of variables this is about half of the current struct net_device,
so I think the usage looks manageable.

I agree that both performance and size are significant concerns,
and that is essentially why struct net has the structure it does
today.

I print the size of struct net out at boot, we have to actually look
at struct net when we make changes, so I don't think size bloat
is going to happen unnoticed.

By keeping the size below PAGE_SIZE, and keeping the number of
variables per network subsystem few and small we should be ok.

The fact that changing struct net causes the core of
the networking stack to recompile is an added bonus
that should also discourage people from playing with it too
much.

My recommendation is to keep an eye on struct net and if what we
are doing there becomes a problem address it then.

Eric
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Eric W. Biederman

Stephen Hemminger [EMAIL PROTECTED] writes:

 On Sat, 01 Dec 2007 08:10:17 -0500
 Mark Lord [EMAIL PROTECTED] wrote:

  Now that we have network namespace support merged it is time to
  revisit the sysfs support so we can remove the dependency on !SYSFS.
 ...
 
 Now that the namespace updates are part of 2.6.24,
 there is a major inconsistency in network EXPORT_SYMBOLs.
 
 It used to be that an external network module could get away without
 having to add a MODULE_LICENSE(GPL*) line to the source.
 
 In support of that, common networking functions (still) use EXPORT_SYMBOL()
 rather than the more restrictive EXPORT_SYMBOL_GPL().
 
 Eg.  register_netdev(), sk_alloc(), __dev_get_by_name().
 
 But now, none of those three are actually usable by default,
 because they all require init_net, which is EXPORT_SYMBOL_GPL().

Which alternative kernel does the above comment apply to?

 Then init_net needs to be not GPL limited. Sorry, we need to allow
 non GPL network drivers.

For the record network drivers should not be affected.  As a practical
measure that just gets unmaintainable and it is unnecessary.

There are specific exceptions where network drivers mess with the userspace
interfaces where I do have some impact.  However if you are messing
with our userspace interface especially with network namespaces in place
I don't see how it is possible for you to be anything other than a derivative
work, and something we need in tree to keep maintenance a manageable thing.

It should just be the core of the network stack that struct net has some
effect on.

 There is a fine line between keeping the
 binary seething masses from accessing random kernel functions, and allowing
 reasonable (but still non GPL) things like ndiswrapper to use network
 device interface.

Does ndiswrapper break?  If so what dubious and unsupportable thing is
it doing?

Eric
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Stephen Hemminger

On Sat, 1 Dec 2007 19:23:41 +
Alan Cox [EMAIL PROTECTED] wrote:

  Then init_net needs to be not GPL limited. Sorry, we need to allow
  non GPL network drivers.  There is a fine line between keeping the
 
 Why - they aren't exactly likely to be permissible by law

Matter of debate in which there are several opinions.
I don't like binary modules either, but don't feel qualified to render
a legal opinion.

 
  binary seething masses from accessing random kernel functions, and allowing
  reasonable (but still non GPL) things like ndiswrapper to use network
  device interface.
 
 Its up to the ndiswrapper authors how the licence their code, but they
 should respect how we licence ours.

Then change the license, explicitly and get it approved, forcing license
changes by technically subversive means is bad policy. It is like Euro 
bureaucrats
sneaking in software patents in regulations. If you want to have the debate and
can get it resolved, then I support you.

Actually, the whole mess would go away if the api for dev_get_by_ hadn't
been changed in the namespace transition. IMHO the interface to 
dev_get_by_name()
should not have added a namespace parameter, of the callers in the tree, only
two use a different namespace. So it would have been better to introduce
dev_get_by_name_ns() with the extra parameter.

Can we get this resolved before 2.6.24 is released? Going back and forth
on API's is just needless frottage.

-- 
Stephen Hemminger [EMAIL PROTECTED]
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Alan Cox

 Then change the license, explicitly and get it approved, forcing license
 changes by technically subversive means is bad policy. It is like Euro 
 bureaucrats

I don't need to - the licence has been the same since about 0.12

Alan
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Eric W. Biederman

Stephen Hemminger [EMAIL PROTECTED] writes:

 Actually, the whole mess would go away if the api for dev_get_by_ hadn't
 been changed in the namespace transition. IMHO the interface to
 dev_get_by_name()
 should not have added a namespace parameter, of the callers in the tree, only
 two use a different namespace. So it would have been better to to introduce
 dev_get_by_name_ns() with the extra parameter.

As a general rule if you are calling dev_get_by_name and taking an init_net
parameter that means your code has not yet been converted to actually support
network namespaces.

Not everything can be safely changed at once so we take it by steps.  When
the code fully supports network namespaces practically nothing will take
an init_net parameter.  The network namespace parameter will come in
some form from userspace.  Either from current or from the network
socket.

Except for boot time initialization I don't know of any cases using
dev_get_by_ that won't need to be modified before the network
namespace work is complete.

I believe I mentioned at the outset that getting full network namespace
support was going to take a while and a bunch of patches.

 Can we get this resolved before 2.6.24 is released? Going back and forth
 on API's is just needless frottage.

Sure.  We keep the updated dev_get_by_ that takes a network
namespace parameter.

Or is there some legitimate usage of it by out-of-tree code that
I'm not aware of?

Eric
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Mark Lord


Eric W. Biederman wrote:

Stephen Hemminger [EMAIL PROTECTED] writes:


Actually, the whole mess would go away if the api for dev_get_by_ hadn't
been changed in the namespace transition. IMHO the interface to
dev_get_by_name()
should not have added a namespace parameter, of the callers in the tree, only
two use a different namespace. So it would have been better to to introduce
dev_get_by_name_ns() with the extra parameter.


As a general rule if you are calling dev_get_by_name and taking an init_net
parameter that means you code has not yet been converted to actually support
network namespaces.

Not everything can be safely changed at once so we take it by steps.  When
the code fully supports network namespaces practically nothing will take
an init_net parameter.  The network namespace parameter will come in
some form from userspace.  Either from current or from the network
socket.

Except for boot time initialization I don't know of any cases using
dev_get_by_ that won't need to be modified before the network
namespace work is complete.

I believe I mentioned that this getting the fully network namespace
support was going to take a while and a bunch of patches at the
outset.


Can we get this resolved before 2.6.24 is released? Going back and forth
on API's is just needless frottage.


Sure.  We keep the updated dev_get_by_ that takes a network
namespace parameter.

..

And what should code be passing in when # CONFIG_NET_NS is not set ?
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: SSB: No is not an answer

2007-12-01 Thread Michael Buesch

On Saturday 01 December 2007 20:00:23 Arnaldo Carvalho de Melo wrote:
 Em Sat, Dec 01, 2007 at 12:45:32PM -0500, John W. Linville escreveu:
  On Sat, Dec 01, 2007 at 03:17:44PM -0200, Arnaldo Carvalho de Melo wrote:
   Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n
   
   Support for the Sonics Silicon Backplane bus.
   You only need to enable this option, if you are
   configuring a kernel for an embedded system with
   this bus.
   It will be auto-selected if needed in other
   environments.
   
   The module will be called ssb.
   
   If unsure, say N.
   
   Sonics Silicon Backplane support (SSB) [M/y/?] (NEW)
  
  I think this is OK -- it isn't really offering the choice to say
  no anyway.  You must have turned-on B44 or B43(LEGACY) already?
  
  So, your choice is merely whether to have it built-in or as a module.
 
 Ok, so the comment on being unsure is wrong as we can't say N as
 suggested :-)

Oh, come on... Read the _whole_ comment.

-- 
Greetings Michael.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Arjan van de Ven

On Sat, 01 Dec 2007 15:21:12 -0500
Mark Lord [EMAIL PROTECTED] wrote:

 Eric W. Biederman wrote:
  Stephen Hemminger [EMAIL PROTECTED] writes:
  Sure.  We keep the updated dev_get_by_ that takes a network
  namespace parameter.
 ..
 
 And what should code be passing in when # CONFIG_NET_NS is not set ?

network drivers probably really really don't want to call
dev_get_by_XXX...

in fact no NIC driver in drivers/net does so!
Sounds like whatever driver you're looking at has a nasty bug in that
it's using non-driver APIs...



-- 
If you want to reach me at my work email, use [EMAIL PROTECTED]
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Eric W. Biederman

Mark Lord [EMAIL PROTECTED] writes:

 Can we get this resolved before 2.6.24 is released? Going back and forth
 on API's is just needless frottage.

 Sure.  We keep the updated dev_get_by_ that takes a network
 namespace parameter.
 ..

 And what should code be passing in when # CONFIG_NET_NS is not set ?

Mostly CONFIG_NET_NS is a define to keep us from exposing the feature to
user space not to remove the code impact.  People could not stand the
look of the code that would actually allow us to compile everything out.

So all of the struct net * fields remain when !CONFIG_NET_NS.
Including the global variable init_net.

Eric
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv7 iptables] Interface group match

2007-12-01 Thread Jarek Poplawski

Laszlo Attila Toth wrote, On 11/29/2007 05:11 PM:
...

 Index: extensions/libxt_ifgroup.man
 ===
 --- extensions/libxt_ifgroup.man  (revision 0)
 +++ extensions/libxt_ifgroup.man  (revision 0)
 @@ -0,0 +1,36 @@
 +Maches packets on an interface if it is in the same interface group


  +Matches packets on an interface if it is in the same interface group

 +as specified by the
 +.B --ifgroup-in
 +or
 +.B --ifgroup-in


  +.B --ifgroup-out

 +parameter. If a mask is also specified, the masked value of
 +the inteface's group must be equal to the given value of the


  +the interface's group must be equal to the given value of the

 +.B --ifgroup-in
 +or
 +.B --ifgroup-out
 +parameter to match. This match is available in all tables.
 +.TP
 +.BR [!] --ifgroup-in \fIgroup[/mask]\fR
 +This specifies the interface group of input interface and the optional mask.
 +Valid only in the in the


  +Valid only in the

 +.B PREROUTING
 +and
 +.B INPUT
 +and
 +.B FORWARD
 +chains, and user-defined chains which are only called from those
 +chains. 
 +.TP
 +.BR [!] --ifgroup-out \fIgroup[/mask]\fR
 +This specifies the interface group of out interface and the optional mask.


  +This specifies the interface group of output interface and the optional mask.

 +Valid only in the in the
 +.B FORWARD
 +and
 +.B OUTPUT
 +and
 +.B POSTROUTING
 +chains, and user-defined chains which are only called from those
 +chains. 
 +.RS
 +.PP
 +

Regards,
Jarek P.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Please pull 'fixes-jgarzik' branch of wireless-2.6

2007-12-01 Thread Jeff Garzik


John W. Linville wrote:

Jeff,

A few fixes intended for 2.6.24...

Let me know if there are any problems!

Thanks,

John

---

Individual patches are available here:

http://www.kernel.org/pub/linux/kernel/people/linville/wireless-2.6.git 
fixes-jgarzik

---

The following changes since commit d9f8bcbf67a0ee67c8cb0734f003dfe916bb5248:
  Linus Torvalds (1):
Linux 2.6.24-rc3

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
fixes-jgarzik

David Woodhouse (1):
  libertas: Don't set NETIF_F_IPV6_CSUM in dev-features

Holger Schurig (1):
  libertas: let more than one MAC event through



pulled



--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv7 iptables] Interface group match

2007-12-01 Thread Jarek Poplawski

Jarek Poplawski wrote, On 12/01/2007 10:19 PM:

 Laszlo Attila Toth wrote, On 11/29/2007 05:11 PM:
 ...
 
 Index: extensions/libxt_ifgroup.man


...

 +Valid only in the in the

   +Valid only in the

 +.B FORWARD
 +and
 +.B OUTPUT
 +and
 +.B POSTROUTING
 +chains, and user-defined chains which are only called from those
 +chains. 
 +.RS
 +.PP
 +
 
 Regards,
 Jarek P.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2.6.24 1/1]S2io: Fixed the case when the card initialization fails on mtu change

2007-12-01 Thread Jeff Garzik


Sreenivasa Honnur wrote:
Fix the case when the card initialization fails on a mtu change and then 
close is called (due to ifdown), which frees non existent rx buffers.

- Returning appropriate error codes in init_nic function.
- In s2io_close function s2io_card_down is called only when device is up.
- In s2io_change_mtu function return value of s2io_card_up function
  is checked and returned if it failed.

Signed-off-by: Surjit Reang [EMAIL PROTECTED]
Signed-off-by: Sreenivasa Honnur [EMAIL PROTECTED]
Signed-off-by: Ramkrishna Vepa [EMAIL PROTECTED]
---
diff -Nurp patch_8/drivers/net/s2io.c patch_9/drivers/net/s2io.c
--- patch_8/drivers/net/s2io.c  2007-11-20 23:31:57.0 +0530
+++ patch_9/drivers/net/s2io.c  2007-11-20 23:13:24.0 +0530
@@ -84,7 +84,7 @@
 #include s2io.h
 #include s2io-regs.h
 
-#define DRV_VERSION 2.0.26.8

+#define DRV_VERSION 2.0.26.9
 
 /* S2io Driver name  version. */

 static char s2io_driver_name[] = Neterion;


applied patch to 2.6.24-rc, except for the above chunk, which failed


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/6] skge: FIFO Ram calculation error

2007-12-01 Thread Jeff Garzik


applied 1-6 and additional 1-2

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3][RESEND] phylib: add PHY interface modes for internal delay for tx and rx only

2007-12-01 Thread Jeff Garzik


Kim Phillips wrote:

Allow phylib specification of cases where hardware needs to configure
PHYs for Internal Delay only on either RX or TX (not both).

Signed-off-by: Kim Phillips [EMAIL PROTECTED]
Tested-by: Anton Vorontsov [EMAIL PROTECTED]
Acked-by: Li Yang [EMAIL PROTECTED]
---
 include/linux/phy.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index f0742b6..e10763d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -58,6 +58,8 @@ typedef enum {
PHY_INTERFACE_MODE_RMII,
PHY_INTERFACE_MODE_RGMII,
PHY_INTERFACE_MODE_RGMII_ID,
+   PHY_INTERFACE_MODE_RGMII_RXID,
+   PHY_INTERFACE_MODE_RGMII_TXID,
PHY_INTERFACE_MODE_RTBI
 } phy_interface_t;


applied 1-3


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch 1/1] ctc: make use of alloc_netdev()

2007-12-01 Thread Jeff Garzik


applied


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] cxgb - fix T2 GSO

2007-12-01 Thread Jeff Garzik


Divy Le Ray wrote:

From: Divy Le Ray [EMAIL PROTECTED]

The patch ensures that a GSO skb has enough headroom
to push an encapsulating cpl_tx_pkt_lso header.

Signed-off-by: Divy Le Ray [EMAIL PROTECTED]
---


applied 1-3


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] sky2: revert to access PCI config via device space

2007-12-01 Thread Jeff Garzik


Stephen Hemminger wrote:

Using the hardware window into PCI config space is more reliable
and smaller/faster than using the pci_config routines. It avoids issues
with MMCONFIG etc.

Reverts: 167f53d05fccb47b6eeadac7f6705b3f2f042d03

Please apply for 2.6.24

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


applied 1-3


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] e1000: Fix NAPI state bug when Rx complete

2007-12-01 Thread Jeff Garzik


Auke Kok wrote:

Don't exit polling when we have not yet used our budget, this causes
the NAPI system to end up with a messed up poll list.

Signed-off-by: Auke Kok [EMAIL PROTECTED]
---

 drivers/net/e1000/e1000_main.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index b7c3070..724f067 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3926,7 +3926,7 @@ e1000_clean(struct napi_struct *napi, int budget)
  work_done, budget);
 
 	/* If no Tx and not enough Rx work done, exit the polling mode */

-   if ((!tx_cleaned  (work_done  budget)) ||
+   if ((!tx_cleaned  (work_done == 0)) ||
   !netif_running(poll_dev)) {


applied


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] SET_NETDEV_DEV() in fec_mpc52xx.c

2007-12-01 Thread Jeff Garzik


David Woodhouse wrote:

This helps to allow the Fedora installer to use the built-in Ethernet on
the Efika for a network install.

Signed-off-by: David Woodhouse [EMAIL PROTECTED]

--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -971,6 +971,8 @@ mpc52xx_fec_probe(struct of_device *op, const struct 
of_device_id *match)
 
 	mpc52xx_fec_reset_stats(ndev);
 
+	SET_NETDEV_DEV(ndev, op-dev);

+
/* Register the new network device */
rv = register_netdev(ndev);


applied


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Stop phy code from returning success to unknown ioctls.

2007-12-01 Thread Jeff Garzik


David Woodhouse wrote:

This kind of sucks, and prevents the Fedora installer from using the
device for network installs...

[EMAIL PROTECTED] phy]# iwconfig eth0
Warning: Driver for device eth0 has been compiled with an ancient version  
of Wireless Extension, while this program support version 11 and later.
Some things may be broken...   
   
eth0ESSID:off/any  Nickname: 
  NWID:0  Channel:0  Access Point: 00:00:BF:81:14:E0   
  Bit Rate:-1.08206e+06 kb/s   Sensitivity=0/0 
  RTS thr:off   Fragment thr:off   
  Encryption key:too big 
  Power Management:off 
   
Signed-off-by: David Woodhouse [EMAIL PROTECTED]


diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 9bc1177..7c9e6e3 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -406,6 +406,9 @@ int phy_mii_ioctl(struct phy_device *phydev,
 phydev-drv-config_init)
phydev-drv-config_init(phydev);
break;
+
+   default:
+   return -ENOTTY;
}
 


applied


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] via-velocity: don't oops on MTU change (resend)

2007-12-01 Thread Jeff Garzik


Stephen Hemminger wrote:

The VIA velocity driver needs the following to allow changing MTU when down.
The buffer size needs to be computed when device is brought up, not when
device is initialized.  This also fixes a bug where the buffer size was
computed differently on change_mtu versus initial setting.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

---
This is a properly formatted version of previously submitted patch.
Please apply for 2.6.24


applied


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] LIB82596: correct data types for hardware addresses

2007-12-01 Thread Jeff Garzik


Thomas Bogendoerfer wrote:

dma_addr_t is 64bit wide on some architectures (for example 64bit MIPS),
so it's not a good idea to use it for 32bit wide addresses in descriptors.

Signed-off-by: Thomas Bogendoerfer [EMAIL PROTECTED]
---

 drivers/net/lib82596.c |   50 
 1 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c
index 9a855e5..b59f442 100644
--- a/drivers/net/lib82596.c
+++ b/drivers/net/lib82596.c
@@ -176,8 +176,8 @@ struct i596_reg {
 struct i596_tbd {
unsigned short size;
unsigned short pad;
-   dma_addr_t next;
-   dma_addr_t data;
+   u32next;
+   u32data;
u32 cache_pad[5];   /* Total 32 bytes... */
 };


applied, though it's incomplete for today's drivers.  I recommend 
converting those data types to the sparse data types that indicate 
endian-ness (see __le32, etc.).  Then verify that the code passes all 
sparse checks.


See Documentation/sparse.txt for more info.

Also, make sure it passes scripts/checkpatch.pl checks too, while you're 
at it...


Thanks,

Jeff



--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality

2007-12-01 Thread Anton Vorontsov

On Sat, Dec 01, 2007 at 02:48:54PM +0100, Jochen Friedrich wrote:
 Hi Vitaly,
 
  With that patch fixed.c now fully emulates MDIO bus, thus no need
  to duplicate PHY layer functionality. That, in turn, drastically
  simplifies the code, and drops down line count.
 
  As an additional bonus, now there is no need to register MDIO bus
  for each PHY, all emulated PHYs placed on the platform fixed MDIO bus.
  There is also no more need to pre-allocate PHYs via .config option,
  this is all now handled dynamically.
 
  p.s. Don't even try to understand patch content! Better: apply patch
  and look into resulting drivers/net/phy/fixed.c.

 If i understand your code correctly, you seem to rely on the fact 
 that fixed_phy_add() is called before the fixed MDIO bus is scanned for 
 devices.

Yes, indeed. Another name for fixed PHYs is platform PHYs: virtual
PHYs that are placed on a platform MDIO bus.

That is, these PHYs are supposed to be created by the platform setup
code (arch/). The rationale here is: we do hardware emulation, thus
to make drivers actually see that hardware, we have to create it
early.

 I tried to add fixed-phy support to fs_enet, but the fixed phy is not 
 found this way.
 
 --- a/drivers/net/fs_enet/fs_enet-main.c
 +++ b/drivers/net/fs_enet/fs_enet-main.c
 @@ -36,6 +36,7 @@
  #include linux/fs.h
  #include linux/platform_device.h
  #include linux/phy.h
 +#include linux/phy_fixed.h
  
  #include linux/vmalloc.h
  #include asm/pgtable.h
 @@ -1174,8 +1175,24 @@ static int __devinit find_phy(struct device_node *np,
 struct device_node *phynode, *mdionode;
 struct resource res;
 int ret = 0, len;
 +   const u32 *data;
 +   struct fixed_phy_status status = {};
 +
 +   data  = of_get_property(np, fixed-link, NULL);
 +   if (data) {
 +   status.link = 1;
 +   status.duplex = data[1];
 +   status.speed  = data[2];
 +
 +   ret = fixed_phy_add(PHY_POLL, data[0], status);
 +   if (ret)
 +   return ret;
 +
 +   snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data);
 +   return 0;
 +   }
  
 -   const u32 *data = of_get_property(np, phy-handle, len);
 +   data = of_get_property(np, phy-handle, len);
 if (!data || len != 4)
 return -EINVAL;

^^ the correct solution is to implement arch_initcall function
which will create fixed PHYs, and then leave only
snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data); part in the
fs_enet's find_phy().

Try adding something like this to fsl_soc.c (compile-untested):

- - - -
/*
 * Register a fixed (emulated) PHY for every device-tree node named
 * "ethernet" that carries a "fixed-link" property.
 *
 * Registered as an arch_initcall so that the fixed PHYs are created by
 * platform setup code, early enough for drivers to see them.
 *
 * The property cells are consumed as: [0] PHY id, [1] duplex, [2] speed.
 * The link is always reported as up.
 *
 * Returns 0 on success, or the first error returned by fixed_phy_add().
 */
static int __init of_add_fixed_phys(void)
{
	struct device_node *np = NULL;
	struct fixed_phy_status status = {};
	const u32 *data;
	int len;
	int ret;

	/*
	 * Feed the previous node back in as the search start; restarting
	 * from NULL on every pass would keep returning the first match and
	 * the loop would never advance.
	 */
	while ((np = of_find_node_by_name(np, "ethernet"))) {
		data = of_get_property(np, "fixed-link", &len);
		/* Skip nodes without the property or with fewer than the 3 cells read below. */
		if (!data || len < (int)(3 * sizeof(u32)))
			continue;

		status.link = 1;
		status.duplex = data[1];
		status.speed = data[2];

		ret = fixed_phy_add(PHY_POLL, data[0], &status);
		if (ret) {
			/*
			 * NOTE(review): of_find_node_by_name() is expected to
			 * return the node with a reference held; drop it since
			 * we stop iterating here -- confirm against the OF API.
			 */
			of_node_put(np);
			return ret;
		}
	}

	return 0;
}
arch_initcall(of_add_fixed_phys);
- - - -

And remove fixed_phy_add() from the fs_enet. This should work
nicely and also should be ideologically correct. ;-)

 How is this supposed to work for modules or for the
 PPC_CPM_NEW_BINDING mode where the device tree is no longer scanned
 during fs_soc initialization but during device initialization?

We should mark fixed.c as bool. Fake/virtual/fixed/platform PHYs
creation is architecture code anyway, can't be =m.

-- 
Anton Vorontsov
email: [EMAIL PROTECTED]
backup email: [EMAIL PROTECTED]
irc://irc.freenode.net/bd2
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] sky2: align IP header on Rx if possible

2007-12-01 Thread Jeff Garzik


Stephen Hemminger wrote:

The sky2 driver was not aligning the IP header on receive buffers.
This workaround is only needed on hardware with broken FIFO, newer chips
without FIFO can just DMA to unaligned address.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


applied 1-2


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [1/12] pasemi_mac: RX/TX ring management cleanup

2007-12-01 Thread Jeff Garzik


Olof Johansson wrote:

pasemi_mac: RX/TX ring management cleanup

Prepare a bit for supporting multiple TX queues by cleaning up some
of the ring management and shuffle things around a bit.

Signed-off-by: Olof Johansson [EMAIL PROTECTED]


applied 1-12


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] SGISEEQ: use cached memory access to make driver work on IP28

2007-12-01 Thread Jeff Garzik


Thomas Bogendoerfer wrote:

Following patch is clearly 2.6.25 material and is needed to get SGI IP28
machines supported.

Thomas.

SGI IP28 machines would need special treatment (enable adding additional
wait states) when accessing memory uncached. To avoid this pain I changed
the driver to use only cached access to memory.

Signed-off-by: Thomas Bogendoerfer [EMAIL PROTECTED]


applied.  As I have noted to you previously, /please/ put extraneous 
comments /after/ a --- separator, so that they are not copied by 
git-am (Linus's email patch import tool) into the permanent kernel 
changelog.


The above should look like:

<snip>
SGI IP28 machines would need special treatment (enable adding additional 
wait states) when accessing memory uncached. To avoid this pain I 
changed the driver to use only cached access to memory.


Signed-off-by: Thomas Bogendoerfer [EMAIL PROTECTED]
---
Following patch is clearly 2.6.25 material and is needed to get SGI IP28 
machines supported.


Thomas.

 drivers/net/sgiseeq.c |  239 ++---
 1 files changed, 166 insertions(+), 73 deletions(-)
</snip>


See Documentation/SubmittingPatches for more details, in particular 
"14) The canonical patch format", or http://linux.yyz.us/patch-format.html


Jeff


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] ethtool: fix typo on setting speed 10000

2007-12-01 Thread Jeff Garzik


Auke Kok wrote:

From: Jesse Brandeburg [EMAIL PROTECTED]

fix the typo in speed 10000 setting.

Signed-off-by: Jesse Brandeburg [EMAIL PROTECTED]
Signed-off-by: Auke Kok [EMAIL PROTECTED]
---

 ethtool.c |2 +-


applied


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Fw: [PATCH] Add the phy_device_release device method.

2007-12-01 Thread Jeff Garzik


Thierry, could you resend this patch to me?

I do not seem to have an apply-able version of this patch anywhere.

The copy DaveM forwarded to me had problems (though the technical 
content looks OK)


Thanks,

Jeff



--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Pull request for 'r6040' branch

2007-12-01 Thread Jeff Garzik


Francois Romieu wrote:

Please pull from branch 'r6040' in repository

git://git.kernel.org/pub/scm/linux/kernel/git/romieu/netdev-2.6.git r6040

to get the changes below.

Distance from 'netdev-2.6-upstream' (02e063b58b7c7084bae3d599c54dcf26c8efa9b7)
--

8dd657d2d82657c1d70219a704ccfe4fecfc55be
f00c12227fe587b6c1bbb6b459394db29dc5fac0
c7eaa9bde00c778b53f778d49617353d3b9b0c21
cc27eeb9474a87b2073488f37d9e90e6a3557664

Diffstat


 drivers/net/r6040.c |  138 ++-
 include/linux/pci_ids.h |1 -
 2 files changed, 64 insertions(+), 75 deletions(-)

Shortlog


Francois Romieu (4):
  r6040: compile error
  r6040: remove virt_to_bus
  r6040: erroneous dev-priv
  r6040: cleanups


changes pulled, thanks for helping to clean this up!


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality

2007-12-01 Thread Jeff Garzik


Vitaly Bordug wrote:

With that patch fixed.c now fully emulates MDIO bus, thus no need
to duplicate PHY layer functionality. That, in turn, drastically
simplifies the code, and drops down line count.

As an additional bonus, now there is no need to register MDIO bus
for each PHY, all emulated PHYs placed on the platform fixed MDIO bus.
There is also no more need to pre-allocate PHYs via .config option,
this is all now handled dynamically.

p.s. Don't even try to understand patch content! Better: apply patch
and look into resulting drivers/net/phy/fixed.c.

Signed-off-by: Anton Vorontsov [EMAIL PROTECTED]
Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]


what's the context of this patchset?  2.6.25?

it's late for 2.6.24-rc, IMO.

Do I have the latest version (sent Nov 26 @ 9:29am)?

Jeff



--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv7 iproute2 2/2] Interface group as new ip link option

2007-12-01 Thread Jarek Poplawski

Laszlo Attila Toth wrote, On 11/29/2007 05:11 PM:

 Interfaces can be grouped and each group has an unique positive integer ID.
 It can be set via ip link. Symbolic names can be specified in
 /etc/iproute2/rt_ifgroup. Any value of unsigned int32 is valid.

...

 diff --git a/lib/rt_names.c b/lib/rt_names.c
 index 8d019a0..ec6638c 100644
 --- a/lib/rt_names.c
 +++ b/lib/rt_names.c
 @@ -439,10 +439,72 @@ int rtnl_dsfield_a2n(__u32 *id, char *arg)
   }
   }
  
 - res = strtoul(arg, end, 16);
 + res = strtoul(arg, end, 0);


Won't this break any scripts?

Jarek P.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Mark Lord


Arjan van de Ven wrote:

On Sat, 01 Dec 2007 15:21:12 -0500
Mark Lord [EMAIL PROTECTED] wrote:


Eric W. Biederman wrote:

Stephen Hemminger [EMAIL PROTECTED] writes:
Sure.  We keep the updated dev_get_by_XXX that takes a network
namespace parameter.

..

And what should code be passing in when # CONFIG_NET_NS is not set ?


network drivers probably really really don't want to call
dev_get_by_XXX...

..

Fine.  But all of them want to call sk_alloc(),
and many want to do register_netdev().

So what should they be using there ?
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: namespace support requires network modules to say GPL

2007-12-01 Thread Mark Lord


Arjan van de Ven wrote:

On Sat, 01 Dec 2007 15:21:12 -0500
Mark Lord [EMAIL PROTECTED] wrote:


Eric W. Biederman wrote:

Stephen Hemminger [EMAIL PROTECTED] writes:
Sure.  We keep the updated dev_get_by_XXX that takes a network
namespace parameter.

..

And what should code be passing in when # CONFIG_NET_NS is not set ?


network drivers probably really really don't want to call
dev_get_by_XXX...

..

Fine.  But all of them want to call sk_alloc(),
and many want to do register_netdev().

So what should they be using there ?

And please STOP trimming the CC list.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality

2007-12-01 Thread Vitaly Bordug

On Sat, 01 Dec 2007 16:59:52 -0500
Jeff Garzik wrote:

 Vitaly Bordug wrote:
  With that patch fixed.c now fully emulates MDIO bus, thus no need
  to duplicate PHY layer functionality. That, in turn, drastically
  simplifies the code, and drops down line count.
  
  As an additional bonus, now there is no need to register MDIO bus
  for each PHY, all emulated PHYs placed on the platform fixed MDIO
  bus. There is also no more need to pre-allocate PHYs via .config
  option, this is all now handled dynamically.
  
  p.s. Don't even try to understand patch content! Better: apply patch
  and look into resulting drivers/net/phy/fixed.c.
  
  Signed-off-by: Anton Vorontsov [EMAIL PROTECTED]
  Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]
 
 what's the context of this patchset?  2.6.25?
 
Fine with it.

 it's late for 2.6.24-rc, IMO.
 
 Do I have the latest version (sent Nov 26 @ 9:29am)?
yes, that's it.

-- 
Sincerely, Vitaly
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality

2007-12-01 Thread Vitaly Bordug

On Sun, 2 Dec 2007 00:34:03 +0300
Anton Vorontsov wrote:

  If i understand your code correctly, you seem to rely on the fact 
  that fixed_phy_add() is called before the fixed MDIO bus is scanned
  for devices.  
 
 Yes, indeed. The other name of fixed phys are platform phys
 or platform MDIO bus on which virtual PHYs are placed.
 
 That is, these phys supposed to be created by the platform setup
 code (arch/). The rationale here is: we do hardware emulation, thus
 to make drivers actually see that hardware, we have to create it
 early.

Well, that was the intention, but... The point is: as the device is emulated,
(nearly) everything is doable, and the only tradeoff to consider is how far
we go with that emulation. IOW, PHYlib could be tricked into doing the right
thing, and I thought about adding module flexibility...

But thinking more about it, it seems that BSP-code PHY creation just sucks
less and is clear enough yet flexible.
-- 
Sincerely, Vitaly
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 01/21] [TCP]: Move LOSTRETRANS MIB outside !(L|S) check

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Usually those skbs will have L set, not counting them as lost
retransmissions is misleading.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_input.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9f12541..ba05e16 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1160,8 +1160,8 @@ static int tcp_mark_lost_retrans(struct sock *sk)
tp-lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)-sacked |= TCPCB_LOST;
flag |= FLAG_DATA_SACKED;
-   NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
}
+   NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
if (before(ack_seq, new_low_seq))
new_low_seq = ack_seq;
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC PATCH net-2.6.25 0/21]: TCP tweaks RB-tree WIP preview

2007-12-01 Thread Ilpo Järvinen

Hi all,

First, there are some rather trivial ones (up to "[TCP]: Cleanup
local variables of clean_rtx_queue")...

They're followed by my current efforts to improve SACK processing
latencies with large windows, including RB-tree for fast
searching, problem space split to provide tight bounds that
relate to the amount of new information discovered through the
received SACK block, per skb fack_count to provide access to
fack_count at search provided entry point. There's still need
for additional changes after these to really provide such good
bounds without loop-holes but this is a big step in the right
direction already.

The cost of storing all this seems tremendous; however, the new
structures also enable dropping many existing caches, making
the end result much nicer (these are not yet done). The potential
kill list includes at least: most _hints drop, recv_sack_cache
drop, highest_sack skb->seqno restore... Also, the linked list
for the SACKed part is probably unnecessary, but having it makes
things simpler; it can be killed later on rather than adding
complexity to this patch.

...I would like to still take some time to make the last patch
cleaner by extracting at least the DSACK separation from it.
Though, the amount of necessary changes remains still relatively
huge in the core patch. ...Minor FIXMEs todo, only the DSACK one
is really preventive one.

Comments welcome, especially about the last patch.

--
 i.



--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 02/21] [TCP]: Remove superfluous FLAG_DATA_SACKED

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

To get there, highest_sack must have advanced. When it advances,
a new skb is SACKed, which already sets that FLAG. Besides, the
original purpose of it has puzzled me, never understood why
LOST bit setting of retransmitted skb is marked with
FLAG_DATA_SACKED.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_input.c |   10 +++---
 1 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ba05e16..6986a2d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1118,12 +1118,11 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, 
int is_dsack,
  * highest SACK block). Also calculate the lowest snd_nxt among the remaining
  * retransmitted skbs to avoid some costly processing per ACKs.
  */
-static int tcp_mark_lost_retrans(struct sock *sk)
+static void tcp_mark_lost_retrans(struct sock *sk)
 {
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
-   int flag = 0;
int cnt = 0;
u32 new_low_seq = tp-snd_nxt;
u32 received_upto = TCP_SKB_CB(tp-highest_sack)-end_seq;
@@ -1131,7 +1130,7 @@ static int tcp_mark_lost_retrans(struct sock *sk)
if (!tcp_is_fack(tp) || !tp-retrans_out ||
!after(received_upto, tp-lost_retrans_low) ||
icsk-icsk_ca_state != TCP_CA_Recovery)
-   return flag;
+   return;
 
tcp_for_write_queue(skb, sk) {
u32 ack_seq = TCP_SKB_CB(skb)-ack_seq;
@@ -1159,7 +1158,6 @@ static int tcp_mark_lost_retrans(struct sock *sk)
if (!(TCP_SKB_CB(skb)-sacked  
(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tp-lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)-sacked |= TCPCB_LOST;
-   flag |= FLAG_DATA_SACKED;
}
NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
@@ -1171,8 +1169,6 @@ static int tcp_mark_lost_retrans(struct sock *sk)
 
if (tp-retrans_out)
tp-lost_retrans_low = new_low_seq;
-
-   return flag;
 }
 
 static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
@@ -1603,7 +1599,7 @@ advance_sp:
for (j = 0; j  used_sacks; j++)
tp-recv_sack_cache[i++] = sp[j];
 
-   flag |= tcp_mark_lost_retrans(sk);
+   tcp_mark_lost_retrans(sk);
 
tcp_verify_left_out(tp);
 
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 04/21] [TCP] Cong.ctrl modules: remove unused good_ack from cong_avoid

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 include/net/tcp.h|4 ++--
 net/ipv4/tcp_bic.c   |3 +--
 net/ipv4/tcp_cong.c  |2 +-
 net/ipv4/tcp_cubic.c |3 +--
 net/ipv4/tcp_highspeed.c |3 +--
 net/ipv4/tcp_htcp.c  |3 +--
 net/ipv4/tcp_hybla.c |5 ++---
 net/ipv4/tcp_illinois.c  |3 +--
 net/ipv4/tcp_input.c |9 -
 net/ipv4/tcp_lp.c|4 ++--
 net/ipv4/tcp_scalable.c  |3 +--
 net/ipv4/tcp_vegas.c |7 +++
 net/ipv4/tcp_veno.c  |7 +++
 net/ipv4/tcp_yeah.c  |3 +--
 14 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7e58326..cdd0050 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -652,7 +652,7 @@ struct tcp_congestion_ops {
/* lower bound for congestion window (optional) */
u32 (*min_cwnd)(const struct sock *sk);
/* do new cwnd calculation (required) */
-   void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int 
good_ack);
+   void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight);
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */
@@ -683,7 +683,7 @@ extern void tcp_slow_start(struct tcp_sock *tp);
 
 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 extern u32 tcp_reno_ssthresh(struct sock *sk);
-extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int 
flag);
+extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight);
 extern u32 tcp_reno_min_cwnd(const struct sock *sk);
 extern struct tcp_congestion_ops tcp_reno;
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5dba0fc..5212ed9 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -136,8 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 
cwnd)
ca-cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack,
- u32 in_flight, int data_acked)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 55fca18..4451750 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -324,7 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
 /* This is Jacobson's slow start and congestion avoidance.
  * SIGCOMM '88, p. 328.
  */
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 80bd084..3aa0b23 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -246,8 +246,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 
cwnd)
ca-cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack,
- u32 in_flight, int data_acked)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 14a073d..8b6caaf 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,8 +109,7 @@ static void hstcp_init(struct sock *sk)
tp-snd_cwnd_clamp = min_t(u32, tp-snd_cwnd_clamp, 0x/128);
 }
 
-static void hstcp_cong_avoid(struct sock *sk, u32 adk,
-u32 in_flight, int data_acked)
+static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 5215691..af99776 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -225,8 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
return max((tp-snd_cwnd * ca-beta)  7, 2U);
 }
 
-static void htcp_cong_avoid(struct sock *sk, u32 ack,
-   u32 in_flight, int data_acked)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index b3e55cf..44618b6 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -85,8 +85,7 @@ static inline u32 hybla_fraction(u32 odds)
  * o Give cwnd a new value based on the model proposed
  * o remember increments 1
  */
-static void hybla_cong_avoid(struct sock *sk, u32 ack,
-   u32 in_flight, int flag)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca =

[PATCH 03/21] [TCP]: Unite identical code from two seqno split blocks

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Bogus seqno compares just mislead, the code is identical for
both sides of the seqno compare (and was even executed just
once because of return in between).

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_input.c |7 +--
 1 files changed, 1 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6986a2d..29268df 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1246,8 +1246,7 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct 
tcp_sock *tp,
if (dup_sack  (sacked  TCPCB_RETRANS)) {
if (after(TCP_SKB_CB(skb)-end_seq, tp-undo_marker))
tp-undo_retrans--;
-   if (!after(TCP_SKB_CB(skb)-end_seq, tp-snd_una) 
-   (sacked  TCPCB_SACKED_ACKED))
+   if (sacked  TCPCB_SACKED_ACKED)
*reord = min(fack_count, *reord);
}
 
@@ -1310,10 +1309,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct 
tcp_sock *tp,
 
if (after(TCP_SKB_CB(skb)-seq, tcp_highest_sack_seq(tp)))
tp-highest_sack = skb;
-
-   } else {
-   if (dup_sack  (sacked  TCPCB_RETRANS))
-   *reord = min(fack_count, *reord);
}
 
/* D-SACK. We can detect redundant retransmission in S|R and plain R
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 05/21] [TCP] MTUprobe: prepare skb fields earlier

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

They better be valid when call to write_queue functions is made
once things that follow are going in.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_output.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f8266f9..4cb4a7f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1343,7 +1343,6 @@ static int tcp_mtu_probe(struct sock *sk)
sk_charge_skb(sk, nskb);
 
skb = tcp_send_head(sk);
-   tcp_insert_write_queue_before(nskb, skb, sk);
 
TCP_SKB_CB(nskb)-seq = TCP_SKB_CB(skb)-seq;
TCP_SKB_CB(nskb)-end_seq = TCP_SKB_CB(skb)-seq + probe_size;
@@ -1352,6 +1351,8 @@ static int tcp_mtu_probe(struct sock *sk)
nskb-csum = 0;
nskb-ip_summed = skb-ip_summed;
 
+   tcp_insert_write_queue_before(nskb, skb, sk);
+
len = 0;
while (len  probe_size) {
next = tcp_write_queue_next(sk, skb);
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 09/21] [TCP]: Add unlikely() to urgent handling in clean_rtx_queue

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_input.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f0b6c7..365c6d4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2822,8 +2822,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
if (sacked  TCPCB_LOST)
tp-lost_out -= packets_acked;
 
-   if ((sacked  TCPCB_URG)  tp-urg_mode 
-   !before(end_seq, tp-snd_up))
+   if (unlikely((sacked  TCPCB_URG)  tp-urg_mode 
+!before(end_seq, tp-snd_up)))
tp-urg_mode = 0;
 
tp-packets_out -= packets_acked;
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 10/21] [TCP]: Cleanup local variables of clean_rtx_queue

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_input.c |   26 --
 1 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 365c6d4..a43c897 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2765,8 +2765,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
u32 now = tcp_time_stamp;
int fully_acked = 1;
int flag = 0;
-   int prior_packets = tp-packets_out;
-   u32 cnt = 0;
+   u32 pkts_acked = 0;
u32 reord = tp-packets_out;
s32 seq_rtt = -1;
ktime_t last_ackt = net_invalid_timestamp();
@@ -2774,7 +2773,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
while ((skb = tcp_write_queue_head(sk))  skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
u32 end_seq;
-   u32 packets_acked;
+   u32 acked_pcount;
u8 sacked = scb-sacked;
 
if (after(scb-end_seq, tp-snd_una)) {
@@ -2782,14 +2781,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
!after(tp-snd_una, scb-seq))
break;
 
-   packets_acked = tcp_tso_acked(sk, skb);
-   if (!packets_acked)
+   acked_pcount = tcp_tso_acked(sk, skb);
+   if (!acked_pcount)
break;
 
fully_acked = 0;
end_seq = tp-snd_una;
} else {
-   packets_acked = tcp_skb_pcount(skb);
+   acked_pcount = tcp_skb_pcount(skb);
end_seq = scb-end_seq;
}
 
@@ -2801,11 +2800,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
 
if (sacked  TCPCB_RETRANS) {
if (sacked  TCPCB_SACKED_RETRANS)
-   tp-retrans_out -= packets_acked;
+   tp-retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
if ((flag  FLAG_DATA_ACKED) ||
-   (packets_acked  1))
+   (acked_pcount  1))
flag |= FLAG_NONHEAD_RETRANS_ACKED;
} else {
if (seq_rtt  0) {
@@ -2814,20 +2813,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
last_ackt = skb-tstamp;
}
if (!(sacked  TCPCB_SACKED_ACKED))
-   reord = min(cnt, reord);
+   reord = min(pkts_acked, reord);
}
 
if (sacked  TCPCB_SACKED_ACKED)
-   tp-sacked_out -= packets_acked;
+   tp-sacked_out -= acked_pcount;
if (sacked  TCPCB_LOST)
-   tp-lost_out -= packets_acked;
+   tp-lost_out -= acked_pcount;
 
if (unlikely((sacked  TCPCB_URG)  tp-urg_mode 
 !before(end_seq, tp-snd_up)))
tp-urg_mode = 0;
 
-   tp-packets_out -= packets_acked;
-   cnt += packets_acked;
+   tp-packets_out -= acked_pcount;
+   pkts_acked += acked_pcount;
 
/* Initial outgoing SYN's get put onto the write_queue
 * just like anything else we transmit.  It is not
@@ -2852,7 +2851,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
}
 
if (flag  FLAG_ACKED) {
-   u32 pkts_acked = prior_packets - tp-packets_out;
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)-icsk_ca_ops;
 
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 08/21] [TCP]: Remove duplicated code block from clean_rtx_queue

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_input.c |   48 
 1 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ed2077c..3f0b6c7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2799,41 +2799,33 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 
*seq_rtt_p,
tcp_mtup_probe_success(sk, skb);
}
 
-   if (sacked) {
-   if (sacked  TCPCB_RETRANS) {
-   if (sacked  TCPCB_SACKED_RETRANS)
-   tp-retrans_out -= packets_acked;
-   flag |= FLAG_RETRANS_DATA_ACKED;
-   seq_rtt = -1;
-   if ((flag  FLAG_DATA_ACKED) ||
-   (packets_acked  1))
-   flag |= FLAG_NONHEAD_RETRANS_ACKED;
-   } else {
-   if (seq_rtt  0) {
-   seq_rtt = now - scb-when;
-   if (fully_acked)
-   last_ackt = skb-tstamp;
-   }
-   if (!(sacked  TCPCB_SACKED_ACKED))
-   reord = min(cnt, reord);
-   }
-
-   if (sacked  TCPCB_SACKED_ACKED)
-   tp-sacked_out -= packets_acked;
-   if (sacked  TCPCB_LOST)
-   tp-lost_out -= packets_acked;
-
-   if ((sacked  TCPCB_URG)  tp-urg_mode 
-   !before(end_seq, tp-snd_up))
-   tp-urg_mode = 0;
+   if (sacked  TCPCB_RETRANS) {
+   if (sacked  TCPCB_SACKED_RETRANS)
+   tp-retrans_out -= packets_acked;
+   flag |= FLAG_RETRANS_DATA_ACKED;
+   seq_rtt = -1;
+   if ((flag  FLAG_DATA_ACKED) ||
+   (packets_acked  1))
+   flag |= FLAG_NONHEAD_RETRANS_ACKED;
} else {
if (seq_rtt  0) {
seq_rtt = now - scb-when;
if (fully_acked)
last_ackt = skb-tstamp;
}
-   reord = min(cnt, reord);
+   if (!(sacked  TCPCB_SACKED_ACKED))
+   reord = min(cnt, reord);
}
+
+   if (sacked  TCPCB_SACKED_ACKED)
+   tp-sacked_out -= packets_acked;
+   if (sacked  TCPCB_LOST)
+   tp-lost_out -= packets_acked;
+
+   if ((sacked  TCPCB_URG)  tp-urg_mode 
+   !before(end_seq, tp-snd_up))
+   tp-urg_mode = 0;
+
tp-packets_out -= packets_acked;
cnt += packets_acked;
 
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 07/21] [TCP]: Add tcp_for_write_queue_from_safe and use it in mtu_probe

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 include/net/tcp.h |5 +
 net/ipv4/tcp_output.c |8 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cdd0050..6e392ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1229,6 +1229,11 @@ static inline struct sk_buff 
*tcp_write_queue_next(struct sock *sk, struct sk_bu
for (; (skb != (struct sk_buff *)(sk)-sk_write_queue);\
 skb = skb-next)
 
+#define tcp_for_write_queue_from_safe(skb, tmp, sk)\
+   for (tmp = skb-next;   \
+(skb != (struct sk_buff *)(sk)-sk_write_queue);  \
+skb = tmp, tmp = skb-next)
+
 static inline struct sk_buff *tcp_send_head(struct sock *sk)
 {
return sk-sk_send_head;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20365c0..4f2bd70 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1351,9 +1351,7 @@ static int tcp_mtu_probe(struct sock *sk)
tcp_insert_write_queue_before(nskb, skb, sk);
 
len = 0;
-   while (len  probe_size) {
-   next = tcp_write_queue_next(sk, skb);
-
+   tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb-len, probe_size - len);
if (nskb-ip_summed)
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
@@ -1382,7 +1380,9 @@ static int tcp_mtu_probe(struct sock *sk)
}
 
len += copy;
-   skb = next;
+
+   if (len = probe_size)
+   break;
}
tcp_init_tso_segs(sk, nskb, nskb-len);
 
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 12/21] [TCP]: Introduce per skb fack_counts to retransmit queue

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

The fack count of any skb in the retransmit queue at any
given point in time is:

(skb->fack_count - head_skb->fack_count)

And we'll use this in the SACK processing loops and possibly
elsewhere too.

The original idea came from David S. Miller; this version includes a couple
of bug fixes from Tom Quetchenbach [EMAIL PROTECTED].

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 include/net/tcp.h |   41 +
 1 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5ec1cac..967f256 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -555,6 +555,7 @@ struct tcp_skb_cb {
__u32   seq;/* Starting sequence number */
__u32   end_seq;/* SEQ + FIN + SYN + datalen*/
__u32   when;   /* used to compute rtt's*/
+   unsigned intfack_count; /* speed up SACK processing */
__u8flags;  /* TCP header flags.*/
 
/* NOTE: These must match up to the flags byte in a
@@ -1220,6 +1221,11 @@ static inline struct sk_buff 
*tcp_write_queue_next(struct sock *sk, struct sk_bu
return skb-next;
 }
 
+static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct 
sk_buff *skb)
+{
+   return skb-prev;
+}
+
 #define tcp_for_write_queue(skb, sk)   \
for (skb = (sk)-sk_write_queue.next;   \
 (skb != (struct sk_buff *)(sk)-sk_write_queue);  \
@@ -1241,6 +1247,11 @@ static inline struct sk_buff *tcp_send_head(struct sock 
*sk)
 
 static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
 {
+   struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+
+   TCP_SKB_CB(skb)-fack_count = TCP_SKB_CB(prev)-fack_count +
+ tcp_skb_pcount(prev);
+
sk-sk_send_head = skb-next;
if (sk-sk_send_head == (struct sk_buff *)sk-sk_write_queue)
sk-sk_send_head = NULL;
@@ -1259,6 +1270,7 @@ static inline void tcp_init_send_head(struct sock *sk)
 
 static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff 
*skb)
 {
+   TCP_SKB_CB(skb)-fack_count = 0;
__skb_queue_tail(sk-sk_write_queue, skb);
 }
 
@@ -1275,9 +1287,36 @@ static inline void tcp_add_write_queue_tail(struct sock 
*sk, struct sk_buff *skb
}
 }
 
+/* This is only used for tcp_send_synack(), so the write queue should
+ * be empty.  If that stops being true, the fack_count assignment
+ * will need to be more elaborate.
+ */
 static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff 
*skb)
 {
+   BUG_ON(!skb_queue_empty(sk-sk_write_queue));
__skb_queue_head(sk-sk_write_queue, skb);
+   TCP_SKB_CB(skb)-fack_count = 0;
+}
+
+/* An insert into the middle of the write queue causes the fack
+ * counts in subsequent packets to become invalid, fix them up.
+ */
+static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *skb)
+{
+   struct sk_buff *prev = skb-prev;
+   unsigned int fc = 0;
+
+   if (prev != (struct sk_buff *) sk-sk_write_queue)
+   fc = TCP_SKB_CB(prev)-fack_count + tcp_skb_pcount(prev);
+
+   tcp_for_write_queue_from(skb, sk) {
+   if (!before(TCP_SKB_CB(skb)-seq, tcp_sk(sk)-snd_nxt) ||
+   TCP_SKB_CB(skb)-fack_count == fc)
+   break;
+
+   TCP_SKB_CB(skb)-fack_count = fc;
+   fc += tcp_skb_pcount(skb);
+   }
 }
 
 /* Insert buff after skb on the write queue of sk.  */
@@ -1286,6 +1325,7 @@ static inline void tcp_insert_write_queue_after(struct 
sk_buff *skb,
struct sock *sk)
 {
__skb_append(skb, buff, sk-sk_write_queue);
+   tcp_reset_fack_counts(sk, buff);
 }
 
 /* Insert skb between prev and next on the write queue of sk.  */
@@ -1294,6 +1334,7 @@ static inline void tcp_insert_write_queue_before(struct 
sk_buff *new,
  struct sock *sk)
 {
__skb_insert(new, skb-prev, skb, sk-sk_write_queue);
+   tcp_reset_fack_counts(sk, new);
 
if (sk-sk_send_head == skb)
sk-sk_send_head = new;
-- 
1.5.0.6

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 11/21] [TCP]: Abstract tp->highest_sack accessing & point to next skb

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Pointing to the next skb is necessary to avoid referencing
already SACKed skbs which will soon be on a separate list.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 include/net/tcp.h |   35 ++-
 net/ipv4/tcp_input.c  |   27 +++
 net/ipv4/tcp_output.c |   11 +++
 3 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6e392ba..5ec1cac 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1267,8 +1267,12 @@ static inline void tcp_add_write_queue_tail(struct sock 
*sk, struct sk_buff *skb
__tcp_add_write_queue_tail(sk, skb);
 
/* Queue it, remembering where we must start sending. */
-   if (sk-sk_send_head == NULL)
+   if (sk-sk_send_head == NULL) {
sk-sk_send_head = skb;
+
+   if (tcp_sk(sk)-highest_sack == NULL)
+   tcp_sk(sk)-highest_sack = skb;
+   }
 }
 
 static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff 
*skb)
@@ -1318,9 +1322,38 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock 
*tp)
 {
if (!tp-sacked_out)
return tp-snd_una;
+
+   if (tp-highest_sack == NULL)
+   return tp-snd_nxt;
+
return TCP_SKB_CB(tp-highest_sack)-seq;
 }
 
+static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff 
*skb)
+{
+   tcp_sk(sk)-highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
+   tcp_write_queue_next(sk, skb);
+}
+
+static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
+{
+   return tcp_sk(sk)-highest_sack;
+}
+
+static inline void tcp_highest_sack_reset(struct sock *sk)
+{
+   tcp_sk(sk)-highest_sack = tcp_write_queue_head(sk);
+}
+
+/* Called when old skb is about to be deleted (to be combined with new skb) */
+static inline void tcp_highest_sack_combine(struct sock *sk,
+   struct sk_buff *old,
+   struct sk_buff *new)
+{
+   if (tcp_sk(sk)-sacked_out  (old == tcp_sk(sk)-highest_sack))
+   tcp_sk(sk)-highest_sack = new;
+}
+
 /* /proc */
 enum tcp_seq_states {
TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a43c897..35753b7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1125,7 +1125,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
struct sk_buff *skb;
int cnt = 0;
u32 new_low_seq = tp-snd_nxt;
-   u32 received_upto = TCP_SKB_CB(tp-highest_sack)-end_seq;
+   u32 received_upto = tcp_highest_sack_seq(tp);
 
if (!tcp_is_fack(tp) || !tp-retrans_out ||
!after(received_upto, tp-lost_retrans_low) ||
@@ -1236,9 +1236,10 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct 
sk_buff *skb,
return in_sack;
 }
 
-static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
   int *reord, int dup_sack, int fack_count)
 {
+   struct tcp_sock *tp = tcp_sk(sk);
u8 sacked = TCP_SKB_CB(skb)-sacked;
int flag = 0;
 
@@ -1307,8 +1308,8 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct 
tcp_sock *tp,
if (fack_count  tp-fackets_out)
tp-fackets_out = fack_count;
 
-   if (after(TCP_SKB_CB(skb)-seq, tcp_highest_sack_seq(tp)))
-   tp-highest_sack = skb;
+   if (!before(TCP_SKB_CB(skb)-seq, tcp_highest_sack_seq(tp)))
+   tcp_advance_highest_sack(sk, skb);
}
 
/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1330,8 +1331,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff 
*skb, struct sock *sk,
int dup_sack_in, int *fack_count,
int *reord, int *flag)
 {
-   struct tcp_sock *tp = tcp_sk(sk);
-
tcp_for_write_queue_from(skb, sk) {
int in_sack = 0;
int dup_sack = dup_sack_in;
@@ -1358,7 +1357,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff 
*skb, struct sock *sk,
break;
 
if (in_sack)
-   *flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, 
*fack_count);
+   *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, 
*fack_count);
 
*fack_count += tcp_skb_pcount(skb);
}
@@ -1429,7 +1428,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff 
*ack_skb, u32 prior_snd_
if (!tp-sacked_out) {
if (WARN_ON(tp-fackets_out))
tp-fackets_out = 0;
-   tp-highest_sack = tcp_write_queue_head(sk);
+   tcp_highest_sack_reset(sk);

[PATCH 14/21] [TCP]: Added queue parameter to _for_write_queue helpers

2007-12-01 Thread Ilpo Järvinen

From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED]

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 include/net/tcp.h |8 
 net/ipv4/tcp_input.c  |   18 +-
 net/ipv4/tcp_output.c |8 
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 433c6a6..0883697 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1232,16 +1232,16 @@ static inline struct sk_buff 
*tcp_write_queue_prev(struct sock *sk, struct sk_bu
return skb-prev;
 }
 
-#define tcp_for_write_queue(skb, sk)   \
+#define tcp_for_write_queue(skb, sk, queue)\
for (skb = (sk)-sk_write_queue.next;   \
 (skb != (struct sk_buff *)(sk)-sk_write_queue);  \
 skb = skb-next)
 
-#define tcp_for_write_queue_from(skb, sk)  \
+#define tcp_for_write_queue_from(skb, sk, queue)   \
for (; (skb != (struct sk_buff *)(sk)-sk_write_queue);\
 skb = skb-next)
 
-#define tcp_for_write_queue_from_safe(skb, tmp, sk)\
+#define tcp_for_write_queue_from_safe(skb, tmp, sk, queue) \
for (tmp = skb-next;   \
 (skb != (struct sk_buff *)(sk)-sk_write_queue);  \
 skb = tmp, tmp = skb-next)
@@ -1364,7 +1364,7 @@ static inline void tcp_reset_fack_counts(struct sock *sk, 
struct sk_buff *skb)
if (prev != (struct sk_buff *) sk-sk_write_queue)
fc = TCP_SKB_CB(prev)-fack_count + tcp_skb_pcount(prev);
 
-   tcp_for_write_queue_from(skb, sk) {
+   tcp_for_write_queue_from(skb, sk, 0) {
if (!before(TCP_SKB_CB(skb)-seq, tcp_sk(sk)-snd_nxt) ||
TCP_SKB_CB(skb)-fack_count == fc)
break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 35753b7..8a02de2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1132,7 +1132,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
icsk-icsk_ca_state != TCP_CA_Recovery)
return;
 
-   tcp_for_write_queue(skb, sk) {
+   tcp_for_write_queue(skb, sk, 0) {
u32 ack_seq = TCP_SKB_CB(skb)-ack_seq;
 
if (skb == tcp_send_head(sk))
@@ -1331,7 +1331,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff 
*skb, struct sock *sk,
int dup_sack_in, int *fack_count,
int *reord, int *flag)
 {
-   tcp_for_write_queue_from(skb, sk) {
+   tcp_for_write_queue_from(skb, sk, 0) {
int in_sack = 0;
int dup_sack = dup_sack_in;
 
@@ -1370,7 +1370,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff 
*skb, struct sock *sk,
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
u32 skip_to_seq)
 {
-   tcp_for_write_queue_from(skb, sk) {
+   tcp_for_write_queue_from(skb, sk, 0) {
if (skb == tcp_send_head(sk))
break;
 
@@ -1687,7 +1687,7 @@ int tcp_use_frto(struct sock *sk)
 
skb = tcp_write_queue_head(sk);
skb = tcp_write_queue_next(sk, skb);/* Skips head */
-   tcp_for_write_queue_from(skb, sk) {
+   tcp_for_write_queue_from(skb, sk, 0) {
if (skb == tcp_send_head(sk))
break;
if (TCP_SKB_CB(skb)-sackedTCPCB_RETRANS)
@@ -1794,7 +1794,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int 
allowed_segments, int flag)
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
 
-   tcp_for_write_queue(skb, sk) {
+   tcp_for_write_queue(skb, sk, 0) {
if (skb == tcp_send_head(sk))
break;
 
@@ -1894,7 +1894,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_clear_all_retrans_hints(tp);
}
 
-   tcp_for_write_queue(skb, sk) {
+   tcp_for_write_queue(skb, sk, 0) {
if (skb == tcp_send_head(sk))
break;
 
@@ -2145,7 +2145,7 @@ static void tcp_mark_head_lost(struct sock *sk, int 
packets, int fast_rexmit)
cnt = 0;
}
 
-   tcp_for_write_queue_from(skb, sk) {
+   tcp_for_write_queue_from(skb, sk, 0) {
if (skb == tcp_send_head(sk))
break;
/* TODO: do this better */
@@ -2200,7 +2200,7 @@ static void tcp_update_scoreboard(struct sock *sk, int 
fast_rexmit)
skb = tp-scoreboard_skb_hint ? tp-scoreboard_skb_hint
: tcp_write_queue_head(sk);
 
-   tcp_for_write_queue_from(skb, sk) {
+   tcp_for_write_queue_from(skb, sk, 0) {
if (skb ==

1 2 >

1 - 100 of 129 matches

Mail list logo