[PATCH 0/10] sysfs network namespace support
Now that we have network namespace support merged it is time to revisit the sysfs support so we can remove the dependency on !SYSFS. I'm not even trying to base this on any of Tejun's very interesting work on sysfs to remove the coupling between kobjects and sysfs_dirents. For my objective that just means I would need to spend several more weeks staring at sysfs trying to figure out how to get where I am going and iterating several times from yet another new starting place. I want to get something working before I try for any more perfection. I don't expect the userspace side of this to ever change, which is close enough to perfect for me. The bulk of the patches are the changes to allow multiple sysfs superblocks. Then comes the tagged directory sysfs support which uses information captured at mount time to decide which object with which tag will appear in a directory. Then the support for renaming and deleting objects where the source may be ambiguous because of tagging. Then finally the network namespace support so it is clear how all of this is tied together. Greg, the last patch that enables tagged directory support seems to make most sense living in your tree, as it lives half in fs/sysfs/mount.c, and half in net/core/net-sysfs.c and all of its dependencies are in Linus's tree except for this patchset. Eric -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/10] sysfs: sysfs_get_dentry add a sb parameter
In preparation for multiple mounts of sysfs add a superblock parameter to sysfs_get_dentry. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/dir.c | 11 ++- fs/sysfs/file.c |2 +- fs/sysfs/sysfs.h |2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..cff2b12 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -84,6 +84,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) /** * sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sb: superblock of the dentry to return * @sd: sysfs_dirent of interest * * Get dentry for @sd. Dentry is looked up if currently not @@ -96,9 +97,9 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) * RETURNS: * Pointer to found dentry on success, ERR_PTR() value on error. */ -struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd) { - struct dentry *dentry = dget(sysfs_sb-s_root); + struct dentry *dentry = dget(sb-s_root); while (dentry-d_fsdata != sd) { struct sysfs_dirent *cur; @@ -778,7 +779,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) goto out; /* nothing to rename */ /* get the original dentry */ - old_dentry = sysfs_get_dentry(sd); + old_dentry = sysfs_get_dentry(sysfs_sb, sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); goto out; @@ -845,14 +846,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) goto out; /* nothing to move */ /* get dentries */ - old_dentry = sysfs_get_dentry(sd); + old_dentry = sysfs_get_dentry(sysfs_sb, sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); goto out; } old_parent = old_dentry-d_parent; - new_parent = sysfs_get_dentry(new_parent_sd); + new_parent = sysfs_get_dentry(sysfs_sb, new_parent_sd); if (IS_ERR(new_parent)) { error = PTR_ERR(new_parent); goto out; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index ad13151..8c7bba0 100644 --- 
a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -569,7 +569,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) goto out; mutex_lock(sysfs_rename_mutex); - victim = sysfs_get_dentry(victim_sd); + victim = sysfs_get_dentry(sysfs_sb, victim_sd); mutex_unlock(sysfs_rename_mutex); if (IS_ERR(victim)) { rc = PTR_ERR(victim); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3308759..d4269ba 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -112,7 +112,7 @@ extern spinlock_t sysfs_assoc_lock; extern const struct file_operations sysfs_dir_operations; extern const struct inode_operations sysfs_dir_inode_operations; -struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); +struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd); void sysfs_put_active_two(struct sysfs_dirent *sd); void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, -- 1.5.3.rc6.17.g1911 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/10] sysfs: Implement __sysfs_get_dentry
This function is similar to, but much simpler than, sysfs_get_dentry: it returns a sysfs dentry only if one currently exists. This requires less locking than sysfs_get_dentry, which makes it preferable in some contexts. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/dir.c | 38 ++ 1 files changed, 38 insertions(+), 0 deletions(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index cff2b12..3ec9040 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -764,6 +764,44 @@ void sysfs_remove_dir(struct kobject * kobj) __sysfs_remove_dir(sd); } +/** + * __sysfs_get_dentry - get dentry for the given sysfs_dirent + * @sb: superblock of the dentry to return + * @sd: sysfs_dirent of interest + * + * Get dentry for @sd. Only return a dentry if one currently + * exists. + * + * LOCKING: + * Kernel thread context (may sleep) + * + * RETURNS: + * Pointer to found dentry on success, NULL on failure. + */ +static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd) +{ + struct inode *inode; + struct dentry *dentry = NULL; + + inode = ilookup5_nowait(sysfs_sb, sd-s_ino, sysfs_ilookup_test, sd); + if (inode !(inode-i_state I_NEW)) { + struct dentry *alias; + spin_lock(dcache_lock); + list_for_each_entry(alias, inode-i_dentry, d_alias) { + if (!IS_ROOT(alias) d_unhashed(alias)) + continue; + if (alias-d_sb != sb) + continue; + dentry = alias; + dget_locked(dentry); + break; + } + spin_unlock(dcache_lock); + } + iput(inode); + return dentry; +} + int sysfs_rename_dir(struct kobject * kobj, const char *new_name) { struct sysfs_dirent *sd = kobj-sd; -- 1.5.3.rc6.17.g1911 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/10] sysfs: Rename Support multiple superblocks
This patch modifies the sysfs_rename_dir and sysfs_move_dir to support multiple sysfs dentry trees rooted in different sysfs superblocks. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/dir.c | 190 +++ 1 files changed, 135 insertions(+), 55 deletions(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3ec9040..0d0c87e 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -802,42 +802,112 @@ static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct sysfs_di return dentry; } +struct sysfs_rename_struct { + struct list_head list; + struct dentry *old_dentry; + struct dentry *new_dentry; + struct dentry *old_parent; + struct dentry *new_parent; +}; + +static void post_rename(struct list_head *head) +{ + struct sysfs_rename_struct *srs; + while (!list_empty(head)) { + srs = list_entry(head-next, struct sysfs_rename_struct, list); + dput(srs-old_dentry); + dput(srs-new_dentry); + dput(srs-old_parent); + dput(srs-new_parent); + list_del(srs-list); + kfree(srs); + } +} + +static int prep_rename(struct list_head *head, + struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, + const char *name) +{ + struct sysfs_rename_struct *srs; + struct super_block *sb; + struct dentry *dentry; + int error; + + list_for_each_entry(sb, sysfs_fs_type.fs_supers, s_instances) { + dentry = sysfs_get_dentry(sb, sd); + if (dentry == ERR_PTR(-EXDEV)) + continue; + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto err_out; + } + + srs = kzalloc(sizeof(*srs), GFP_KERNEL); + if (!srs) { + dput(dentry); + goto err_out; + } + + INIT_LIST_HEAD(srs-list); + list_add(head, srs-list); + srs-old_dentry = dentry; + srs-old_parent = dget(dentry-d_parent); + + dentry = sysfs_get_dentry(sb, new_parent_sd); + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); + goto err_out; + } + srs-new_parent = dentry; + + error = -ENOMEM; + dentry = d_alloc_name(srs-new_parent, name); + if (!dentry) + goto err_out; + srs-new_dentry = dentry; + } + return 0; + +err_out: + 
post_rename(head); + return error; +} + int sysfs_rename_dir(struct kobject * kobj, const char *new_name) { struct sysfs_dirent *sd = kobj-sd; - struct dentry *parent = NULL; - struct dentry *old_dentry = NULL, *new_dentry = NULL; + struct list_head todo; + struct sysfs_rename_struct *srs; + struct inode *parent_inode = NULL; const char *dup_name = NULL; int error; + INIT_LIST_HEAD(todo); mutex_lock(sysfs_rename_mutex); error = 0; if (strcmp(sd-s_name, new_name) == 0) goto out; /* nothing to rename */ - /* get the original dentry */ - old_dentry = sysfs_get_dentry(sysfs_sb, sd); - if (IS_ERR(old_dentry)) { - error = PTR_ERR(old_dentry); - goto out; - } + sysfs_grab_supers(); + error = prep_rename(todo, sd, sd-s_parent, new_name); + if (error) + goto out_release; - parent = old_dentry-d_parent; + error = -ENOMEM; + mutex_lock(sysfs_mutex); + parent_inode = sysfs_get_inode(sd-s_parent); + mutex_unlock(sysfs_mutex); + if (!parent_inode) + goto out_release; - /* lock parent and get dentry for new name */ - mutex_lock(parent-d_inode-i_mutex); + mutex_lock(parent_inode-i_mutex); mutex_lock(sysfs_mutex); error = -EEXIST; if (sysfs_find_dirent(sd-s_parent, new_name)) goto out_unlock; - error = -ENOMEM; - new_dentry = d_alloc_name(parent, new_name); - if (!new_dentry) - goto out_unlock; - /* rename kobject and sysfs_dirent */ error = -ENOMEM; new_name = dup_name = kstrdup(new_name, GFP_KERNEL); @@ -852,17 +922,21 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) sd-s_name = new_name; /* rename */ - d_add(new_dentry, NULL); - d_move(old_dentry, new_dentry); + list_for_each_entry(srs, todo, list) { + d_add(srs-new_dentry, NULL); + d_move(srs-old_dentry, srs-new_dentry); + } error = 0; - out_unlock: +out_unlock: mutex_unlock(sysfs_mutex); - mutex_unlock(parent-d_inode-i_mutex); +
[PATCH 06/10] sysfs: sysfs_chmod_file handle multiple superblocks
Teach sysfs_chmod_file how to handle multiple sysfs superblocks. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/file.c | 51 --- 1 files changed, 28 insertions(+), 23 deletions(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 8c7bba0..ade6140 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -558,7 +558,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) { struct sysfs_dirent *victim_sd = NULL; - struct dentry *victim = NULL; + struct super_block *sb; + struct dentry *victim; struct inode * inode; struct iattr newattrs; int rc; @@ -569,31 +570,35 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) goto out; mutex_lock(sysfs_rename_mutex); - victim = sysfs_get_dentry(sysfs_sb, victim_sd); - mutex_unlock(sysfs_rename_mutex); - if (IS_ERR(victim)) { - rc = PTR_ERR(victim); - victim = NULL; - goto out; - } - - inode = victim-d_inode; - - mutex_lock(inode-i_mutex); + sysfs_grab_supers(); + list_for_each_entry(sb, sysfs_fs_type.fs_supers, s_instances) { + victim = sysfs_get_dentry(sb, victim_sd); + if (victim == ERR_PTR(-EXDEV)) + continue; + if (IS_ERR(victim)) { + rc = PTR_ERR(victim); + victim = NULL; + goto out_unlock; + } - newattrs.ia_mode = (mode S_IALLUGO) | (inode-i_mode ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - rc = notify_change(victim, newattrs); + inode = victim-d_inode; + mutex_lock(inode-i_mutex); + newattrs.ia_mode = (mode S_IALLUGO) | (inode-i_mode ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + rc = notify_change(victim, newattrs); + if (rc == 0) { + mutex_lock(sysfs_mutex); + victim_sd-s_mode = newattrs.ia_mode; + mutex_unlock(sysfs_mutex); + } + mutex_unlock(inode-i_mutex); - if (rc == 0) { - mutex_lock(sysfs_mutex); - victim_sd-s_mode = newattrs.ia_mode; - mutex_unlock(sysfs_mutex); + dput(victim); } - - mutex_unlock(inode-i_mutex); - out: - dput(victim); +out_unlock: + 
sysfs_release_supers(); + mutex_unlock(sysfs_rename_mutex); +out: sysfs_put(victim_sd); return rc; } -- 1.5.3.rc6.17.g1911 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/10] sysfs: Implement sysfs tagged directory support.
The problem. When implementing a network namespace I need to be able to have multiple network devices with the same name. Currently this is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and potentially a few other directories of the form /sys/ ... /net/*. What this patch does is to add an additional tag field to the sysfs dirent structure. For directories that should show different contents depending on the context such as /sys/class/net/, and /sys/devices/virtual/net/ this tag field is used to specify the context in which those directories should be visible. Effectively this is the same as creating multiple distinct directories with the same name, but internally to sysfs the result is nicer. I am calling the concept of a single directory that looks like multiple directories all at the same path in the filesystem tagged directories. For the networking namespace the set of directories whose contents I need to filter with tags can depend on the presence or absence of hotplug hardware or which modules are currently loaded. Which means I need a simple race free way to set up those directories as tagged. To achieve a race free design all tagged directories are created and managed by sysfs itself. The upper level code that knows what tagged directories we need provides just two methods that enable this: sb_tag() - that returns a void * tag that identifies the context of the process that mounted sysfs. kobject_tag(kobj) - that returns a void * tag that identifies the context a kobject should be in. Everything else is left up to sysfs. For the network namespace sb_tag and kobject_tag are essentially one line functions, and look to remain that way. The work needed in sysfs is more extensive. At each directory or symlink creation I need to check if the directory it is being created in is a tagged directory and if so generate the appropriate tag to place on the sysfs_dirent. 
Likewise at each symlink or directory removal I need to check if the sysfs directory it is being removed from is a tagged directory and if so figure out which tag goes along with the name I am deleting. Currently only directories which hold kobjects, and symlinks are supported. There is not enough information in the current file attribute interfaces to give us anything to discriminate on which makes it useless, and there are no potential users which makes it an uninteresting problem to solve. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/bin.c|2 +- fs/sysfs/dir.c| 182 + fs/sysfs/file.c |8 +- fs/sysfs/group.c | 12 ++-- fs/sysfs/inode.c |6 +- fs/sysfs/mount.c | 44 +++- fs/sysfs/symlink.c|2 +- fs/sysfs/sysfs.h | 16 - include/linux/sysfs.h | 16 9 files changed, 255 insertions(+), 33 deletions(-) diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 006fc64..86e1128 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -252,7 +252,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - sysfs_hash_and_remove(kobj-sd, attr-attr.name); + sysfs_hash_and_remove(kobj, kobj-sd, attr-attr.name); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0d0c87e..f4bd41a 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -99,8 +99,17 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) */ struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd) { - struct dentry *dentry = dget(sb-s_root); + struct dentry *dentry; + + /* Bail if this sd won't show up in this superblock */ + if (sd-s_parent sd-s_parent-s_flags SYSFS_FLAG_TAGGED) { + const void *tag; + tag = sysfs_lookup_tag(sd-s_parent, sb); + if (sd-s_tag.tag != tag) + return ERR_PTR(-EXDEV); + } + dentry = dget(sb-s_root); while (dentry-d_fsdata != sd) { struct sysfs_dirent *cur; struct dentry *parent; @@ -419,7 +428,11 @@ void 
sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, */ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { - if (sysfs_find_dirent(acxt-parent_sd, sd-s_name)) { + const void *tag = NULL; + + tag = sysfs_creation_tag(acxt-parent_sd, sd); + + if (sysfs_find_dirent(acxt-parent_sd, tag, sd-s_name)) { printk(KERN_WARNING sysfs: duplicate filename '%s' can not be created\n, sd-s_name); WARN_ON(1); @@ -428,6 +441,9 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) sd-s_parent = sysfs_get(acxt-parent_sd); + if (sd-s_parent-s_flags SYSFS_FLAG_TAGGED) + sd-s_tag.tag = tag; + if
[PATCH 08/10] sysfs: Implement sysfs_delete_link and sysfs_rename_link
When removing a symlink sysfs_remove_link does not provide enough information to figure out which tagged directory the symlink falls in. So I need sysfs_delete_link which is passed the target of the symlink to delete. Further half the time when we are removing a symlink the code is actually renaming the symlink but not doing so explicitly because we don't have a symlink rename method. So I have added sysfs_rename_link as well. Both of these functions now have enough information to find a symlink in a tagged directory. The only restriction is that they must be called before the target kobject is renamed or deleted. If they are called later I lose track of which tag the target kobject was marked with and can no longer find the old symlink to remove it. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/symlink.c| 31 +++ include/linux/sysfs.h | 17 + 2 files changed, 48 insertions(+), 0 deletions(-) diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index b0f8070..89c98cb 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -80,6 +80,21 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char } /** + * sysfs_delete_link - remove symlink in object's directory. + * @kobj: object we're acting for. + * @targ: object we're pointing to. + * @name: name of the symlink to remove. + * + * Unlike sysfs_remove_link sysfs_delete_link has enough information + * to successfully delete symlinks in tagged directories. + */ +void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, + const char *name) +{ + sysfs_hash_and_remove(targ, kobj-sd, name); +} + +/** * sysfs_remove_link - remove symlink in object's directory. * @kobj: object we're acting for. * @name: name of the symlink to remove. @@ -90,6 +105,22 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) sysfs_hash_and_remove(kobj, kobj-sd, name); } +/** + * sysfs_rename_link - rename symlink in object's directory. + * @kobj: object we're acting for. 
+ * @targ: object we're pointing to. + * @old: previous name of the symlink. + * @new: new name of the symlink. + * + * A helper function for the common rename symlink idiom. + */ +int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new) +{ + sysfs_delete_link(kobj, targ, old); + return sysfs_create_link(kobj, targ, new); +} + static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, struct sysfs_dirent *target_sd, char *path) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c8d7a69..c2e8b0d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -109,6 +109,12 @@ int __must_check sysfs_create_link(struct kobject *kobj, struct kobject *target, const char *name); void sysfs_remove_link(struct kobject *kobj, const char *name); +int sysfs_rename_link(struct kobject *kobj, struct kobject *target, + const char *old_name, const char *new_name); + +void sysfs_delete_link(struct kobject *dir, struct kobject *targ, + const char *name); + int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, @@ -195,6 +201,17 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name) ; } +static inline int sysfs_rename_link(struct kobject * k, struct kobject *t, + const char *old_name, const char * new_name) +{ + return 0; +} + +static inline void sysfs_delete_link(struct kobject *k, struct kobject *t, +const char *name) +{ +} + static inline int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { -- 1.5.3.rc6.17.g1911 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/10] driver core: Implement tagged directory support for device classes.
This patch enables tagging on every class directory if struct class has tag_ops. In addition device_del and device_rename were modified to use sysfs_delete_link and sysfs_rename_link respectively to ensure when these operations happen on devices whose classes have tag_ops that they work properly. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- drivers/base/class.c | 30 --- drivers/base/core.c| 51 +-- include/linux/device.h |2 + 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/drivers/base/class.c b/drivers/base/class.c index c4f8843..ed9393d 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -135,6 +135,17 @@ static void remove_class_attrs(struct class * cls) } } +static int class_setup_tagging(struct class *cls) +{ + const struct sysfs_tagged_dir_operations *tag_ops; + + tag_ops = cls-tag_ops; + if (!tag_ops) + return 0; + + return sysfs_enable_tagging(cls-subsys.kobj, tag_ops); +} + int class_register(struct class * cls) { int error; @@ -160,11 +171,22 @@ int class_register(struct class * cls) cls-subsys.kobj.ktype = class_ktype; error = kset_register(cls-subsys); - if (!error) { - error = add_class_attrs(class_get(cls)); - class_put(cls); - } + if (error) + goto out; + + error = class_setup_tagging(cls); + if (error) + goto out_unregister; + + error = add_class_attrs(cls); + if (error) + goto out_unregister; + +out: return error; +out_unregister: + kset_unregister(cls-subsys); + goto out; } void class_unregister(struct class * cls) diff --git a/drivers/base/core.c b/drivers/base/core.c index a2c3d4e..f9d3fcf 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -600,16 +600,20 @@ static struct kobject *get_device_parent(struct device *dev, return kobj; /* or create a new class-directory at the parent device */ - k = kobject_create(dev-class-name, parent_kobj); - if (!k) +bser kobj = kobject_create(dev-class-name, parent_kobj); + if (!kobj) return NULL; - k-kset = dev-class-class_dirs; - retval = kobject_register(k); + 
kobj-kset = dev-class-class_dirs; + retval = kobject_register(kobj); if (retval 0) { - kfree(k); + kfree(kobj); return NULL; } - return k; + /* If we created a new class-directory setup tagging */ + if (kobj dev-class-tag_ops) + sysfs_enable_tagging(k, dev-class-tag_ops); + + return kobj; } if (parent) @@ -758,7 +762,8 @@ static void device_remove_class_symlinks(struct device *dev) if (dev-kobj.parent != dev-class-subsys.kobj dev-type != part_type) - sysfs_remove_link(dev-class-subsys.kobj, dev-bus_id); + sysfs_delete_link(dev-class-subsys.kobj, + dev-kobj, dev-bus_id); #else if (dev-parent dev-type != part_type) sysfs_remove_link(dev-kobj, device); @@ -1223,6 +1228,15 @@ int device_rename(struct device *dev, char *new_name) strlcpy(old_device_name, dev-bus_id, BUS_ID_SIZE); strlcpy(dev-bus_id, new_name, BUS_ID_SIZE); +#ifndef CONFIG_SYSFS_DEPRECATED + if (dev-class (dev-kobj.parent != dev-class-subsys.kobj)) { + error = sysfs_rename_link(dev-class-subsys.kobj, + dev-kobj, old_device_name, new_name); + if (error) + goto out; + } +#endif + error = kobject_rename(dev-kobj, new_name); if (error) { strlcpy(dev-bus_id, old_device_name, BUS_ID_SIZE); @@ -1231,24 +1245,13 @@ int device_rename(struct device *dev, char *new_name) #ifdef CONFIG_SYSFS_DEPRECATED if (old_class_name) { + error = -ENOMEM; new_class_name = make_class_name(dev-class-name, dev-kobj); - if (new_class_name) { - error = sysfs_create_link(dev-parent-kobj, - dev-kobj, new_class_name); - if (error) - goto out; - sysfs_remove_link(dev-parent-kobj, old_class_name); - } - } -#else - if (dev-class) { - sysfs_remove_link(dev-class-subsys.kobj, old_device_name); - error = sysfs_create_link(dev-class-subsys.kobj, dev-kobj, - dev-bus_id); - if (error) { - dev_err(dev,
[PATCH 10/10] net: Enable tagging for net_class directories in sysfs
The problem. Network devices show up in sysfs and with the network namespace active multiple devices with the same name can show up in the same directory, ouch! To avoid that problem and allow existing applications in network namespaces to see the same interface that is currently presented in sysfs, this patch enables the tagging directory support in sysfs. By using the network namespace pointers as tags to separate out the sysfs directory entries we ensure that we don't have conflicts in the directories and applications only see a limited set of the network devices. Signed-off-by: Eric W. Biederman [EMAIL PROTECTED] --- fs/sysfs/mount.c | 36 include/linux/sysfs.h |2 ++ net/Kconfig |2 +- net/core/net-sysfs.c | 20 4 files changed, 59 insertions(+), 1 deletions(-) diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index f6e49d9..ed47133 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -16,6 +16,8 @@ #include linux/mount.h #include linux/pagemap.h #include linux/init.h +#include linux/nsproxy.h +#include net/net_namespace.h #include sysfs.h @@ -78,6 +80,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) root-d_sb = sb; sb-s_root = root; sb-s_fs_info = info; + info-tag.net_ns = hold_net(current-nsproxy-net_ns); return 0; out_err: @@ -95,6 +98,9 @@ static int sysfs_test_super(struct super_block *sb, void *ptr) struct sysfs_super_info *info = sysfs_info(sb); int found = 1; + if (task-nsproxy-net_ns != info-tag.net_ns) + found = 0; + return found; } @@ -131,6 +137,8 @@ static void sysfs_kill_sb(struct super_block *sb) struct sysfs_super_info *info = sysfs_info(sb); kill_anon_super(sb); + if (info-tag.net_ns) + release_net(info-tag.net_ns); kfree(info); } @@ -181,6 +189,31 @@ restart: spin_unlock(sb_lock); } +#ifdef CONFIG_NET +static void sysfs_net_exit(struct net *net) +{ + /* Allow the net namespace to go away while sysfs is still mounted. 
*/ + struct super_block *sb; + mutex_lock(sysfs_rename_mutex); + sysfs_grab_supers(); + mutex_lock(sysfs_mutex); + list_for_each_entry(sb, sysfs_fs_type.fs_supers, s_instances) { + struct sysfs_super_info *info = sysfs_info(sb); + if (info-tag.net_ns != net) + continue; + release_net(info-tag.net_ns); + info-tag.net_ns = NULL; + } + mutex_unlock(sysfs_mutex); + sysfs_release_supers(); + mutex_unlock(sysfs_rename_mutex); +} + +static struct pernet_operations sysfs_net_ops = { + .exit = sysfs_net_exit, +}; +#endif + int __init sysfs_init(void) { int err = -ENOMEM; @@ -205,6 +238,9 @@ int __init sysfs_init(void) unregister_filesystem(sysfs_fs_type); goto out_err; } +#ifdef CONFIG_NET + register_pernet_subsys(sysfs_net_ops); +#endif } else goto out_err; out: diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c2e8b0d..2c93278 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -19,6 +19,7 @@ struct kobject; struct module; +struct net; /* FIXME * The *owner field is no longer used, but leave around @@ -77,6 +78,7 @@ struct sysfs_ops { }; struct sysfs_tag_info { + struct net *net_ns; }; struct sysfs_tagged_dir_operations { diff --git a/net/Kconfig b/net/Kconfig index ab4e6da..250585e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -30,7 +30,7 @@ menu Networking options config NET_NS bool Network namespace support default n - depends on EXPERIMENTAL !SYSFS + depends on EXPERIMENTAL help Allow user space to create what appear to be multiple instances of the network stack. 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 61ead1d..2aa64d0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -13,7 +13,9 @@ #include linux/kernel.h #include linux/netdevice.h #include linux/if_arp.h +#include linux/nsproxy.h #include net/sock.h +#include net/net_namespace.h #include linux/rtnetlink.h #include linux/wireless.h #include net/iw_handler.h @@ -431,6 +433,23 @@ static void netdev_release(struct device *d) kfree((char *)dev - dev-padded); } +static const void *net_sb_tag(struct sysfs_tag_info *info) +{ + return info-net_ns; +} + +static const void *net_kobject_tag(struct kobject *kobj) +{ + struct net_device *dev; + dev = container_of(kobj, struct net_device, dev.kobj); + return dev-nd_net; +} + +static const struct sysfs_tagged_dir_operations net_tagged_dir_operations = { + .sb_tag = net_sb_tag, + .kobject_tag = net_kobject_tag, +}; + static struct class net_class = {
[PATCH] zd1211rw: Fix alignment problems
Shaddy Baddah found an alignment problem with zd1211rw driver at 2007-11-19. This patch fixes it, it is based on the patch proposed by Herbert Xu. The alignment 4 has been the agreed value on the linux-wireless mailing list. Note that the problem only affects the old zd1211rw softmac driver and not the zd1211rw-mac80211 driver. Daniel Drake has already provided a patch for the replacement of the softmac driver, which this patch will break. Signed-off-by: Ulrich Kunitz [EMAIL PROTECTED] --- drivers/net/wireless/zd1211rw/zd_mac.c | 10 -- 1 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index a903645..5298a8b 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1130,6 +1130,8 @@ static void zd_mac_rx(struct zd_mac *mac, struct sk_buff *skb) __skb_trim(skb, skb-len - (IEEE80211_FCS_LEN + sizeof(struct rx_status))); + ZD_ASSERT(IS_ALIGNED((unsigned long)skb-data, 4)); + update_qual_rssi(mac, skb-data, skb-len, stats.signal, status-signal_strength); @@ -1166,15 +1168,19 @@ static void do_rx(unsigned long mac_ptr) int zd_mac_rx_irq(struct zd_mac *mac, const u8 *buffer, unsigned int length) { struct sk_buff *skb; + unsigned int reserved = + ALIGN(max_t(unsigned int, + sizeof(struct zd_rt_hdr), ZD_PLCP_HEADER_SIZE), 4) - + ZD_PLCP_HEADER_SIZE; - skb = dev_alloc_skb(sizeof(struct zd_rt_hdr) + length); + skb = dev_alloc_skb(reserved + length); if (!skb) { struct ieee80211_device *ieee = zd_mac_to_ieee80211(mac); dev_warn(zd_mac_dev(mac), Could not allocate skb.\n); ieee-stats.rx_dropped++; return -ENOMEM; } - skb_reserve(skb, sizeof(struct zd_rt_hdr)); + skb_reserve(skb, reserved); memcpy(__skb_put(skb, length), buffer, length); skb_queue_tail(mac-rx_queue, skb); tasklet_schedule(mac-rx_tasklet); -- 1.5.3.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More 
majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ZD1211RW unaligned accesses...
John W. Linville wrote: So, did the patch below fix the problem? Should I apply it? John John, the patch would have worked, but I have sent a second one to the list, which is based on Herbert's and has an assert to be able to test the patch on x86. You should note that the mac80211 driver doesn't suffer from the problem and Daniel has already provided a patch to replace zd1211rw by the mac80211 driver. Daniel's patch will of course be broken by the new patch. -- Uli Kunitz -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/4] datagram: mem_schedule functions
On Wed, Nov 28, 2007 at 01:52:59PM -0500, Hideo AOKI wrote: +static inline int sk_wmem_schedule(struct sock *sk, int size) +{ + if (sk-sk_type == SOCK_STREAM) + return sk_stream_wmem_schedule(sk, size); + else if (sk-sk_type == SOCK_DGRAM) + return sk_datagram_wmem_schedule(sk, size); + else + return 1; +} Why do we need this function? As far as I can see we always know whether it's a stream or datagram socket at compile time so doing a run-time test is pointless. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [net] xfrm depends on crypto
net/built-in.o: In function `xfrm_find_algo': xfrm_algo.c:(.text+0x61c82): undefined reference to `crypto_has_alg' Signed-off-by: Sebastian Siewior [EMAIL PROTECTED] --- net/xfrm/Kconfig |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 577a4f8..35ed342 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -4,6 +4,7 @@ config XFRM bool depends on NET + select CRYPTO config XFRM_USER tristate Transformation user configuration interface -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [net] RxRPC session sockets depend on crypto
net/built-in.o: In function `rxrpc_destroy_s': ar-key.c:(.text+0x653cd): undefined reference to `crypto_free_tfm' net/built-in.o: In function `rxrpc_instantiate_s': ar-key.c:(.text+0x65496): undefined reference to `crypto_alloc_base' Signed-off-by: Sebastian Siewior [EMAIL PROTECTED] --- net/rxrpc/Kconfig |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index e662f1d..60c4738 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -6,6 +6,7 @@ config AF_RXRPC tristate RxRPC session sockets depends on INET EXPERIMENTAL select KEYS + select CRYPTO help Say Y or M here to include support for RxRPC session sockets (just the transport part, not the presentation part: (un)marshalling is @@ -31,7 +32,6 @@ config AF_RXRPC_DEBUG config RXKAD tristate RxRPC Kerberos security depends on AF_RXRPC - select CRYPTO select CRYPTO_MANAGER select CRYPTO_BLKCIPHER select CRYPTO_PCBC -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] udp: memory accounting in IPv4
On Wed, Nov 28, 2007 at 01:53:36PM -0500, Hideo AOKI wrote: +/** + * __skb_queue_purge_and_sub_memory_allocated + * - empty a list and subtruct memory allocation counter + * @sk: sk + * @list: list to empty + * Delete all buffers on an sk_buff list and subtruct the + * truesize of the sk_buff for memory accounting. Each buffer + * is removed from the list and one reference dropped. This + * function does not take the list lock and the caller must + * hold the relevant locks to use it. + */ +static inline void __skb_queue_purge_and_sub_memory_allocated(struct sock *sk, + struct sk_buff_head *list) +{ + struct sk_buff *skb; + int purged_skb_size = 0; + while ((skb = __skb_dequeue(list)) != NULL) { + purged_skb_size += sk_datagram_pages(skb-truesize); + kfree_skb(skb); + } + atomic_sub(purged_skb_size, sk-sk_prot-memory_allocated); +} Thanks, this is a lot better than before! However, I'm still a little concerned about the effect of two more atomic op's per packet that we're adding here. Hang on a sec, that should've been Dave's line since atomic ops are cheap on x86 :) But seriously, it's not so much that we have two more atomic op's per packet, but we have two more writes to a single global counter for each packet. This is going to really suck on SMP. So what I'd like to see is a scheme that's similar to sk_forward_alloc. The idea is that each socket allocates memory using mem_schedule and then stores it in sk_forward_alloc. Each packet then only has to add to/subtract from sk_forward_alloc. There is one big problem with this though, UDP is not serialised like TCP. So you can't just use sk_forward_alloc since it's not an atomic_t. We'll need to think about this one a bit more. 
Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] XFRM: SPD auditing fix to include the netmask/prefix-length
On Fri, Nov 30, 2007 at 09:51:48AM -0500, Paul Moore wrote: Steve and/or Joy, could we get a verdict on this issue? The lack of a netmask in the SPD audit messages is pretty serious so I'd like to see this fixed as soon as possible. I'll take the resounding silence as an indication of approval :) Patch applied to net-2.6.25. Thanks Paul. -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] remove extra memset from dn_fib_check_nh
On Fri, Nov 30, 2007 at 06:54:01PM +0300, Denis V. Lunev wrote: [PATCH] remove extra memset from dn_fib_check_nh Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Applied to net-2.6.25. Thanks Denis! -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 1/6][CORE] Remove unneeded ifdefs from sysctl_net_core.c
They include the whole file, but it is already compiled out when SYSCTL=n, since it is obj-$(CONFIG_SYSCTL) target in the Makefile. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 113cc72..277c8fa 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -13,8 +13,6 @@ #include net/sock.h #include net/xfrm.h -#ifdef CONFIG_SYSCTL - ctl_table core_table[] = { #ifdef CONFIG_NET { @@ -151,5 +149,3 @@ ctl_table core_table[] = { }, { .ctl_name = 0 } }; - -#endif -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace
On Fri, Nov 30, 2007 at 07:37:28PM +0300, Pavel Emelyanov wrote: Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] All applied to net-2.6.25. diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b0cf075..f97b2a4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +43,7 @@ struct net { /* unix sockets */ int sysctl_unix_max_dgram_qlen; + struct ctl_table_header *unix_ctl; }; But I gotta say this struct/file is going to be enormous. It's also one of those files that causes everything to get recompiled. Maybe we ought to make a rule that each subsystem only gets to have at most one entry in it :) Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 2/6][CORE] Isolate the net/core/ sysctl table
Using ctl paths we can put all the stuff, related to net/core/ sysctl table, into one file and remove all the references on it. As a good side effect this hides the core_table name from the global name space :) Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/include/net/sock.h b/include/net/sock.h index 43e3cd9..8f32a71 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1325,10 +1325,6 @@ extern __u32 sysctl_rmem_max; extern void sk_init(void); -#ifdef CONFIG_SYSCTL -extern struct ctl_table core_table[]; -#endif - extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 277c8fa..e322713 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,10 +10,11 @@ #include linux/module.h #include linux/socket.h #include linux/netdevice.h +#include linux/init.h #include net/sock.h #include net/xfrm.h -ctl_table core_table[] = { +static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { .ctl_name = NET_CORE_WMEM_MAX, @@ -149,3 +150,19 @@ ctl_table core_table[] = { }, { .ctl_name = 0 } }; + +static __initdata struct ctl_path net_core_path[] = { + { .procname = net, .ctl_name = CTL_NET, }, + { .procname = core, .ctl_name = NET_CORE, }, + { }, +}; + +static __init int sysctl_core_init(void) +{ + struct ctl_table_header *hdr; + + hdr = register_sysctl_paths(net_core_path, net_core_table); + return hdr == NULL ? 
-ENOMEM : 0; +} + +__initcall(sysctl_core_init); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index c50c793..747fc55 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -31,12 +31,6 @@ #endif struct ctl_table net_table[] = { - { - .ctl_name = NET_CORE, - .procname = core, - .mode = 0555, - .child = core_table, - }, #ifdef CONFIG_INET { .ctl_name = NET_IPV4, -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 3/6][IPv4] Cleanup the sysctl_net_ipv4.c file
This includes several cleanups: * move the ipv4_config to af_inet.c; * tune Makefile to compile out this file when needed; * remove additional sysctl_ip_nonlocal_bind declaration (it is already declared in net/ip.h); * remove no longer needed ifdefs from this file. This is a preparation for using ctl paths for net/ipv4/ sysctl table. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 93fe396..ad40ef3 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,9 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ -sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \ +fib_frontend.o fib_semantics.o \ inet_fragment.o +obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c75f20b..0e4b6eb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -126,6 +126,10 @@ extern void ip_mc_drop_socket(struct sock *sk); static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); +struct ipv4_config ipv4_config; + +EXPORT_SYMBOL(ipv4_config); + /* New destruction routine */ void inet_sock_destruct(struct sock *sk) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bec6fe8..3546424 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -21,19 +21,10 @@ #include net/cipso_ipv4.h #include net/inet_frag.h -/* From af_inet.c */ -extern int sysctl_ip_nonlocal_bind; - -#ifdef CONFIG_SYSCTL static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -#endif - -struct ipv4_config ipv4_config; - -#ifdef CONFIG_SYSCTL static int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, @@ -887,7 +878,3 @@
ctl_table ipv4_table[] = { }, { .ctl_name = 0 } }; - -#endif /* CONFIG_SYSCTL */ - -EXPORT_SYMBOL(ipv4_config); -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 4/6][IPV4] Use ctl paths to register net/ipv4/ table
This is the same as I did for the net/core/ table in the second patch in this series: use the paths and isolate the whole table in the .c file. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/include/net/ip.h b/include/net/ip.h index 83fb9f1..7e1dd67 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -393,6 +393,4 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, extern int ip_misc_proc_init(void); #endif -extern struct ctl_table ipv4_table[]; - #endif /* _IP_H */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3546424..bfd0dec 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -13,6 +13,7 @@ #include linux/igmp.h #include linux/inetdevice.h #include linux/seqlock.h +#include linux/init.h #include net/snmp.h #include net/icmp.h #include net/ip.h @@ -247,7 +248,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam } -ctl_table ipv4_table[] = { +static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_TCP_TIMESTAMPS, .procname = tcp_timestamps, @@ -878,3 +879,19 @@ ctl_table ipv4_table[] = { }, { .ctl_name = 0 } }; + +static __initdata struct ctl_path net_ipv4_path[] = { + { .procname = net, .ctl_name = CTL_NET, }, + { .procname = ipv4, .ctl_name = NET_IPV4, }, + { }, +}; + +static __init int sysctl_ipv4_init(void) +{ + struct ctl_table_header *hdr; + + hdr = register_sysctl_paths(net_ipv4_path, ipv4_table); + return hdr == NULL ?
-ENOMEM : 0; +} + +__initcall(sysctl_ipv4_init); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 747fc55..a4f0ed8 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -31,14 +31,6 @@ #endif struct ctl_table net_table[] = { -#ifdef CONFIG_INET - { - .ctl_name = NET_IPV4, - .procname = ipv4, - .mode = 0555, - .child = ipv4_table - }, -#endif #ifdef CONFIG_TR { .ctl_name = NET_TR, -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 2/2][NEIGH] Use the ctl paths to create neighbours sysctls
On Fri, Nov 30, 2007 at 08:29:16PM +0300, Pavel Emelyanov wrote: Since the path is modified, it is put on the stack, to avoid possible races with multiple calls to neigh_sysctl_register() : it is called by protocols and I didn't find any protection in this case. Did I overlook the rtnl lock? I think the only caller that can be a module is IPv6 :) The stack growth of the neigh_sysctl_register() is 40 bytes. I believe this is OK, since this is not that much and this function is not called with the deep stack (device/protocols register). Yes it's fine. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Both applied to net-2.6.25. Thanks Pavel! -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 6/6][CORE] Remove the empty net_table
I have removed all the entries from this table (core_table, ipv4_table and tr_table), so now we can safely drop it. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/include/linux/net.h b/include/linux/net.h index f95f12c..c414d90 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -337,7 +337,6 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include linux/sysctl.h -extern ctl_table net_table[]; extern int net_msg_cost; extern int net_msg_burst; #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 946a01c..894a177 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -199,14 +199,6 @@ static struct ctl_table root_table[] = { .mode = 0555, .child = vm_table, }, -#ifdef CONFIG_NET - { - .ctl_name = CTL_NET, - .procname = net, - .mode = 0555, - .child = net_table, - }, -#endif { .ctl_name = CTL_FS, .procname = fs, diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 16ad14b..665e856 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -30,10 +30,6 @@ #include linux/if_tr.h #endif -struct ctl_table net_table[] = { - { 0 }, -}; - static struct list_head * net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) { -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] udp: memory accounting in IPv4
Herbert Xu a écrit : On Wed, Nov 28, 2007 at 01:53:36PM -0500, Hideo AOKI wrote: +/** + * __skb_queue_purge_and_sub_memory_allocated + * - empty a list and subtruct memory allocation counter + * @sk: sk + * @list: list to empty + * Delete all buffers on an sk_buff list and subtruct the + * truesize of the sk_buff for memory accounting. Each buffer + * is removed from the list and one reference dropped. This + * function does not take the list lock and the caller must + * hold the relevant locks to use it. + */ +static inline void __skb_queue_purge_and_sub_memory_allocated(struct sock *sk, + struct sk_buff_head *list) +{ + struct sk_buff *skb; + int purged_skb_size = 0; + while ((skb = __skb_dequeue(list)) != NULL) { + purged_skb_size += sk_datagram_pages(skb-truesize); + kfree_skb(skb); + } + atomic_sub(purged_skb_size, sk-sk_prot-memory_allocated); +} Thanks, this is a lot better than before! However, I'm still a little concerned about the effect of two more atomic op's per packet that we're adding here. Hang on a sec, that should've been Dave's line since atomic ops are cheap on x86 :) But seriously, it's not so much that we have two more atomic op's per packet, but we have two more writes to a single global counter for each packet. This is going to really suck on SMP. So what I'd like to see is a scheme that's similar to sk_forward_alloc. The idea is that each socket allocates memory using mem_schedule and then stores it in sk_forward_alloc. Each packet then only has to add to/subtract from sk_forward_alloc. There is one big problem with this though, UDP is not serialised like TCP. So you can't just use sk_forward_alloc since it's not an atomic_t. We'll need to think about this one a bit more. I agree adding yet another atomics ops is a big problem. 
Another idea, coupled with recent work on percpu storage done by Christoph Lameter, would be to use kind of a percpu_counter : We don't really need strong and precise memory accounting (UDP, but TCP as well), just some kind of limit to keep too much memory from being used. That is, updating a percpu variable, and doing some updates to a global counter only when this percpu variable escapes from a given range. A lot of contended cache lines could benefit from this relaxing (count of sockets...) I would first wait until Christoph's work is done, so that we don't need atomic ops on local cpu storage (and no need to disable preemption too). -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Mark Lord wrote: Now that we have network namespace support merged it is time to revisit the sysfs support so we can remove the dependency on !SYSFS. ... Now that the namespace updates are part of 2.6.24, there is a major inconsistency in network EXPORT_SYMBOLs. It used to be that an external network module could get away without having to add a MODULE_LICENSE(GPL*) line to the source. In support of that, common networking functions (still) use EXPORT_SYMBOL() rather than the more restrictive EXPORT_SYMBOL_GPL(). Eg. register_netdev(), sk_alloc(), __dev_get_by_name(). But now, none of those three are actually usable by default, because they all require init_net, which is EXPORT_SYMBOL_GPL(). So.. It appears that one of three things should really happen next: 1) Change the other exports to also be EXPORT_SYMBOL_GPL. 2) Have register_netdev, sk_alloc, and __dev_get_by_name default to using init_net when NULL is specified in the namespace field. or 3) Change init_net to be EXPORT_SYMBOL_GPL. .. Obviously that should instead say: 3) Change init_net to be EXPORT_SYMBOL instead of EXPORT_SYMBOL_GPL. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] udp: memory accounting in IPv4
On Sat, Dec 01, 2007 at 02:08:31PM +0100, Eric Dumazet wrote: I agree adding yet another atomics ops is a big problem. Another idea, coupled with recent work on percpu storage done by Christoph Lameter, would be to use kind of a percpu_counter : Yes that's an interesting idea. We don't really need strong and precise memory accounting (UDP, but TCP as well), just some kind of limit to keep too much memory from being used. BTW it's no big deal for TCP because it's completely serialised so it doesn't use atomic ops for the accounting. More importantly, it uses sk_forward_alloc so not every packet needs to touch the global counter. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 2/3][IPV6] Unify and cleanup calls to addrconf_sysctl_register
On Fri, Nov 30, 2007 at 09:54:51PM +0300, Pavel Emelyanov wrote: +static void addrconf_sysctl_register(struct inet6_dev *idev, + struct ipv6_devconf *p) Due to your simplification you no longer need the second argument as it can now be derived from the first as is the case for IPv4. So let's get rid of that while we're at it. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register
Herbert Xu wrote: On Fri, Nov 30, 2007 at 09:26:58PM +0300, Pavel Emelyanov wrote: Besides, the inet_device is passed to this function, but it is not needed there at all - just the device name and ifindex are required. But it is called devinet_* so an in_dev kind of makes sense :) #ifdef CONFIG_SYSCTL -devinet_sysctl_register(in_dev, in_dev-cnf); +devinet_sysctl_register(dev, in_dev-cnf); How about just giving it in_dev instead? Hmm... Makes sense. Should I recreate the whole set or just make the incremental one? Thanks, Thanks, Pavel -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register
On Sat, Dec 01, 2007 at 04:25:21PM +0300, Pavel Emelyanov wrote: How about just giving it in_dev instead? Hmm... Makes sense. Should I recreate the whole set or just make the incremental one? I've applied 1/3 for both cases so please just resend 2/3 and 3/3. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace
Denis V. Lunev wrote: Herbert Xu wrote: On Fri, Nov 30, 2007 at 07:37:28PM +0300, Pavel Emelyanov wrote: Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] All applied to net-2.6.25. diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b0cf075..f97b2a4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +43,7 @@ struct net { /* unix sockets */ int sysctl_unix_max_dgram_qlen; + struct ctl_table_header *unix_ctl; }; But I gotta say this struct/file is going to be enormous. It's also one of those files that causes everything to get recompiled. Maybe we ought to make a rule that each subsystem only gets to have at most one entry in it :) Thanks, Good point, thanks. We'll start thinking in that direction. Right now it is not finally cursed with all stuff around. Agree, the point is good :) but it has one pitfall :( Look, now we make _one_ dereference to get any net-xxx variable (sysctl, list head, lock, etc). When we force each subsystem to have its private pointer on this, we'll make them take _two_ dereferences. Before the whole net namespace stuff started we made _zero_ dereferences :) This may tell upon the performance. I'm not claiming that this is the major case against this idea, but when developing this idea, I think we should keep that fact in mind and pay good attention to performance regressions. Regards, Den Thanks, Pavel -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
namespace support requires network modules to say GPL
Now that we have network namespace support merged it is time to revisit the sysfs support so we can remove the dependency on !SYSFS. ... Now that the namespace updates are part of 2.6.24, there is a major inconsistency in network EXPORT_SYMBOLs. It used to be that an external network module could get away without having to add a MODULE_LICENSE(GPL*) line to the source. In support of that, common networking functions (still) use EXPORT_SYMBOL() rather than the more restrictive EXPORT_SYMBOL_GPL(). Eg. register_netdev(), sk_alloc(), __dev_get_by_name(). But now, none of those three are actually usable by default, because they all require init_net, which is EXPORT_SYMBOL_GPL(). So.. It appears that one of three things should really happen next: 1) Change the other exports to also be EXPORT_SYMBOL_GPL. 2) Have register_netdev, sk_alloc, and __dev_get_by_name default to using init_net when NULL is specified in the namespace field. or 3) Change init_net to be EXPORT_SYMBOL instead of EXPORT_SYMBOL_GPL. Right now, things are just a bit inconsistent, and it's not clear whether the namespace changes intended this consequence or not. Cheers (as for me, I think all kernel modules are GPL, whether they have the MODULE_LICENSE line or not, so flames to /dev/null on that). -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 1/3][IPV4] Cleanup the devinet_sysctl_register
On Fri, Nov 30, 2007 at 09:21:00PM +0300, Pavel Emelyanov wrote: I moved the call to kmalloc() from the *t declaration into the code (this is confusing when a variable is initialized with the result of some call) and removed unneeded comment near the error path. Just like I did with the neigh ctl-s. Besides, I fixed the goto's and the labels - they were indented with spaces :( Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Applied to net-2.6.25. Thanks. -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register
On Fri, Nov 30, 2007 at 09:26:58PM +0300, Pavel Emelyanov wrote: Besides, the inet_device is passed to this function, but it is not needed there at all - just the device name and ifindex are required. But it is called devinet_* so an in_dev kind of makes sense :) #ifdef CONFIG_SYSCTL - devinet_sysctl_register(in_dev, in_dev-cnf); + devinet_sysctl_register(dev, in_dev-cnf); How about just giving it in_dev instead? Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 5/6][TR] Use ctl paths to register net/token-ring/ table
The same thing for token-ring - use ctl paths and get rid of external references on the tr_table. Unfortunately, I couldn't split this patch into cleanup and use-the-paths parts. As a lame excuse I can say, that the cleanup is just moving the tr_table from one file to another. Since the source file is empty after the move, I remove it. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 046e9d9..5bcec8b 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -49,9 +49,6 @@ static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) { return (struct trh_hdr *)skb_mac_header(skb); } -#ifdef CONFIG_SYSCTL -extern struct ctl_table tr_table[]; -#endif #endif /* This is an Token-Ring LLC structure */ diff --git a/net/802/Makefile b/net/802/Makefile index 977704a..68569ff 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -3,9 +3,8 @@ # # Check the p8022 selections against net/core/Makefile. -obj-$(CONFIG_SYSCTL) += sysctl_net_802.o obj-$(CONFIG_LLC) += p8022.o psnap.o -obj-$(CONFIG_TR) += p8022.o psnap.o tr.o sysctl_net_802.o +obj-$(CONFIG_TR) += p8022.o psnap.o tr.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI)+= hippi.o diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c deleted file mode 100644 index ead5603..000 --- a/net/802/sysctl_net_802.c +++ /dev/null @@ -1,33 +0,0 @@ -/* -*- linux-c -*- - * sysctl_net_802.c: sysctl interface to net 802 subsystem. - * - * Begun April 1, 1996, Mike Shaver. - * Added /proc/sys/net/802 directory entry (empty =) ). [MS] - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include linux/mm.h -#include linux/if_tr.h -#include linux/sysctl.h - -#ifdef CONFIG_TR -extern int sysctl_tr_rif_timeout; -#endif - -struct ctl_table tr_table[] = { -#ifdef CONFIG_TR - { - .ctl_name = NET_TR_RIF_TIMEOUT, - .procname = rif_timeout, - .data = sysctl_tr_rif_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif /* CONFIG_TR */ - { 0 }, -}; diff --git a/net/802/tr.c b/net/802/tr.c index d8a5386..23fa151 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -35,6 +35,7 @@ #include linux/proc_fs.h #include linux/seq_file.h #include linux/init.h +#include linux/sysctl.h #include net/arp.h #include net/net_namespace.h @@ -634,6 +635,26 @@ struct net_device *alloc_trdev(int sizeof_priv) return alloc_netdev(sizeof_priv, tr%d, tr_setup); } +#ifdef CONFIG_SYSCTL +static struct ctl_table tr_table[] = { + { + .ctl_name = NET_TR_RIF_TIMEOUT, + .procname = rif_timeout, + .data = sysctl_tr_rif_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { 0 }, +}; + +static __initdata struct ctl_path tr_path[] = { + { .procname = net, .ctl_name = CTL_NET, }, + { .procname = token-ring, .ctl_name = NET_TR, }, + { } +}; +#endif + /* * Called during bootup. We don't actually have to initialise * too much for this. 
@@ -644,7 +665,9 @@ static int __init rif_init(void) rif_timer.expires = sysctl_tr_rif_timeout; setup_timer(rif_timer, rif_check_expire, 0); add_timer(rif_timer); - +#ifdef CONFIG_SYSCTL + register_sysctl_paths(tr_path, tr_table); +#endif proc_net_fops_create(init_net, tr_rif, S_IRUGO, rif_seq_fops); return 0; } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index a4f0ed8..16ad14b 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -31,14 +31,6 @@ #endif struct ctl_table net_table[] = { -#ifdef CONFIG_TR - { - .ctl_name = NET_TR, - .procname = token-ring, - .mode = 0555, - .child = tr_table, - }, -#endif { 0 }, }; -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 (resend) 2/3][IPV4] Unify and cleanup calls to devinet_sysctl_register
Currently this call is used to register sysctls for devices and for the default confs. The all sysctls are registered separately. Besides, the inet_device is passed to this function, but it is not needed there at all - just the device name and ifindex are required. Thanks to Herbert, who noticed, that this call doesn't even require the devconf pointer (the last argument) - all we need we can take from the in_device itself. The fix is to make a __devinet_sysctl_register(), which registers sysctls for all devices we need, including default and all :) The original devinet_sysctl_register() works with struct net_device, not the inet_device, and calls the introduced function, passing the device name and ifindex (to be used as procname and ctl_name) into it. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 34c34c6..385896f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -98,8 +98,7 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); #ifdef CONFIG_SYSCTL -static void devinet_sysctl_register(struct in_device *in_dev, - struct ipv4_devconf *p); +static void devinet_sysctl_register(struct in_device *idev); static void devinet_sysctl_unregister(struct ipv4_devconf *p); #endif @@ -173,7 +172,7 @@ static struct in_device *inetdev_init(struct net_device *dev) in_dev_hold(in_dev); #ifdef CONFIG_SYSCTL - devinet_sysctl_register(in_dev, in_dev-cnf); + devinet_sysctl_register(in_dev); #endif ip_mc_init_dev(in_dev); if (dev-flags IFF_UP) @@ -1120,7 +1119,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, neigh_sysctl_unregister(in_dev-arp_parms); neigh_sysctl_register(dev, in_dev-arp_parms, NET_IPV4, NET_IPV4_NEIGH, ipv4, NULL, NULL); - devinet_sysctl_register(in_dev, in_dev-cnf); + devinet_sysctl_register(in_dev); #endif break; } @@ -1502,13 +1501,11 @@ static struct devinet_sysctl_table { }, 
}; -static void devinet_sysctl_register(struct in_device *in_dev, - struct ipv4_devconf *p) +static void __devinet_sysctl_register(char *dev_name, int ctl_name, + struct ipv4_devconf *p) { int i; - struct net_device *dev = in_dev ? in_dev-dev : NULL; struct devinet_sysctl_table *t; - char *dev_name = NULL; t = kmemdup(devinet_sysctl, sizeof(*t), GFP_KERNEL); if (!t) @@ -1519,13 +1516,7 @@ static void devinet_sysctl_register(struct in_device *in_dev, t-devinet_vars[i].extra1 = p; } - if (dev) { - dev_name = dev-name; - t-devinet_dev[0].ctl_name = dev-ifindex; - } else { - dev_name = default; - t-devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; - } + t-devinet_dev[0].ctl_name = ctl_name; /* * Make a copy of dev_name, because '.procname' is regarded as const @@ -1557,6 +1548,12 @@ out: return; } +static void devinet_sysctl_register(struct in_device *idev) +{ + return __devinet_sysctl_register(idev-dev-name, idev-dev-ifindex, + idev-cnf); +} + static void devinet_sysctl_unregister(struct ipv4_devconf *p) { if (p-sysctl) { @@ -1578,9 +1575,10 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); #ifdef CONFIG_SYSCTL - devinet_sysctl.sysctl_header = - register_sysctl_table(devinet_sysctl.devinet_root_dir); - devinet_sysctl_register(NULL, ipv4_devconf_dflt); + __devinet_sysctl_register(all, NET_PROTO_CONF_ALL, + ipv4_devconf); + __devinet_sysctl_register(default, NET_PROTO_CONF_DEFAULT, + ipv4_devconf_dflt); #endif } -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 (resend) 3/3][IPV4] Use ctl paths to register devinet sysctls
This looks very much like the patch for neighbors. The path is also located on the stack and is prepared inside the function. This time, the call to the registering function is guarded with the RTNL lock, but I decided to keep it on the stack not to litter the devinet.c file with unneeded names and to make it look similar to the neighbors code. This is also intended to help us with the net namespaces and saves the vmlinux size as well - this time by more than 670 bytes. The difference from the first version is just the patch offsets, that changed due to changes in the patch #2. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 385896f..c19c8db 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1431,11 +1431,8 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; - ctl_table devinet_dev[2]; - ctl_table devinet_conf_dir[2]; - ctl_table devinet_proto_dir[2]; - ctl_table devinet_root_dir[2]; + struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX]; + char *dev_name; } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, forwarding, @@ -1467,38 +1464,6 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, promote_secondaries), }, - .devinet_dev = { - { - .ctl_name = NET_PROTO_CONF_ALL, - .procname = all, - .mode = 0555, - .child = devinet_sysctl.devinet_vars, - }, - }, - .devinet_conf_dir = { - { - .ctl_name = NET_IPV4_CONF, - .procname = conf, - .mode = 0555, - .child = devinet_sysctl.devinet_dev, - }, - }, - .devinet_proto_dir = { - { - .ctl_name = NET_IPV4, - .procname = ipv4, - .mode = 0555, - .child = devinet_sysctl.devinet_conf_dir, - }, - }, - .devinet_root_dir = { - { - .ctl_name = CTL_NET, - .procname = net, - .mode = 0555, - .child = devinet_sysctl.devinet_proto_dir, - }, - }, }; 
static void __devinet_sysctl_register(char *dev_name, int ctl_name, @@ -1507,6 +1472,16 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name, int i; struct devinet_sysctl_table *t; +#define DEVINET_CTL_PATH_DEV 3 + + struct ctl_path devinet_ctl_path[] = { + { .procname = net, .ctl_name = CTL_NET, }, + { .procname = ipv4, .ctl_name = NET_IPV4, }, + { .procname = conf, .ctl_name = NET_IPV4_CONF, }, + { /* to be set */ }, + { }, + }; + t = kmemdup(devinet_sysctl, sizeof(*t), GFP_KERNEL); if (!t) goto out; @@ -1516,24 +1491,20 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name, t-devinet_vars[i].extra1 = p; } - t-devinet_dev[0].ctl_name = ctl_name; - /* * Make a copy of dev_name, because '.procname' is regarded as const * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!dev_name) + t-dev_name = kstrdup(dev_name, GFP_KERNEL); + if (!t-dev_name) goto free; - t-devinet_dev[0].procname= dev_name; - t-devinet_dev[0].child = t-devinet_vars; - t-devinet_conf_dir[0].child = t-devinet_dev; - t-devinet_proto_dir[0].child = t-devinet_conf_dir; - t-devinet_root_dir[0].child = t-devinet_proto_dir; + devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t-dev_name; + devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name; - t-sysctl_header = register_sysctl_table(t-devinet_root_dir); + t-sysctl_header = register_sysctl_paths(devinet_ctl_path, + t-devinet_vars); if (!t-sysctl_header) goto free_procname; @@ -1541,7 +1512,7 @@ static void __devinet_sysctl_register(char *dev_name, int ctl_name, return; free_procname: - kfree(dev_name); + kfree(t-dev_name); free: kfree(t); out: @@ -1560,7 +1531,7 @@
[PATCH net-2.6.25 (resend) 2/3][IPV6] Unify and cleanup calls to addrconf_sysctl_register
Currently this call is (ab)used similar to devinet one - it registers sysctls for devices and for the default confs, while the all sysctls are registered separately. But unlike its devinet brother, the passed inet6_device is needed. The fix is to make a __addrconf_sysctl_register(), which registers sysctls for all devices we need, including default and all :) The original addrconf_sysctl_register() calls the introduced function, passing the inet6_device, device name and ifindex (to be used as procname and ctl_name) into it. Thanks to Herbert again for pointing out, that we can shrink the argument list to 1 :) Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2d2886a..ea1673d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -101,7 +101,7 @@ #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) #ifdef CONFIG_SYSCTL -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); +static void addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct ipv6_devconf *p); #endif @@ -400,7 +400,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) NET_IPV6_NEIGH, ipv6, ndisc_ifinfo_sysctl_change, NULL); - addrconf_sysctl_register(ndev, ndev-cnf); + addrconf_sysctl_register(ndev); #endif /* protected by rtnl_lock */ rcu_assign_pointer(dev-ip6_ptr, ndev); @@ -2386,7 +2386,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, NET_IPV6, NET_IPV6_NEIGH, ipv6, ndisc_ifinfo_sysctl_change, NULL); - addrconf_sysctl_register(idev, idev-cnf); + addrconf_sysctl_register(idev); #endif err = snmp6_register_dev(idev); if (err) @@ -4118,12 +4118,11 @@ static struct addrconf_sysctl_table }, }; -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) +static void __addrconf_sysctl_register(char *dev_name, int ctl_name, + struct inet6_dev *idev, struct ipv6_devconf *p) { int i; 
- struct net_device *dev = idev ? idev-dev : NULL; struct addrconf_sysctl_table *t; - char *dev_name = NULL; t = kmemdup(addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) @@ -4133,13 +4132,6 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf t-addrconf_vars[i].data += (char*)p - (char*)ipv6_devconf; t-addrconf_vars[i].extra1 = idev; /* embedded; no ref */ } - if (dev) { - dev_name = dev-name; - t-addrconf_dev[0].ctl_name = dev-ifindex; - } else { - dev_name = default; - t-addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; - } /* * Make a copy of dev_name, because '.procname' is regarded as const @@ -4150,6 +4142,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf if (!dev_name) goto free; + t-addrconf_dev[0].ctl_name = ctl_name; t-addrconf_dev[0].procname = dev_name; t-addrconf_dev[0].child = t-addrconf_vars; @@ -4172,6 +4165,12 @@ out: return; } +static void addrconf_sysctl_register(struct inet6_dev *idev) +{ + __addrconf_sysctl_register(idev-dev-name, idev-dev-ifindex, + idev, idev-cnf); +} + static void addrconf_sysctl_unregister(struct ipv6_devconf *p) { if (p-sysctl) { @@ -4270,9 +4269,10 @@ int __init addrconf_init(void) ipv6_addr_label_rtnl_register(); #ifdef CONFIG_SYSCTL - addrconf_sysctl.sysctl_header = - register_sysctl_table(addrconf_sysctl.addrconf_root_dir); - addrconf_sysctl_register(NULL, ipv6_devconf_dflt); + __addrconf_sysctl_register(all, NET_PROTO_CONF_ALL, + NULL, ipv6_devconf); + __addrconf_sysctl_register(default, NET_PROTO_CONF_DEFAULT, + NULL, ipv6_devconf_dflt); #endif return 0; -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality
Hi Vitaly, With that patch fixed.c now fully emulates MDIO bus, thus no need to duplicate PHY layer functionality. That, in turn, drastically simplifies the code, and drops down line count. As an additional bonus, now there is no need to register MDIO bus for each PHY, all emulated PHYs placed on the platform fixed MDIO bus. There is also no more need to pre-allocate PHYs via .config option, this is all now handled dynamically. p.s. Don't even try to understand patch content! Better: apply patch and look into resulting drivers/net/phy/fixed.c. If i understand your code correctly, you seem to rely on the fact that fixed_phy_add() is called before the fixed MDIO bus is scanned for devices. How is this supposed to work for modules or for the PPC_CPM_NEW_BINDING mode where the device tree is no longer scanned during fs_soc initialization but during device initialization? I tried to add fixed-phy support to fs_enet, but the fixed phy is not found this way. --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -36,6 +36,7 @@ #include linux/fs.h #include linux/platform_device.h #include linux/phy.h +#include linux/phy_fixed.h #include linux/vmalloc.h #include asm/pgtable.h @@ -1174,8 +1175,24 @@ static int __devinit find_phy(struct device_node *np, struct device_node *phynode, *mdionode; struct resource res; int ret = 0, len; + const u32 *data; + struct fixed_phy_status status = {}; + + data = of_get_property(np, fixed-link, NULL); + if (data) { + status.link = 1; + status.duplex = data[1]; + status.speed = data[2]; + + ret = fixed_phy_add(PHY_POLL, data[0], status); + if (ret) + return ret; + + snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data); + return 0; + } - const u32 *data = of_get_property(np, phy-handle, len); + data = of_get_property(np, phy-handle, len); if (!data || len != 4) return -EINVAL; Thanks, Jochen -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo 
info at http://vger.kernel.org/majordomo-info.html
Re: Please pull 'upstream-davem' branch of wireless-2.6
On Fri, Nov 30, 2007 at 09:23:06PM -0500, John W. Linville wrote: Dave/Herbert, Here are a few intended for 2.6.25. The bulk of them are the beginnings of support for 802.11n in mac80211. There is also a rework of the support for devices which can run scans in hardware, and a couple of additions to feature-removal-schedule.txt heralding the end of softmac. All applied to net-2.6.25. Thanks John! -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-2.6.25 (resend) 3/3][IPV6] Use ctl paths to register addrconf sysctls
On Sat, Dec 01, 2007 at 04:46:41PM +0300, Pavel Emelyanov wrote: This looks very much like the patch for ipv4's devinet. This is also intended to help us with the net namespaces and saves the ipv6.ko size by ~320 bytes. The difference from the first version is just the patch offsets, that changed due to changes in the patch #2. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Both applied. Thanks! -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace
Herbert Xu wrote: On Fri, Nov 30, 2007 at 07:37:28PM +0300, Pavel Emelyanov wrote: Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] All applied to net-2.6.25. diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b0cf075..f97b2a4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +43,7 @@ struct net { /* unix sockets */ int sysctl_unix_max_dgram_qlen; +struct ctl_table_header *unix_ctl; }; But I gotta say this struct/file is going to be enormous. It's also one of those files that causes everything to get recompiled. Maybe we ought to make a rule that each subsystem only gets to have at most one entry in it :) Thanks, Good point, thanks. We'll start thinking in that direction. Right now it is not finally cursed with all stuff around. Regards, Den -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-2.6.25 (resend) 3/3][IPV6] Use ctl paths to register addrconf sysctls
This looks very much like the patch for ipv4's devinet. This is also intended to help us with the net namespaces and saves the ipv6.ko size by ~320 bytes. The difference from the first version is just the patch offsets, that changed due to changes in the patch #2. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ea1673d..dbff389 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3848,10 +3848,7 @@ static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table addrconf_vars[__NET_IPV6_MAX]; - ctl_table addrconf_dev[2]; - ctl_table addrconf_conf_dir[2]; - ctl_table addrconf_proto_dir[2]; - ctl_table addrconf_root_dir[2]; + char *dev_name; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { @@ -4072,50 +4069,6 @@ static struct addrconf_sysctl_table .ctl_name = 0, /* sentinel */ } }, - .addrconf_dev = { - { - .ctl_name = NET_PROTO_CONF_ALL, - .procname = all, - .mode = 0555, - .child = addrconf_sysctl.addrconf_vars, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_conf_dir = { - { - .ctl_name = NET_IPV6_CONF, - .procname = conf, - .mode = 0555, - .child = addrconf_sysctl.addrconf_dev, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_proto_dir = { - { - .ctl_name = NET_IPV6, - .procname = ipv6, - .mode = 0555, - .child = addrconf_sysctl.addrconf_conf_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_root_dir = { - { - .ctl_name = CTL_NET, - .procname = net, - .mode = 0555, - .child = addrconf_sysctl.addrconf_proto_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, }; static void __addrconf_sysctl_register(char *dev_name, int ctl_name, @@ -4124,6 +4077,17 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, int i; struct addrconf_sysctl_table *t; +#define ADDRCONF_CTL_PATH_DEV 3 + + struct ctl_path addrconf_ctl_path[] = { + { .procname = net, .ctl_name = CTL_NET, }, + { .procname 
= ipv6, .ctl_name = NET_IPV6, }, + { .procname = conf, .ctl_name = NET_IPV6_CONF, }, + { /* to be set */ }, + { }, + }; + + t = kmemdup(addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) goto out; @@ -4138,19 +4102,15 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!dev_name) + t-dev_name = kstrdup(dev_name, GFP_KERNEL); + if (!t-dev_name) goto free; - t-addrconf_dev[0].ctl_name = ctl_name; - t-addrconf_dev[0].procname = dev_name; - - t-addrconf_dev[0].child = t-addrconf_vars; - t-addrconf_conf_dir[0].child = t-addrconf_dev; - t-addrconf_proto_dir[0].child = t-addrconf_conf_dir; - t-addrconf_root_dir[0].child = t-addrconf_proto_dir; + addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t-dev_name; + addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name; - t-sysctl_header = register_sysctl_table(t-addrconf_root_dir); + t-sysctl_header = register_sysctl_paths(addrconf_ctl_path, + t-addrconf_vars); if (t-sysctl_header == NULL) goto free_procname; @@ -4158,7 +4118,7 @@ static void __addrconf_sysctl_register(char *dev_name, int ctl_name, return; free_procname: - kfree(dev_name); + kfree(t-dev_name); free: kfree(t); out: @@ -4177,7 +4137,7 @@ static void addrconf_sysctl_unregister(struct ipv6_devconf *p) struct addrconf_sysctl_table *t = p-sysctl; p-sysctl = NULL;
Re: [PATCH net-2.6.25 (resend) 3/3][IPV4] Use ctl paths to register devinet sysctls
On Sat, Dec 01, 2007 at 04:39:58PM +0300, Pavel Emelyanov wrote: The difference from the first version is just the patch offsets, that changed due to changes in the patch #2. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] All applied to net-2.6.25. Thanks Pavel. -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
SSB: No is not an answer
Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n Support for the Sonics Silicon Backplane bus. You only need to enable this option, if you are configuring a kernel for an embedded system with this bus. It will be auto-selected if needed in other environments. The module will be called ssb. If unsure, say N. Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: SSB: No is not an answer
On Sat, Dec 01, 2007 at 03:17:44PM -0200, Arnaldo Carvalho de Melo wrote: Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n Support for the Sonics Silicon Backplane bus. You only need to enable this option, if you are configuring a kernel for an embedded system with this bus. It will be auto-selected if needed in other environments. The module will be called ssb. If unsure, say N. Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) I think this is OK -- it isn't really offering the choice to say no anyway. You must have turned-on B44 or B43(LEGACY) already? So, your choice is merely whether to have it built-in or as a module. John -- John W. Linville [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: SSB: No is not an answer
Em Sat, Dec 01, 2007 at 12:45:32PM -0500, John W. Linville escreveu: On Sat, Dec 01, 2007 at 03:17:44PM -0200, Arnaldo Carvalho de Melo wrote: Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n Support for the Sonics Silicon Backplane bus. You only need to enable this option, if you are configuring a kernel for an embedded system with this bus. It will be auto-selected if needed in other environments. The module will be called ssb. If unsure, say N. Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) I think this is OK -- it isn't really offering the choice to say no anyway. You must have turned-on B44 or B43(LEGACY) already? So, your choice is merely whether to have it built-in or as a module. Ok, so the comment on being unsure is wrong as we can't say N as suggested :-) - Arnaldo -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality
On Sat, 01 Dec 2007 14:48:54 +0100 Jochen Friedrich wrote: Hi Vitaly, With that patch fixed.c now fully emulates MDIO bus, thus no need to duplicate PHY layer functionality. That, in turn, drastically simplifies the code, and drops down line count. As an additional bonus, now there is no need to register MDIO bus for each PHY, all emulated PHYs placed on the platform fixed MDIO bus. There is also no more need to pre-allocate PHYs via .config option, this is all now handled dynamically. p.s. Don't even try to understand patch content! Better: apply patch and look into resulting drivers/net/phy/fixed.c. If I understand your code correctly, you seem to rely on the fact that fixed_phy_add() is called before the fixed MDIO bus is scanned for devices. How is this supposed to work for modules or for the PPC_CPM_NEW_BINDING mode where the device tree is no longer scanned during fs_soc initialization but during device initialization? Well, this is kind of a known issue - to work it around for now, place PHY lib after fs_enet in Makefile. This way it works for me for _NEW_BINDING and mpc866ads. I tried to add fixed-phy support to fs_enet, but the fixed phy is not found this way. The point is I have the code and it works now (for fs_enet etc.), but I need to find the way for the fixed phy pinning to work in either order with phylib. 
If you have ideas, please go ahead :) --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -36,6 +36,7 @@ #include linux/fs.h #include linux/platform_device.h #include linux/phy.h +#include linux/phy_fixed.h #include linux/vmalloc.h #include asm/pgtable.h @@ -1174,8 +1175,24 @@ static int __devinit find_phy(struct device_node *np, struct device_node *phynode, *mdionode; struct resource res; int ret = 0, len; + const u32 *data; + struct fixed_phy_status status = {}; + + data = of_get_property(np, fixed-link, NULL); + if (data) { + status.link = 1; + status.duplex = data[1]; + status.speed = data[2]; + + ret = fixed_phy_add(PHY_POLL, data[0], status); + if (ret) + return ret; + + snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data); + return 0; + } - const u32 *data = of_get_property(np, phy-handle, len); + data = of_get_property(np, phy-handle, len); if (!data || len != 4) return -EINVAL; Thanks, Jochen -- Sincerely, Vitaly -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
On Sat, 01 Dec 2007 08:10:17 -0500 Mark Lord [EMAIL PROTECTED] wrote: Now that we have network namespace support merged it is time to revisit the sysfs support so we can remove the dependency on !SYSFS. ... Now that the namespace updates are part of 2.6.24, there is a major inconsistency in network EXPORT_SYMBOLs. It used to be that an external network module could get away without having to add a MODULE_LICENSE(GPL*) line to the source. In support of that, common networking functions (still) use EXPORT_SYMBOL() rather than the more restrictive EXPORT_SYMBOL_GPL(). Eg. register_netdev(), sk_alloc(), __dev_get_by_name(). But now, none of those three are actually usable by default, because they all require init_net, which is EXPORT_SYMBOL_GPL(). Then init_net needs to be not GPL limited. Sorry, we need to allow non GPL network drivers. There is a fine line between keeping the binary seething masses from accessing random kernel functions, and allowing reasonable (but still non GPL) things like ndiswrapper to use network device interface. -- Stephen Hemminger [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Mcast packet loss 2.6.8.1 kernel
On Fri, Nov 16, 2007 at 12:38:22PM +0100, Eric Dumazet wrote: Hello Bernd I did some investigations on the netstat -s problem and one fix is to change the size of char buf1[1024], buf2[1024]; I changed it now to 2048, and included your page aligned I/O buffer patch. It is available in net-tools HEAD (net-tools.berlios.de) It would be good if I can get a few tests on different architectures. Gruss Bernd -- (OO) -- [EMAIL PROTECTED] -- ( .. )[EMAIL PROTECTED],linux.de,debian.org} http://www.eckes.org/ o--o 1024D/E383CD7E [EMAIL PROTECTED] v:+497211603874 f:+49721151516129 (OO) When cryptography is outlawed, bayl bhgynjf jvyy unir cevinpl! -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Then init_net needs to be not GPL limited. Sorry, we need to allow non GPL network drivers. There is a fine line between keeping the Why - they aren't exactly likely to be permissible by law binary seething masses from accessing random kernel functions, and allowing reasonable (but still non GPL) things like ndiswrapper to use network device interface. It's up to the ndiswrapper authors how they licence their code, but they should respect how we licence ours. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4 (resent) net-2.6.25][UNIX] Make the unix sysctl tables per-namespace
Pavel Emelyanov [EMAIL PROTECTED] writes: But I gotta say this struct/file is going to be enormous. It's also one of those files that causes everything to get recompiled. Maybe we ought to make a rule that each subsystem only gets to have at most one entry in it :) Thanks, Good point, thanks. We'll start thinking in that direction. Right now it is not finally cursed with all stuff around. Agree, the point is good :) but it has one pitfall :( Look, now we make _one_ dereference to get any net-xxx variable (sysctl, list head, lock, etc). When we force each subsystem to have its private pointer on this, we'll make them take _two_ dereferences. Before the whole net namespace stuff started we made _zero_ dereferences :) This may tell upon the performance. I'm not claiming that this is the major case against this idea, but when developing this idea, I think we should keep that fact in mind and pay good attention to performance regressions. Currently in my proof of concept tree I am at 65 variables and 648 bytes. This includes patches that are largely complete for ipv4. In number of variables this is about half of the current struct net_device, so I think the usage looks manageable. I agree that both performance and size are significant concerns, and that is essentially why struct net has the structure it does today. I print the size of struct net out at boot, we have to actually look at struct net when we make changes, so I don't think size bloat is going to happen unnoticed. By keeping the size below PAGE_SIZE, and keeping the number of variables per network subsystem few and small we should be ok. The fact that changing struct net causes the core of the networking stack to recompile is an added bonus that should also discourage people from playing with it too much. My recommendation is to keep an eye on struct net and if what we are doing there becomes a problem address it then. 
Eric -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Stephen Hemminger [EMAIL PROTECTED] writes: On Sat, 01 Dec 2007 08:10:17 -0500 Mark Lord [EMAIL PROTECTED] wrote: Now that we have network namespace support merged it is time to revisit the sysfs support so we can remove the dependency on !SYSFS. ... Now that the namespace updates are part of 2.6.24, there is a major inconsistency in network EXPORT_SYMBOLs. It used to be that an external network module could get away without having to add a MODULE_LICENSE(GPL*) line to the source. In support of that, common networking functions (still) use EXPORT_SYMBOL() rather than the more restrictive EXPORT_SYMBOL_GPL(). Eg. register_netdev(), sk_alloc(), __dev_get_by_name(). But now, none of those three are actually usable by default, because they all require init_net, which is EXPORT_SYMBOL_GPL(). Which alternative kernel does the above comment apply to? Then init_net needs to be not GPL limited. Sorry, we need to allow non GPL network drivers. For the record network drivers should not be affected. As a practical measure that just gets unmaintainable and it is unnecessary. There are specific exceptions where network drivers mess with the userspace interfaces where I do have some impact. However if you are messing with our userspace interface especially with network namespaces in place I don't see how it is possible for you to be anything other than a derivative work, and something we need in tree to keep maintenance a manageable thing. It should just be the core of the network stack that struct net has some effect on. There is a fine line between keeping the binary seething masses from accessing random kernel functions, and allowing reasonable (but still non GPL) things like ndiswrapper to use network device interface. Does ndiswrapper break? If so what dubious and unsupportable thing is it doing? 
Eric -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
On Sat, 1 Dec 2007 19:23:41 + Alan Cox [EMAIL PROTECTED] wrote: Then init_net needs to be not GPL limited. Sorry, we need to allow non GPL network drivers. There is a fine line between keeping the Why - they aren't exactly likely to be permissible by law Matter of debate in which there are several opinions. I don't like binary modules either, but don't feel qualified to render a legal opinion. binary seething masses from accessing random kernel functions, and allowing reasonable (but still non GPL) things like ndiswrapper to use network device interface. It's up to the ndiswrapper authors how they licence their code, but they should respect how we licence ours. Then change the license, explicitly and get it approved, forcing license changes by technically subversive means is bad policy. It is like Euro bureaucrats sneaking in software patents in regulations. If you want to have the debate and can get it resolved, then I support you. Actually, the whole mess would go away if the api for dev_get_by_ hadn't been changed in the namespace transition. IMHO the interface to dev_get_by_name() should not have added a namespace parameter, of the callers in the tree, only two use a different namespace. So it would have been better to introduce dev_get_by_name_ns() with the extra parameter. Can we get this resolved before 2.6.24 is released? Going back and forth on API's is just needless frottage. -- Stephen Hemminger [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Then change the license, explicitly and get it approved, forcing license changes by technically subversive means is bad policy. It is like Euro bureaucrats I don't need to - the licence has been the same since about 0.12 Alan -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Stephen Hemminger [EMAIL PROTECTED] writes: Actually, the whole mess would go away if the api for dev_get_by_ hadn't been changed in the namespace transition. IMHO the interface to dev_get_by_name() should not have added a namespace parameter, of the callers in the tree, only two use a different namespace. So it would have been better to introduce dev_get_by_name_ns() with the extra parameter. As a general rule if you are calling dev_get_by_name and taking an init_net parameter that means your code has not yet been converted to actually support network namespaces. Not everything can be safely changed at once so we take it by steps. When the code fully supports network namespaces practically nothing will take an init_net parameter. The network namespace parameter will come in some form from userspace. Either from current or from the network socket. Except for boot time initialization I don't know of any cases using dev_get_by_ that won't need to be modified before the network namespace work is complete. I believe I mentioned at the outset that getting full network namespace support was going to take a while and a bunch of patches. Can we get this resolved before 2.6.24 is released? Going back and forth on API's is just needless frottage. Sure. We keep the updated dev_get_by_ that takes a network namespace parameter. Or is there some legitimate usage of it by out of tree code that I'm not aware of? Eric -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Eric W. Biederman wrote: Stephen Hemminger [EMAIL PROTECTED] writes: Actually, the whole mess would go away if the api for dev_get_by_ hadn't been changed in the namespace transition. IMHO the interface to dev_get_by_name() should not have added a namespace parameter, of the callers in the tree, only two use a different namespace. So it would have been better to to introduce dev_get_by_name_ns() with the extra parameter. As a general rule if you are calling dev_get_by_name and taking an init_net parameter that means you code has not yet been converted to actually support network namespaces. Not everything can be safely changed at once so we take it by steps. When the code fully supports network namespaces practically nothing will take an init_net parameter. The network namespace parameter will come in some form from userspace. Either from current or from the network socket. Except for boot time initialization I don't know of any cases using dev_get_by_ that won't need to be modified before the network namespace work is complete. I believe I mentioned that this getting the fully network namespace support was going to take a while and a bunch of patches at the outset. Can we get this resolved before 2.6.24 is released? Going back and forth on API's is just needless frottage. Sure. We keep the updated dev_get_by_ that takes a network namespace parameter. .. And what should code be passing in when # CONFIG_NET_NS is not set ? -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: SSB: No is not an answer
On Saturday 01 December 2007 20:00:23 Arnaldo Carvalho de Melo wrote: Em Sat, Dec 01, 2007 at 12:45:32PM -0500, John W. Linville escreveu: On Sat, Dec 01, 2007 at 03:17:44PM -0200, Arnaldo Carvalho de Melo wrote: Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) n Support for the Sonics Silicon Backplane bus. You only need to enable this option, if you are configuring a kernel for an embedded system with this bus. It will be auto-selected if needed in other environments. The module will be called ssb. If unsure, say N. Sonics Silicon Backplane support (SSB) [M/y/?] (NEW) I think this is OK -- it isn't really offering the choice to say no anyway. You must have turned-on B44 or B43(LEGACY) already? So, your choice is merely whether to have it built-in or as a module. Ok, so the comment on being unsure is wrong as we can't say N as suggested :-) Oh, come on... Read the _whole_ comment. -- Greetings Michael. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
On Sat, 01 Dec 2007 15:21:12 -0500 Mark Lord [EMAIL PROTECTED] wrote: Eric W. Biederman wrote: Stephen Hemminger [EMAIL PROTECTED] writes: Sure. We keep the updated dev_get_by_ that takes a network namespace parameter. .. And what should code be passing in when # CONFIG_NET_NS is not set ? network drivers probably really really don't want to call dev_get_by_XXX... in fact no NIC driver in drivers/net does so! Sounds like whatever driver you're looking at has a nasty bug in that it's using non-driver APIs... -- If you want to reach me at my work email, use [EMAIL PROTECTED] For development, discussion and tips for power savings, visit http://www.lesswatts.org -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Mark Lord [EMAIL PROTECTED] writes: Can we get this resolved before 2.6.24 is released? Going back and forth on API's is just needless frottage. Sure. We keep the updated dev_get_by_ that takes a network namespace parameter. .. And what should code be passing in when # CONFIG_NET_NS is not set ? Mostly CONFIG_NET_NS is a define to keep us from exposing the feature to user space not to remove the code impact. People could not stand the look of the code that would actually allow us to compile everything out. So all of the struct net * fields remain when !CONFIG_NET_NS. Including the global variable init_net. Eric -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv7 iptables] Interface group match
Laszlo Attila Toth wrote, On 11/29/2007 05:11 PM: ... Index: extensions/libxt_ifgroup.man === --- extensions/libxt_ifgroup.man (revision 0) +++ extensions/libxt_ifgroup.man (revision 0) @@ -0,0 +1,36 @@ +Maches packets on an interface if it is in the same interface group +Matches packets on an interface if it is in the same interface group +as specified by the +.B --ifgroup-in +or +.B --ifgroup-in +.B --ifgroup-out +parameter. If a mask is also specified, the masked value of +the inteface's group must be equal to the given value of the +the interface's group must be equal to the given value of the +.B --ifgroup-in +or +.B --ifgroup-out +parameter to match. This match is available in all tables. +.TP +.BR [!] --ifgroup-in \fIgroup[/mask]\fR +This specifies the interface group of input interface and the optional mask. +Valid only in the in the +Valid only in the +.B PREROUTING +and +.B INPUT +and +.B FORWARD +chains, and user-defined chains which are only called from those +chains. +.TP +.BR [!] --ifgroup-out \fIgroup[/mask]\fR +This specifies the interface group of out interface and the optional mask. +This specifies the interface group of output interface and the optional mask. +Valid only in the in the +.B FORWARD +and +.B OUTPUT +and +.B POSTROUTING +chains, and user-defined chains which are only called from those +chains. +.RS +.PP + Regards, Jarek P. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Please pull 'fixes-jgarzik' branch of wireless-2.6
John W. Linville wrote: Jeff, A few fixes intended for 2.6.24... Let me know if there are any problems! Thanks, John --- Individual patches are available here: http://www.kernel.org/pub/linux/kernel/people/linville/wireless-2.6.git fixes-jgarzik --- The following changes since commit d9f8bcbf67a0ee67c8cb0734f003dfe916bb5248: Linus Torvalds (1): Linux 2.6.24-rc3 are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git fixes-jgarzik David Woodhouse (1): libertas: Don't set NETIF_F_IPV6_CSUM in dev->features Holger Schurig (1): libertas: let more than one MAC event through pulled -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv7 iptables] Interface group match
Jarek Poplawski wrote, On 12/01/2007 10:19 PM: Laszlo Attila Toth wrote, On 11/29/2007 05:11 PM: ... Index: extensions/libxt_ifgroup.man ... +Valid only in the in the +Valid only in the +.B FORWARD +and +.B OUTPUT +and +.B POSTROUTING +chains, and user-defined chains which are only called from those +chains. +.RS +.PP + Regards, Jarek P. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2.6.24 1/1]S2io: Fixed the case when the card initialization fails on mtu change
Sreenivasa Honnur wrote: Fix the case when the card initialization fails on a mtu change and then close is called (due to ifdown), which frees non existent rx buffers. - Returning appropriate error codes in init_nic function. - In s2io_close function s2io_card_down is called only when device is up. - In s2io_change_mtu function return value of s2io_card_up function is checked and returned if it failed. Signed-off-by: Surjit Reang [EMAIL PROTECTED] Signed-off-by: Sreenivasa Honnur [EMAIL PROTECTED] Signed-off-by: Ramkrishna Vepa [EMAIL PROTECTED] --- diff -Nurp patch_8/drivers/net/s2io.c patch_9/drivers/net/s2io.c --- patch_8/drivers/net/s2io.c 2007-11-20 23:31:57.0 +0530 +++ patch_9/drivers/net/s2io.c 2007-11-20 23:13:24.0 +0530 @@ -84,7 +84,7 @@ #include s2io.h #include s2io-regs.h -#define DRV_VERSION 2.0.26.8 +#define DRV_VERSION 2.0.26.9 /* S2io Driver name version. */ static char s2io_driver_name[] = Neterion; applied patch to 2.6.24-rc, except for the above chunk, which failed -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6] skge: FIFO Ram calculation error
applied 1-6 and additional 1-2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3][RESEND] phylib: add PHY interface modes for internal delay for tx and rx only
Kim Phillips wrote: Allow phylib specification of cases where hardware needs to configure PHYs for Internal Delay only on either RX or TX (not both). Signed-off-by: Kim Phillips [EMAIL PROTECTED] Tested-by: Anton Vorontsov [EMAIL PROTECTED] Acked-by: Li Yang [EMAIL PROTECTED] --- include/linux/phy.h |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index f0742b6..e10763d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -58,6 +58,8 @@ typedef enum { PHY_INTERFACE_MODE_RMII, PHY_INTERFACE_MODE_RGMII, PHY_INTERFACE_MODE_RGMII_ID, + PHY_INTERFACE_MODE_RGMII_RXID, + PHY_INTERFACE_MODE_RGMII_TXID, PHY_INTERFACE_MODE_RTBI } phy_interface_t; applied 1-3 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/1] ctc: make use of alloc_netdev()
applied -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] cxgb - fix T2 GSO
Divy Le Ray wrote: From: Divy Le Ray [EMAIL PROTECTED] The patch ensures that a GSO skb has enough headroom to push an encapsulating cpl_tx_pkt_lso header. Signed-off-by: Divy Le Ray [EMAIL PROTECTED] --- applied 1-3 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] sky2: revert to access PCI config via device space
Stephen Hemminger wrote: Using the hardware window into PCI config space is more reliable and smaller/faster than using the pci_config routines. It avoids issues with MMCONFIG etc. Reverts: 167f53d05fccb47b6eeadac7f6705b3f2f042d03 Please apply for 2.6.24 Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] applied 1-3 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] e1000: Fix NAPI state bug when Rx complete
Auke Kok wrote: Don't exit polling when we have not yet used our budget, this causes the NAPI system to end up with a messed up poll list. Signed-off-by: Auke Kok [EMAIL PROTECTED] --- drivers/net/e1000/e1000_main.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index b7c3070..724f067 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3926,7 +3926,7 @@ e1000_clean(struct napi_struct *napi, int budget) work_done, budget); /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done < budget)) || + if ((!tx_cleaned && (work_done == 0)) || !netif_running(poll_dev)) { applied -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] SET_NETDEV_DEV() in fec_mpc52xx.c
David Woodhouse wrote: This helps to allow the Fedora installer to use the built-in Ethernet on the Efika for a network install. Signed-off-by: David Woodhouse [EMAIL PROTECTED] --- a/drivers/net/fec_mpc52xx.c +++ b/drivers/net/fec_mpc52xx.c @@ -971,6 +971,8 @@ mpc52xx_fec_probe(struct of_device *op, const struct of_device_id *match) mpc52xx_fec_reset_stats(ndev); + SET_NETDEV_DEV(ndev, &op->dev); + /* Register the new network device */ rv = register_netdev(ndev); applied -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Stop phy code from returning success to unknown ioctls.
David Woodhouse wrote: This kind of sucks, and prevents the Fedora installer from using the device for network installs... [EMAIL PROTECTED] phy]# iwconfig eth0 Warning: Driver for device eth0 has been compiled with an ancient version of Wireless Extension, while this program support version 11 and later. Some things may be broken... eth0ESSID:off/any Nickname: NWID:0 Channel:0 Access Point: 00:00:BF:81:14:E0 Bit Rate:-1.08206e+06 kb/s Sensitivity=0/0 RTS thr:off Fragment thr:off Encryption key:too big Power Management:off Signed-off-by: David Woodhouse [EMAIL PROTECTED] diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 9bc1177..7c9e6e3 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -406,6 +406,9 @@ int phy_mii_ioctl(struct phy_device *phydev, phydev->drv->config_init) phydev->drv->config_init(phydev); break; + + default: + return -ENOTTY; } applied -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] via-velocity: don't oops on MTU change (resend)
Stephen Hemminger wrote: The VIA velocity driver needs the following to allow changing MTU when down. The buffer size needs to be computed when device is brought up, not when device is initialized. This also fixes a bug where the buffer size was computed differently on change_mtu versus initial setting. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- This is a properly formatted version of previously submitted patch. Please apply for 2.6.24 applied -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] LIB82596: correct data types for hardware addresses
Thomas Bogendoerfer wrote: dma_addr_t is 64bit wide on some architectures (for example 64bit MIPS), so it's not a good idea to use it for 32bit wide addresses in descriptors. Signed-off-by: Thomas Bogendoerfer [EMAIL PROTECTED] --- drivers/net/lib82596.c | 50 1 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c index 9a855e5..b59f442 100644 --- a/drivers/net/lib82596.c +++ b/drivers/net/lib82596.c @@ -176,8 +176,8 @@ struct i596_reg { struct i596_tbd { unsigned short size; unsigned short pad; - dma_addr_t next; - dma_addr_t data; + u32 next; + u32 data; u32 cache_pad[5]; /* Total 32 bytes... */ }; applied, though it's incomplete for today's drivers. I recommend converting those data types to the sparse data types that indicate endian-ness (see __le32, etc.). Then verify that the code passes all sparse checks. See Documentation/sparse.txt for more info. Also, make sure it passes scripts/checkpatch.pl checks too, while you're at it... Thanks, Jeff -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality
On Sat, Dec 01, 2007 at 02:48:54PM +0100, Jochen Friedrich wrote: Hi Vitaly, With that patch fixed.c now fully emulates MDIO bus, thus no need to duplicate PHY layer functionality. That, in turn, drastically simplifies the code, and drops down line count. As an additional bonus, now there is no need to register MDIO bus for each PHY, all emulated PHYs placed on the platform fixed MDIO bus. There is also no more need to pre-allocate PHYs via .config option, this is all now handled dynamically. p.s. Don't even try to understand patch content! Better: apply patch and look into resulting drivers/net/phy/fixed.c. If i understand your code correctly, you seem to rely on the fact that fixed_phy_add() is called before the fixed MDIO bus is scanned for devices. Yes, indeed. The other name of fixed phys are platform phys or platform MDIO bus on which virtual PHYs are placed. That is, these phys supposed to be created by the platform setup code (arch/). The rationale here is: we do hardware emulation, thus to make drivers actually see that hardware, we have to create it early. I tried to add fixed-phy support to fs_enet, but the fixed phy is not found this way. 
--- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -36,6 +36,7 @@ #include linux/fs.h #include linux/platform_device.h #include linux/phy.h +#include linux/phy_fixed.h #include linux/vmalloc.h #include asm/pgtable.h @@ -1174,8 +1175,24 @@ static int __devinit find_phy(struct device_node *np, struct device_node *phynode, *mdionode; struct resource res; int ret = 0, len; + const u32 *data; + struct fixed_phy_status status = {}; + + data = of_get_property(np, fixed-link, NULL); + if (data) { + status.link = 1; + status.duplex = data[1]; + status.speed = data[2]; + + ret = fixed_phy_add(PHY_POLL, data[0], status); + if (ret) + return ret; + + snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data); + return 0; + } - const u32 *data = of_get_property(np, phy-handle, len); + data = of_get_property(np, phy-handle, len); if (!data || len != 4) return -EINVAL; ^^ the correct solution is to implement arch_initcall function which will create fixed PHYs, and then leave only snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data); part in the fs_enet's find_phy(). Try add something like this to the fsl_soc.c (compile untested): - - - - static int __init of_add_fixed_phys(void) { struct device_node *np; const u32 *prop; struct fixed_phy_status status = {}; while ((np = of_find_node_by_name(NULL, ethernet))) { data = of_get_property(np, fixed-link, NULL); if (!data) continue; status.link = 1; status.duplex = data[1]; status.speed = data[2]; ret = fixed_phy_add(PHY_POLL, data[0], status); if (ret) return ret; } return 0; } arch_initcall(of_add_fixed_phys); - - - - And remove fixed_phy_add() from the fs_enet. This should work nicely and also should be ideologically correct. ;-) How is this supposed to work for modules or for the PPC_CPM_NEW_BINDING mode where the device tree is no longer scanned during fs_soc initialization but during device initialization? We should mark fixed.c as bool. 
Fake/virtual/fixed/platform PHYs creation is architecture code anyway, can't be =m. -- Anton Vorontsov email: [EMAIL PROTECTED] backup email: [EMAIL PROTECTED] irc://irc.freenode.net/bd2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] sky2: align IP header on Rx if possible
Stephen Hemminger wrote: The sky2 driver was not aligning the IP header on receive buffers. This workaround is only needed on hardware with broken FIFO, newer chips without FIFO can just DMA to unaligned address. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] applied 1-2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [1/12] pasemi_mac: RX/TX ring management cleanup
Olof Johansson wrote: pasemi_mac: RX/TX ring management cleanup Prepare a bit for supporting multiple TX queues by cleaning up some of the ring management and shuffle things around a bit. Signed-off-by: Olof Johansson [EMAIL PROTECTED] applied 1-12 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] SGISEEQ: use cached memory access to make driver work on IP28
Thomas Bogendoerfer wrote: Following patch is clearly 2.6.25 material and is needed to get SGI IP28 machines supported. Thomas. SGI IP28 machines would need special treatment (enable adding additional wait states) when accessing memory uncached. To avoid this pain I changed the driver to use only cached access to memory. Signed-off-by: Thomas Bogendoerfer [EMAIL PROTECTED] applied. As I have noted to you previously, /please/ put extraneous comments /after/ a --- separator, so that they are not copied by git-am (Linus's email patch import tool) into the permanent kernel changelog. The above should look like: snip SGI IP28 machines would need special treatment (enable adding additional wait states) when accessing memory uncached. To avoid this pain I changed the driver to use only cached access to memory. Signed-off-by: Thomas Bogendoerfer [EMAIL PROTECTED] --- Following patch is clearly 2.6.25 material and is needed to get SGI IP28 machines supported. Thomas. drivers/net/sgiseeq.c | 239 ++--- 1 files changed, 166 insertions(+), 73 deletions(-) /snip See Documentation/SubmittingPatches for more details, in particular 14) The canonical patch format or http://linux.yyz.us/patch-format.html Jeff -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ethtool: fix typo on setting speed 10000
Auke Kok wrote: From: Jesse Brandeburg [EMAIL PROTECTED] fix the typo in speed 10000 setting. Signed-off-by: Jesse Brandeburg [EMAIL PROTECTED] Signed-off-by: Auke Kok [EMAIL PROTECTED] --- ethtool.c |2 +- applied -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Fw: [PATCH] Add the phy_device_release device method.
Thierry, could you resend this patch to me? I do not seem to have an apply-able version of this patch anywhere. The copy DaveM forwarded to me had problems (though the technical content looks OK) Thanks, Jeff -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Pull request for 'r6040' branch
Francois Romieu wrote: Please pull from branch 'r6040' in repository git://git.kernel.org/pub/scm/linux/kernel/git/romieu/netdev-2.6.git r6040 to get the changes below. Distance from 'netdev-2.6-upstream' (02e063b58b7c7084bae3d599c54dcf26c8efa9b7) -- 8dd657d2d82657c1d70219a704ccfe4fecfc55be f00c12227fe587b6c1bbb6b459394db29dc5fac0 c7eaa9bde00c778b53f778d49617353d3b9b0c21 cc27eeb9474a87b2073488f37d9e90e6a3557664 Diffstat drivers/net/r6040.c | 138 ++- include/linux/pci_ids.h |1 - 2 files changed, 64 insertions(+), 75 deletions(-) Shortlog Francois Romieu (4): r6040: compile error r6040: remove virt_to_bus r6040: erroneous dev-priv r6040: cleanups changes pulled, thanks for helping to clean this up! -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality
Vitaly Bordug wrote: With that patch fixed.c now fully emulates MDIO bus, thus no need to duplicate PHY layer functionality. That, in turn, drastically simplifies the code, and drops down line count. As an additional bonus, now there is no need to register MDIO bus for each PHY, all emulated PHYs placed on the platform fixed MDIO bus. There is also no more need to pre-allocate PHYs via .config option, this is all now handled dynamically. p.s. Don't even try to understand patch content! Better: apply patch and look into resulting drivers/net/phy/fixed.c. Signed-off-by: Anton Vorontsov [EMAIL PROTECTED] Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] what's the context of this patchset? 2.6.25? it's late for 2.6.24-rc, IMO. Do I have the latest version (sent Nov 26 @ 9:29am)? Jeff -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv7 iproute2 2/2] Interface group as new ip link option
Laszlo Attila Toth wrote, On 11/29/2007 05:11 PM: Interfaces can be grouped and each group has an unique positive integer ID. It can be set via ip link. Symbolic names can be specified in /etc/iproute2/rt_ifgroup. Any value of unsigned int32 is valid. ... diff --git a/lib/rt_names.c b/lib/rt_names.c index 8d019a0..ec6638c 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -439,10 +439,72 @@ int rtnl_dsfield_a2n(__u32 *id, char *arg) } } - res = strtoul(arg, end, 16); + res = strtoul(arg, end, 0); Won't this break any scripts? Jarek P. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Arjan van de Ven wrote: On Sat, 01 Dec 2007 15:21:12 -0500 Mark Lord [EMAIL PROTECTED] wrote: Eric W. Biederman wrote: Stephen Hemminger [EMAIL PROTECTED] writes: Sure. We keep the updated dev_get_by_ that takes a network namespace parameter. .. And what should code be passing in when # CONFIG_NET_NS is not set ? network drivers probably really really don't want to call dev_get_by_XXX... .. Fine. But all of them want to call sk_alloc(), and many want to do register_netdev(). So what should they be using there ? -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: namespace support requires network modules to say GPL
Arjan van de Ven wrote: On Sat, 01 Dec 2007 15:21:12 -0500 Mark Lord [EMAIL PROTECTED] wrote: Eric W. Biederman wrote: Stephen Hemminger [EMAIL PROTECTED] writes: Sure. We keep the updated dev_get_by_ that takes a network namespace parameter. .. And what should code be passing in when # CONFIG_NET_NS is not set ? network drivers probably really really don't want to call dev_get_by_XXX... .. Fine. But all of them want to call sk_alloc(), and many want to do register_netdev(). So what should they be using there ? And please STOP trimming the CC list. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality
On Sat, 01 Dec 2007 16:59:52 -0500 Jeff Garzik wrote: Vitaly Bordug wrote: With that patch fixed.c now fully emulates MDIO bus, thus no need to duplicate PHY layer functionality. That, in turn, drastically simplifies the code, and drops down line count. As an additional bonus, now there is no need to register MDIO bus for each PHY, all emulated PHYs placed on the platform fixed MDIO bus. There is also no more need to pre-allocate PHYs via .config option, this is all now handled dynamically. p.s. Don't even try to understand patch content! Better: apply patch and look into resulting drivers/net/phy/fixed.c. Signed-off-by: Anton Vorontsov [EMAIL PROTECTED] Signed-off-by: Vitaly Bordug [EMAIL PROTECTED] what's the context of this patchset? 2.6.25? Fine with it. it's late for 2.6.24-rc, IMO. Do I have the latest version (sent Nov 26 @ 9:29am)? yes, that's it. -- Sincerely, Vitaly -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [NET] phy/fixed.c: rework to not duplicate PHY layer functionality
On Sun, 2 Dec 2007 00:34:03 +0300 Anton Vorontsov wrote: If I understand your code correctly, you seem to rely on the fact that fixed_phy_add() is called before the fixed MDIO bus is scanned for devices. Yes, indeed. Another name for fixed phys is platform phys, or a platform MDIO bus on which virtual PHYs are placed. That is, these phys are supposed to be created by the platform setup code (arch/). The rationale here is: we do hardware emulation, thus to make drivers actually see that hardware, we have to create it early. Well, that was the intention, but... The point is - as the device is emulated, (nearly) everything is doable, and the only tradeoff to consider is how far we will go with that emulation. IOW, PHYlib could be tricked to do the right thing, and I thought about adding module flexibility... But thinking more about it, it seems that BSP-code-phy-creation just sucks less and is clear enough yet flexible. -- Sincerely, Vitaly -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/21] [TCP]: Move LOSTRETRANS MIB outside !(L|S) check
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Usually those skbs will have L set, not counting them as lost retransmissions is misleading. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_input.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9f12541..ba05e16 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1160,8 +1160,8 @@ static int tcp_mark_lost_retrans(struct sock *sk) tp-lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)-sacked |= TCPCB_LOST; flag |= FLAG_DATA_SACKED; - NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } + NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) new_low_seq = ack_seq; -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-2.6.25 0/21]: TCP tweaks and RB-tree WIP preview
Hi all, First, there are some rather trivial ones (up to [TCP]: Cleanup local variables of clean_rtx_queue)... They're followed by my current efforts to improve SACK processing latencies with large windows, including an RB-tree for fast searching, a problem space split to provide tight bounds that relate to the amount of new information discovered through the received SACK block, and a per-skb fack_count to provide access to fack_count at the entry point provided by the search. There's still a need for additional changes after these to really provide such good bounds without loopholes, but this is already a big step in the right direction. The cost of storing all this seems tremendous; however, the new structures also enable dropping many existing caches, making the end result much nicer (these are not yet done). The potential kill list includes at least: most _hints drop, recv_sack_cache drop, highest_sack skb->seqno restore... Also, the linked list for the SACKed part is probably unnecessary, but having it makes things simpler; it can be killed later on rather than adding complexity to this patch. ...I would still like to take some time to make the last patch cleaner by extracting at least the DSACK separation from it. Even so, the amount of necessary changes in the core patch remains relatively huge. ...Minor FIXMEs remain to be done; only the DSACK one is really a preventive one. Comments welcome, especially about the last patch. -- i. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/21] [TCP]: Remove superfluous FLAG_DATA_SACKED
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] To get there, highest_sack must have advanced. When it advances, a new skb is SACKed, which already sets that FLAG. Besides, the original purpose of it has puzzled me, never understood why LOST bit setting of retransmitted skb is marked with FLAG_DATA_SACKED. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_input.c | 10 +++--- 1 files changed, 3 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ba05e16..6986a2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1118,12 +1118,11 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, * highest SACK block). Also calculate the lowest snd_nxt among the remaining * retransmitted skbs to avoid some costly processing per ACKs. */ -static int tcp_mark_lost_retrans(struct sock *sk) +static void tcp_mark_lost_retrans(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int flag = 0; int cnt = 0; u32 new_low_seq = tp-snd_nxt; u32 received_upto = TCP_SKB_CB(tp-highest_sack)-end_seq; @@ -1131,7 +1130,7 @@ static int tcp_mark_lost_retrans(struct sock *sk) if (!tcp_is_fack(tp) || !tp-retrans_out || !after(received_upto, tp-lost_retrans_low) || icsk-icsk_ca_state != TCP_CA_Recovery) - return flag; + return; tcp_for_write_queue(skb, sk) { u32 ack_seq = TCP_SKB_CB(skb)-ack_seq; @@ -1159,7 +1158,6 @@ static int tcp_mark_lost_retrans(struct sock *sk) if (!(TCP_SKB_CB(skb)-sacked (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp-lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)-sacked |= TCPCB_LOST; - flag |= FLAG_DATA_SACKED; } NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } else { @@ -1171,8 +1169,6 @@ static int tcp_mark_lost_retrans(struct sock *sk) if (tp-retrans_out) tp-lost_retrans_low = new_low_seq; - - return flag; } static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, @@ -1603,7 +1599,7 @@ 
advance_sp: for (j = 0; j used_sacks; j++) tp-recv_sack_cache[i++] = sp[j]; - flag |= tcp_mark_lost_retrans(sk); + tcp_mark_lost_retrans(sk); tcp_verify_left_out(tp); -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/21] [TCP] Cong.ctrl modules: remove unused good_ack from cong_avoid
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- include/net/tcp.h|4 ++-- net/ipv4/tcp_bic.c |3 +-- net/ipv4/tcp_cong.c |2 +- net/ipv4/tcp_cubic.c |3 +-- net/ipv4/tcp_highspeed.c |3 +-- net/ipv4/tcp_htcp.c |3 +-- net/ipv4/tcp_hybla.c |5 ++--- net/ipv4/tcp_illinois.c |3 +-- net/ipv4/tcp_input.c |9 - net/ipv4/tcp_lp.c|4 ++-- net/ipv4/tcp_scalable.c |3 +-- net/ipv4/tcp_vegas.c |7 +++ net/ipv4/tcp_veno.c |7 +++ net/ipv4/tcp_yeah.c |3 +-- 14 files changed, 24 insertions(+), 35 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 7e58326..cdd0050 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -652,7 +652,7 @@ struct tcp_congestion_ops { /* lower bound for congestion window (optional) */ u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int good_ack); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -683,7 +683,7 @@ extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); -extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight); extern u32 tcp_reno_min_cwnd(const struct sock *sk); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 5dba0fc..5212ed9 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -136,8 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca-cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int data_acked) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); 
struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 55fca18..4451750 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -324,7 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 80bd084..3aa0b23 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -246,8 +246,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca-cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int data_acked) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 14a073d..8b6caaf 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,8 +109,7 @@ static void hstcp_init(struct sock *sk) tp-snd_cwnd_clamp = min_t(u32, tp-snd_cwnd_clamp, 0x/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 adk, -u32 in_flight, int data_acked) +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 5215691..af99776 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -225,8 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp-snd_cwnd * ca-beta) 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int data_acked) +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_hybla.c 
b/net/ipv4/tcp_hybla.c index b3e55cf..44618b6 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -85,8 +85,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments 1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct hybla *ca =
[PATCH 03/21] [TCP]: Unite identical code from two seqno split blocks
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Bogus seqno compares just mislead, the code is identical for both sides of the seqno compare (and was even executed just once because of return in between). Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_input.c |7 +-- 1 files changed, 1 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6986a2d..29268df 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1246,8 +1246,7 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, if (dup_sack (sacked TCPCB_RETRANS)) { if (after(TCP_SKB_CB(skb)-end_seq, tp-undo_marker)) tp-undo_retrans--; - if (!after(TCP_SKB_CB(skb)-end_seq, tp-snd_una) - (sacked TCPCB_SACKED_ACKED)) + if (sacked TCPCB_SACKED_ACKED) *reord = min(fack_count, *reord); } @@ -1310,10 +1309,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, if (after(TCP_SKB_CB(skb)-seq, tcp_highest_sack_seq(tp))) tp-highest_sack = skb; - - } else { - if (dup_sack (sacked TCPCB_RETRANS)) - *reord = min(fack_count, *reord); } /* D-SACK. We can detect redundant retransmission in S|R and plain R -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/21] [TCP] MTUprobe: prepare skb fields earlier
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] They better be valid when call to write_queue functions is made once things that follow are going in. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_output.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f8266f9..4cb4a7f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1343,7 +1343,6 @@ static int tcp_mtu_probe(struct sock *sk) sk_charge_skb(sk, nskb); skb = tcp_send_head(sk); - tcp_insert_write_queue_before(nskb, skb, sk); TCP_SKB_CB(nskb)-seq = TCP_SKB_CB(skb)-seq; TCP_SKB_CB(nskb)-end_seq = TCP_SKB_CB(skb)-seq + probe_size; @@ -1352,6 +1351,8 @@ static int tcp_mtu_probe(struct sock *sk) nskb-csum = 0; nskb-ip_summed = skb-ip_summed; + tcp_insert_write_queue_before(nskb, skb, sk); + len = 0; while (len probe_size) { next = tcp_write_queue_next(sk, skb); -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/21] [TCP]: Add unlikely() to urgent handling in clean_rtx_queue
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_input.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f0b6c7..365c6d4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2822,8 +2822,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, if (sacked TCPCB_LOST) tp-lost_out -= packets_acked; - if ((sacked TCPCB_URG) tp-urg_mode - !before(end_seq, tp-snd_up)) + if (unlikely((sacked TCPCB_URG) tp-urg_mode +!before(end_seq, tp-snd_up))) tp-urg_mode = 0; tp-packets_out -= packets_acked; -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/21] [TCP]: Cleanup local variables of clean_rtx_queue
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_input.c | 26 -- 1 files changed, 12 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 365c6d4..a43c897 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2765,8 +2765,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, u32 now = tcp_time_stamp; int fully_acked = 1; int flag = 0; - int prior_packets = tp-packets_out; - u32 cnt = 0; + u32 pkts_acked = 0; u32 reord = tp-packets_out; s32 seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); @@ -2774,7 +2773,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, while ((skb = tcp_write_queue_head(sk)) skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u32 end_seq; - u32 packets_acked; + u32 acked_pcount; u8 sacked = scb-sacked; if (after(scb-end_seq, tp-snd_una)) { @@ -2782,14 +2781,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, !after(tp-snd_una, scb-seq)) break; - packets_acked = tcp_tso_acked(sk, skb); - if (!packets_acked) + acked_pcount = tcp_tso_acked(sk, skb); + if (!acked_pcount) break; fully_acked = 0; end_seq = tp-snd_una; } else { - packets_acked = tcp_skb_pcount(skb); + acked_pcount = tcp_skb_pcount(skb); end_seq = scb-end_seq; } @@ -2801,11 +2800,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, if (sacked TCPCB_RETRANS) { if (sacked TCPCB_SACKED_RETRANS) - tp-retrans_out -= packets_acked; + tp-retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; if ((flag FLAG_DATA_ACKED) || - (packets_acked 1)) + (acked_pcount 1)) flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { if (seq_rtt 0) { @@ -2814,20 +2813,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, last_ackt = skb-tstamp; } if (!(sacked TCPCB_SACKED_ACKED)) - reord = min(cnt, reord); + reord = min(pkts_acked, reord); } if (sacked 
TCPCB_SACKED_ACKED) - tp-sacked_out -= packets_acked; + tp-sacked_out -= acked_pcount; if (sacked TCPCB_LOST) - tp-lost_out -= packets_acked; + tp-lost_out -= acked_pcount; if (unlikely((sacked TCPCB_URG) tp-urg_mode !before(end_seq, tp-snd_up))) tp-urg_mode = 0; - tp-packets_out -= packets_acked; - cnt += packets_acked; + tp-packets_out -= acked_pcount; + pkts_acked += acked_pcount; /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -2852,7 +2851,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, } if (flag FLAG_ACKED) { - u32 pkts_acked = prior_packets - tp-packets_out; const struct tcp_congestion_ops *ca_ops = inet_csk(sk)-icsk_ca_ops; -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/21] [TCP]: Remove duplicated code block from clean_rtx_queue
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_input.c | 48 1 files changed, 20 insertions(+), 28 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ed2077c..3f0b6c7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2799,41 +2799,33 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, tcp_mtup_probe_success(sk, skb); } - if (sacked) { - if (sacked TCPCB_RETRANS) { - if (sacked TCPCB_SACKED_RETRANS) - tp-retrans_out -= packets_acked; - flag |= FLAG_RETRANS_DATA_ACKED; - seq_rtt = -1; - if ((flag FLAG_DATA_ACKED) || - (packets_acked 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; - } else { - if (seq_rtt 0) { - seq_rtt = now - scb-when; - if (fully_acked) - last_ackt = skb-tstamp; - } - if (!(sacked TCPCB_SACKED_ACKED)) - reord = min(cnt, reord); - } - - if (sacked TCPCB_SACKED_ACKED) - tp-sacked_out -= packets_acked; - if (sacked TCPCB_LOST) - tp-lost_out -= packets_acked; - - if ((sacked TCPCB_URG) tp-urg_mode - !before(end_seq, tp-snd_up)) - tp-urg_mode = 0; + if (sacked TCPCB_RETRANS) { + if (sacked TCPCB_SACKED_RETRANS) + tp-retrans_out -= packets_acked; + flag |= FLAG_RETRANS_DATA_ACKED; + seq_rtt = -1; + if ((flag FLAG_DATA_ACKED) || + (packets_acked 1)) + flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { if (seq_rtt 0) { seq_rtt = now - scb-when; if (fully_acked) last_ackt = skb-tstamp; } - reord = min(cnt, reord); + if (!(sacked TCPCB_SACKED_ACKED)) + reord = min(cnt, reord); } + + if (sacked TCPCB_SACKED_ACKED) + tp-sacked_out -= packets_acked; + if (sacked TCPCB_LOST) + tp-lost_out -= packets_acked; + + if ((sacked TCPCB_URG) tp-urg_mode + !before(end_seq, tp-snd_up)) + tp-urg_mode = 0; + tp-packets_out -= packets_acked; cnt += packets_acked; -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/21] [TCP]: Add tcp_for_write_queue_from_safe and use it in mtu_probe
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- include/net/tcp.h |5 + net/ipv4/tcp_output.c |8 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cdd0050..6e392ba 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1229,6 +1229,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu for (; (skb != (struct sk_buff *)(sk)-sk_write_queue);\ skb = skb-next) +#define tcp_for_write_queue_from_safe(skb, tmp, sk)\ + for (tmp = skb-next; \ +(skb != (struct sk_buff *)(sk)-sk_write_queue); \ +skb = tmp, tmp = skb-next) + static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk-sk_send_head; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20365c0..4f2bd70 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1351,9 +1351,7 @@ static int tcp_mtu_probe(struct sock *sk) tcp_insert_write_queue_before(nskb, skb, sk); len = 0; - while (len probe_size) { - next = tcp_write_queue_next(sk, skb); - + tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb-len, probe_size - len); if (nskb-ip_summed) skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); @@ -1382,7 +1380,9 @@ static int tcp_mtu_probe(struct sock *sk) } len += copy; - skb = next; + + if (len = probe_size) + break; } tcp_init_tso_segs(sk, nskb, nskb-len); -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/21] [TCP]: Introduce per skb fack_counts to retransmit queue
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] The fack count of any skb in the retransmit queue at any given point in time is: (skb-fack_count - head_skb-fack_count) And we'll use this in the SACK processing loops and possibly elsewhere too. Original idea came from David S. Miller, included couple of bug fixes from Tom Quetchenbach [EMAIL PROTECTED]. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- include/net/tcp.h | 41 + 1 files changed, 41 insertions(+), 0 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ec1cac..967f256 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -555,6 +555,7 @@ struct tcp_skb_cb { __u32 seq;/* Starting sequence number */ __u32 end_seq;/* SEQ + FIN + SYN + datalen*/ __u32 when; /* used to compute rtt's*/ + unsigned intfack_count; /* speed up SACK processing */ __u8flags; /* TCP header flags.*/ /* NOTE: These must match up to the flags byte in a @@ -1220,6 +1221,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu return skb-next; } +static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +{ + return skb-prev; +} + #define tcp_for_write_queue(skb, sk) \ for (skb = (sk)-sk_write_queue.next; \ (skb != (struct sk_buff *)(sk)-sk_write_queue); \ @@ -1241,6 +1247,11 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk) static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { + struct sk_buff *prev = tcp_write_queue_prev(sk, skb); + + TCP_SKB_CB(skb)-fack_count = TCP_SKB_CB(prev)-fack_count + + tcp_skb_pcount(prev); + sk-sk_send_head = skb-next; if (sk-sk_send_head == (struct sk_buff *)sk-sk_write_queue) sk-sk_send_head = NULL; @@ -1259,6 +1270,7 @@ static inline void tcp_init_send_head(struct sock *sk) static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) { + TCP_SKB_CB(skb)-fack_count = 0; __skb_queue_tail(sk-sk_write_queue, skb); } @@ -1275,9 +1287,36 @@ static inline void 
tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb } } +/* This is only used for tcp_send_synack(), so the write queue should + * be empty. If that stops being true, the fack_count assignment + * will need to be more elaborate. + */ static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) { + BUG_ON(!skb_queue_empty(sk-sk_write_queue)); __skb_queue_head(sk-sk_write_queue, skb); + TCP_SKB_CB(skb)-fack_count = 0; +} + +/* An insert into the middle of the write queue causes the fack + * counts in subsequent packets to become invalid, fix them up. + */ +static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *prev = skb-prev; + unsigned int fc = 0; + + if (prev != (struct sk_buff *) sk-sk_write_queue) + fc = TCP_SKB_CB(prev)-fack_count + tcp_skb_pcount(prev); + + tcp_for_write_queue_from(skb, sk) { + if (!before(TCP_SKB_CB(skb)-seq, tcp_sk(sk)-snd_nxt) || + TCP_SKB_CB(skb)-fack_count == fc) + break; + + TCP_SKB_CB(skb)-fack_count = fc; + fc += tcp_skb_pcount(skb); + } } /* Insert buff after skb on the write queue of sk. */ @@ -1286,6 +1325,7 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb, struct sock *sk) { __skb_append(skb, buff, sk-sk_write_queue); + tcp_reset_fack_counts(sk, buff); } /* Insert skb between prev and next on the write queue of sk. */ @@ -1294,6 +1334,7 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sock *sk) { __skb_insert(new, skb-prev, skb, sk-sk_write_queue); + tcp_reset_fack_counts(sk, new); if (sk-sk_send_head == skb) sk-sk_send_head = new; -- 1.5.0.6 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/21] [TCP]: Abstract tp-highest_sack accessing point to next skb
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Pointing to the next skb is necessary to avoid referencing already SACKed skbs which will soon be on a separate list. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- include/net/tcp.h | 35 ++- net/ipv4/tcp_input.c | 27 +++ net/ipv4/tcp_output.c | 11 +++ 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 6e392ba..5ec1cac 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1267,8 +1267,12 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk-sk_send_head == NULL) + if (sk-sk_send_head == NULL) { sk-sk_send_head = skb; + + if (tcp_sk(sk)-highest_sack == NULL) + tcp_sk(sk)-highest_sack = skb; + } } static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) @@ -1318,9 +1322,38 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) { if (!tp-sacked_out) return tp-snd_una; + + if (tp-highest_sack == NULL) + return tp-snd_nxt; + return TCP_SKB_CB(tp-highest_sack)-seq; } +static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) +{ + tcp_sk(sk)-highest_sack = tcp_skb_is_last(sk, skb) ? 
NULL : + tcp_write_queue_next(sk, skb); +} + +static inline struct sk_buff *tcp_highest_sack(struct sock *sk) +{ + return tcp_sk(sk)-highest_sack; +} + +static inline void tcp_highest_sack_reset(struct sock *sk) +{ + tcp_sk(sk)-highest_sack = tcp_write_queue_head(sk); +} + +/* Called when old skb is about to be deleted (to be combined with new skb) */ +static inline void tcp_highest_sack_combine(struct sock *sk, + struct sk_buff *old, + struct sk_buff *new) +{ + if (tcp_sk(sk)-sacked_out (old == tcp_sk(sk)-highest_sack)) + tcp_sk(sk)-highest_sack = new; +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a43c897..35753b7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1125,7 +1125,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) struct sk_buff *skb; int cnt = 0; u32 new_low_seq = tp-snd_nxt; - u32 received_upto = TCP_SKB_CB(tp-highest_sack)-end_seq; + u32 received_upto = tcp_highest_sack_seq(tp); if (!tcp_is_fack(tp) || !tp-retrans_out || !after(received_upto, tp-lost_retrans_low) || @@ -1236,9 +1236,10 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, +static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, int *reord, int dup_sack, int fack_count) { + struct tcp_sock *tp = tcp_sk(sk); u8 sacked = TCP_SKB_CB(skb)-sacked; int flag = 0; @@ -1307,8 +1308,8 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, if (fack_count tp-fackets_out) tp-fackets_out = fack_count; - if (after(TCP_SKB_CB(skb)-seq, tcp_highest_sack_seq(tp))) - tp-highest_sack = skb; + if (!before(TCP_SKB_CB(skb)-seq, tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); } /* D-SACK. 
We can detect redundant retransmission in S|R and plain R @@ -1330,8 +1331,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, int dup_sack_in, int *fack_count, int *reord, int *flag) { - struct tcp_sock *tp = tcp_sk(sk); - tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1358,7 +1357,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, break; if (in_sack) - *flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count); + *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count); *fack_count += tcp_skb_pcount(skb); } @@ -1429,7 +1428,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!tp-sacked_out) { if (WARN_ON(tp-fackets_out)) tp-fackets_out = 0; - tp-highest_sack = tcp_write_queue_head(sk); + tcp_highest_sack_reset(sk);
[PATCH 14/21] [TCP]: Added queue parameter to _for_write_queue helpers
From: =?ISO-8859-1?q?Ilpo_J=E4rvinen?= [EMAIL PROTECTED] Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- include/net/tcp.h |8 net/ipv4/tcp_input.c | 18 +- net/ipv4/tcp_output.c |8 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 433c6a6..0883697 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1232,16 +1232,16 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu return skb-prev; } -#define tcp_for_write_queue(skb, sk) \ +#define tcp_for_write_queue(skb, sk, queue)\ for (skb = (sk)-sk_write_queue.next; \ (skb != (struct sk_buff *)(sk)-sk_write_queue); \ skb = skb-next) -#define tcp_for_write_queue_from(skb, sk) \ +#define tcp_for_write_queue_from(skb, sk, queue) \ for (; (skb != (struct sk_buff *)(sk)-sk_write_queue);\ skb = skb-next) -#define tcp_for_write_queue_from_safe(skb, tmp, sk)\ +#define tcp_for_write_queue_from_safe(skb, tmp, sk, queue) \ for (tmp = skb-next; \ (skb != (struct sk_buff *)(sk)-sk_write_queue); \ skb = tmp, tmp = skb-next) @@ -1364,7 +1364,7 @@ static inline void tcp_reset_fack_counts(struct sock *sk, struct sk_buff *skb) if (prev != (struct sk_buff *) sk-sk_write_queue) fc = TCP_SKB_CB(prev)-fack_count + tcp_skb_pcount(prev); - tcp_for_write_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk, 0) { if (!before(TCP_SKB_CB(skb)-seq, tcp_sk(sk)-snd_nxt) || TCP_SKB_CB(skb)-fack_count == fc) break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 35753b7..8a02de2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1132,7 +1132,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) icsk-icsk_ca_state != TCP_CA_Recovery) return; - tcp_for_write_queue(skb, sk) { + tcp_for_write_queue(skb, sk, 0) { u32 ack_seq = TCP_SKB_CB(skb)-ack_seq; if (skb == tcp_send_head(sk)) @@ -1331,7 +1331,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, int dup_sack_in, int *fack_count, int *reord, int 
*flag) { - tcp_for_write_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk, 0) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1370,7 +1370,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, u32 skip_to_seq) { - tcp_for_write_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk, 0) { if (skb == tcp_send_head(sk)) break; @@ -1687,7 +1687,7 @@ int tcp_use_frto(struct sock *sk) skb = tcp_write_queue_head(sk); skb = tcp_write_queue_next(sk, skb);/* Skips head */ - tcp_for_write_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk, 0) { if (skb == tcp_send_head(sk)) break; if (TCP_SKB_CB(skb)-sackedTCPCB_RETRANS) @@ -1794,7 +1794,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tcp_for_write_queue(skb, sk) { + tcp_for_write_queue(skb, sk, 0) { if (skb == tcp_send_head(sk)) break; @@ -1894,7 +1894,7 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_clear_all_retrans_hints(tp); } - tcp_for_write_queue(skb, sk) { + tcp_for_write_queue(skb, sk, 0) { if (skb == tcp_send_head(sk)) break; @@ -2145,7 +2145,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) cnt = 0; } - tcp_for_write_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk, 0) { if (skb == tcp_send_head(sk)) break; /* TODO: do this better */ @@ -2200,7 +2200,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) skb = tp-scoreboard_skb_hint ? tp-scoreboard_skb_hint : tcp_write_queue_head(sk); - tcp_for_write_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk, 0) { if (skb ==