On Thu, 8 Sep 2016, Fenghua Yu wrote:

> +/*
> + * kernfs_root - find out the kernfs_root a kernfs_node belongs to
> + * @kn: kernfs_node of interest
> + *
> + * Return the kernfs_root @kn belongs to.
> + */
> +static inline struct kernfs_root *get_kernfs_root(struct kernfs_node *kn)
> +{
> +        if (kn->parent)
> +                kn = kn->parent;
So this is guaranteed to have a single nesting?

> +        return kn->dir.root;
> +}
> +
> +/*
> + * rdtgroup_file_mode - deduce file mode of a control file
> + * @cft: the control file in question
> + *
> + * S_IRUGO for read, S_IWUSR for write.
> + */
> +static umode_t rdtgroup_file_mode(const struct rftype *rft)
> +{
> +        umode_t mode = 0;
> +
> +        if (rft->read_u64 || rft->read_s64 || rft->seq_show)
> +                mode |= S_IRUGO;
> +
> +        if (rft->write_u64 || rft->write_s64 || rft->write)
> +                mode |= S_IWUSR;

Why don't you store the mode in rftype instead of evaluating it at
runtime?

Aside from that, [read|write]_[s|u]64 are nowhere used in this whole
patch series, but take plenty of storage and line space for nothing.

> +static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
> +                              const struct rftype *end)
> +{
> +        struct rftype *rft;
> +        int ret;
> +
> +        lockdep_assert_held(&rdtgroup_mutex);
> +
> +        for (rft = rfts; rft != end; rft++) {
> +                ret = rdtgroup_add_file(kn, rft);
> +                if (ret) {
> +                        pr_warn("%s: failed to add %s, err=%d\n",
> +                                __func__, rft->name, ret);
> +                        rdtgroup_rm_files(kn, rft, end);

So we remove the file which failed to be added along with those which
have not been added yet.

        rdtgroup_rm_files(kn, rfts, rft);

Might be more correct, but I might be wrong as usual.

> +/*
> + * Get resource type from name in kernfs_node. This can be extended to
> + * multi-resources (e.g. L2). Right now simply return RESOURCE_L3 because
> + * we only have L3 support.

That's crap. If you know that you have separate types then spend the time
to implement the storage instead of documenting your laziness/sloppiness.

> + */
> +static enum resource_type get_kn_res_type(struct kernfs_node *kn)
> +{
> +        return RESOURCE_L3;
> +}
> +
> +static int rdt_max_closid_show(struct seq_file *seq, void *v)
> +{
> +        struct kernfs_open_file *of = seq->private;
> +
> +        switch (get_kn_res_type(of->kn)) {
> +        case RESOURCE_L3:
> +                seq_printf(seq, "%d\n",
> +                        boot_cpu_data.x86_l3_max_closid);

x86_l3_max_closid is u16 ..... %u ?

And that line break is required because the line

        seq_printf(seq, "%d\n", boot_cpu_data.x86_l3_max_closid);

is exactly 73 characters long ....

> +                break;
> +        default:
> +                break;
> +        }
> +
> +        return 0;
> +}
> +
> +static int rdt_max_cbm_len_show(struct seq_file *seq, void *v)
> +{
> +        struct kernfs_open_file *of = seq->private;
> +
> +        switch (get_kn_res_type(of->kn)) {
> +        case RESOURCE_L3:
> +                seq_printf(seq, "%d\n",
> +                        boot_cpu_data.x86_l3_max_cbm_len);

Ditto

> +                break;
> +        default:
> +                break;
> +        }
> +
> +        return 0;
> +}
> +static void rdt_info_show_cat(struct seq_file *seq, int level)
> +{
> +        int domain;
> +        int domain_num = get_domain_num(level);
> +        int closid;
> +        u64 cbm;
> +        struct clos_cbm_table **cctable;
> +        int maxid;
> +        int shared_domain;
> +        int cnt;

Soon you occupy half of the screen.
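For example, pulling the int declarations together already cuts the block
to less than half (untested sketch, same identifiers as in the quoted
patch):

        struct clos_cbm_table **cctable;
        int domain, closid, maxid, shared_domain, cnt;
        int domain_num = get_domain_num(level);
        u64 cbm;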
> +        if (level == CACHE_LEVEL3)
> +                cctable = l3_cctable;
> +        else
> +                return;
> +
> +        maxid = cconfig.max_closid;
> +        for (domain = 0; domain < domain_num; domain++) {
> +                seq_printf(seq, "domain %d:\n", domain);
> +                shared_domain = get_shared_domain(domain, level);
> +                for (closid = 0; closid < maxid; closid++) {
> +                        int dindex, iindex;
> +
> +                        if (test_bit(closid,
> +                            (unsigned long *)cconfig.closmap[shared_domain])) {
> +                                dindex = get_dcbm_table_index(closid);
> +                                cbm = cctable[domain][dindex].cbm;
> +                                cnt = cctable[domain][dindex].clos_refcnt;
> +                                seq_printf(seq, "cbm[%d]=%lx, refcnt=%d\n",
> +                                           dindex, (unsigned long)cbm, cnt);
> +                                if (cdp_enabled) {
> +                                        iindex = get_icbm_table_index(closid);
> +                                        cbm = cctable[domain][iindex].cbm;
> +                                        cnt =
> +                                           cctable[domain][iindex].clos_refcnt;
> +                                        seq_printf(seq,
> +                                                   "cbm[%d]=%lx, refcnt=%d\n",
> +                                                   iindex, (unsigned long)cbm,
> +                                                   cnt);
> +                                }
> +                        } else {
> +                                cbm = max_cbm(level);
> +                                cnt = 0;
> +                                dindex = get_dcbm_table_index(closid);
> +                                seq_printf(seq, "cbm[%d]=%lx, refcnt=%d\n",
> +                                           dindex, (unsigned long)cbm, cnt);
> +                                if (cdp_enabled) {
> +                                        iindex = get_icbm_table_index(closid);
> +                                        seq_printf(seq,
> +                                                   "cbm[%d]=%lx, refcnt=%d\n",
> +                                                   iindex, (unsigned long)cbm,
> +                                                   cnt);
> +                                }

This is completely unreadable. Split it into static functions ....

> +                        }
> +                }
> +        }
> +}
> +
> +static void show_shared_domain(struct seq_file *seq)
> +{
> +        int domain;
> +
> +        seq_puts(seq, "Shared domains:\n");
> +
> +        for_each_cache_domain(domain, 0, shared_domain_num) {
> +                struct shared_domain *sd;
> +
> +                sd = &shared_domain[domain];
> +                seq_printf(seq, "domain[%d]:", domain);
> +                if (cat_enabled(CACHE_LEVEL3))
> +                        seq_printf(seq, "l3_domain=%d ", sd->l3_domain);
> +                seq_printf(seq, "cpumask=%*pb\n",
> +                           cpumask_pr_args(&sd->cpumask));

What's the value of printing a cpu mask for something which is not
enabled?

> +        }
> +}
> +
> +static int rdt_info_show(struct seq_file *seq, void *v)
> +{
> +        show_shared_domain(seq);
> +
> +        if (cat_l3_enabled) {
> +                if (rdt_opts.verbose)

Concatenate the conditionals into a single line please.

> +                        rdt_info_show_cat(seq, CACHE_LEVEL3);
> +        }
> +
> +        seq_puts(seq, "\n");
> +
> +        return 0;
> +}
> +
> +static int res_type_to_level(enum resource_type res_type, int *level)
> +{
> +        int ret = 0;
> +
> +        switch (res_type) {
> +        case RESOURCE_L3:
> +                *level = CACHE_LEVEL3;
> +                break;
> +        case RESOURCE_NUM:
> +                ret = -EINVAL;
> +                break;
> +        }
> +
> +        return ret;

Groan. What's wrong with

static int res_type_to_level(type)
{
        switch (type) {
        case RESOURCE_L3:       return CACHE_LEVEL3;
        case RESOURCE_NUM:      return -EINVAL;
        }
}

and at the callsite you do:

> +}
> +
> +static int domain_to_cache_id_show(struct seq_file *seq, void *v)
> +{
> +        struct kernfs_open_file *of = seq->private;
> +        enum resource_type res_type;
> +        int domain;
> +        int leaf;
> +        int level = 0;
> +        int ret;
> +
> +        res_type = (enum resource_type)of->kn->parent->priv;
> +
> +        ret = res_type_to_level(res_type, &level);
> +        if (ret)
> +                return 0;

        level = res_type_to_level(res_type);
        if (level < 0)
                return 0;

That gets rid of the initialization of level as well and becomes readable
source code. Hmm?

> +
> +        leaf = get_cache_leaf(level, 0);

        leafidx = cache_get_leaf_index(...);

I trip over this over and over and I can't get used to this misnomer.

> +
> +        for (domain = 0; domain < get_domain_num(level); domain++) {
> +                unsigned int cid;
> +
> +                cid = cache_domains[leaf].shared_cache_id[domain];
> +                seq_printf(seq, "%d:%d\n", domain, cid);

Proper print qualifiers are overrated....
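That is, cid is declared as unsigned int, so presumably this wants to be:

        seq_printf(seq, "%d:%u\n", domain, cid);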
> +static int info_populate_dir(struct kernfs_node *kn)
> +{
> +        struct rftype *rfts;
> +
> +        rfts = info_files;

        struct rftype *rfts = info_files;

> +        return rdtgroup_add_files(kn, rfts, rfts + ARRAY_SIZE(info_files));
> +}
> +static int rdtgroup_partition_populate_dir(struct kernfs_node *kn)

Has no user.

> +LIST_HEAD(rdtgroup_lists);

I told you before that globals or module static variables don't get
defined in the middle of the code and don't get glued to a function
definition w/o a separating blank line.

> +static void init_rdtgroup_root(struct rdtgroup_root *root)
> +{
> +        struct rdtgroup *rdtgrp = &root->rdtgrp;
> +
> +        INIT_LIST_HEAD(&rdtgrp->rdtgroup_list);
> +        list_add_tail(&rdtgrp->rdtgroup_list, &rdtgroup_lists);
> +        atomic_set(&root->nr_rdtgrps, 1);
> +        rdtgrp->root = root;

Yuck.

        grp = root->grp;
        init(grp);
        root->nr_grps = 1;
        grp->root = root;

Confused.

> +}
> +
> +static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops;
> +struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
> +{
> +        struct rdtgroup *rdtgrp;
> +
> +        if (kernfs_type(kn) == KERNFS_DIR)
> +                rdtgrp = kn->priv;
> +        else
> +                rdtgrp = kn->parent->priv;

So this again assumes that there is a single level of directories....

> +        kernfs_break_active_protection(kn);
> +
> +        mutex_lock(&rdtgroup_mutex);
> +        /* Unlock if rdtgrp is dead. */
> +        if (!rdtgrp)
> +                rdtgroup_kn_unlock(kn);
> +
> +        return rdtgrp;
> +}
> +
> +void rdtgroup_kn_unlock(struct kernfs_node *kn)
> +{
> +        mutex_unlock(&rdtgroup_mutex);
> +
> +        kernfs_unbreak_active_protection(kn);
> +}
> +
> +static char *res_info_dir_name(enum resource_type res_type, char *name)
> +{
> +        switch (res_type) {
> +        case RESOURCE_L3:
> +                strncpy(name, "l3", RDTGROUP_FILE_NAME_LEN);
> +                break;
> +        default:
> +                break;
> +        }
> +
> +        return name;

What's the purpose of this return value if it's ignored at the call site?

> +}
> +
> +static int create_res_info(enum resource_type res_type,
> +                           struct kernfs_node *parent_kn)
> +{
> +        struct kernfs_node *kn;
> +        char name[RDTGROUP_FILE_NAME_LEN];
> +        int ret;
> +
> +        res_info_dir_name(res_type, name);

So name contains random crap if res_type is not handled in
res_info_dir_name().

> +        kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, NULL);
> +        if (IS_ERR(kn)) {
> +                ret = PTR_ERR(kn);
> +                goto out;
> +        }
> +
> +        /*
> +         * This extra ref will be put in kernfs_remove() and guarantees
> +         * that @rdtgrp->kn is always accessible.
> +         */
> +        kernfs_get(kn);
> +
> +        ret = rdtgroup_kn_set_ugid(kn);
> +        if (ret)
> +                goto out_destroy;
> +
> +        ret = res_info_populate_dir(kn);
> +        if (ret)
> +                goto out_destroy;
> +
> +        kernfs_activate(kn);
> +
> +        ret = 0;
> +        goto out;

Hell no.

> +
> +out_destroy:
> +        kernfs_remove(kn);
> +out:
> +        return ret;
> +
> +}
> +
> +static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn,
> +                                    const char *name)
> +{
> +        struct kernfs_node *kn;
> +        int ret;
> +
> +        if (parent_kn != root_rdtgrp->kn)
> +                return -EPERM;
> +
> +        /* create the directory */
> +        kn = kernfs_create_dir(parent_kn, "info", parent_kn->mode, root_rdtgrp);
> +        if (IS_ERR(kn)) {
> +                ret = PTR_ERR(kn);
> +                goto out;
> +        }
> +
> +        ret = info_populate_dir(kn);
> +        if (ret)
> +                goto out_destroy;
> +
> +        if (cat_enabled(CACHE_LEVEL3))
> +                create_res_info(RESOURCE_L3, kn);
> +
> +        /*
> +         * This extra ref will be put in kernfs_remove() and guarantees
> +         * that @rdtgrp->kn is always accessible.
> +         */
> +        kernfs_get(kn);
> +
> +        ret = rdtgroup_kn_set_ugid(kn);
> +        if (ret)
> +                goto out_destroy;
> +
> +        kernfs_activate(kn);
> +
> +        ret = 0;
> +        goto out;

Copy and paste .... sucks.

> +out_destroy:
> +        kernfs_remove(kn);
> +out:
> +        return ret;
> +}
> +
> +static int rdtgroup_setup_root(struct rdtgroup_root *root,
> +                               unsigned long ss_mask)
> +{
> +        int ret;
> +
> +        root_rdtgrp = &root->rdtgrp;
> +
> +        lockdep_assert_held(&rdtgroup_mutex);
> +
> +        root->kf_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
> +                                           KERNFS_ROOT_CREATE_DEACTIVATED,
> +                                           root_rdtgrp);
> +        if (IS_ERR(root->kf_root)) {
> +                ret = PTR_ERR(root->kf_root);
> +                goto out;
> +        }
> +        root_rdtgrp->kn = root->kf_root->kn;
> +
> +        ret = rdtgroup_populate_dir(root->kf_root->kn);
> +        if (ret)
> +                goto destroy_root;
> +
> +        rdtgroup_create_info_dir(root->kf_root->kn, "info_dir");
> +
> +        /*
> +         * Link the root rdtgroup in this hierarchy into all the css_set

css_set objects ?

Again: Copy and paste sucks, when done without brain involvement.

> +         * objects.
> +         */
> +        WARN_ON(atomic_read(&root->nr_rdtgrps) != 1);
> +
> +        kernfs_activate(root_rdtgrp->kn);
> +        ret = 0;
> +        goto out;
> +
> +destroy_root:
> +        kernfs_destroy_root(root->kf_root);
> +        root->kf_root = NULL;
> +out:
> +        return ret;
> +}
> +static int get_shared_cache_id(int cpu, int level)
> +{
> +        struct cpuinfo_x86 *c;
> +        int index_msb;
> +        struct cpu_cacheinfo *this_cpu_ci;
> +        struct cacheinfo *this_leaf;
> +
> +        this_cpu_ci = get_cpu_cacheinfo(cpu);

Once more. this_cpu_ci is actively misleading.

> +
> +        this_leaf = this_cpu_ci->info_list + level_to_leaf(level);
> +        return this_leaf->id;
> +        return c->apicid >> index_msb;
> +}
> +static void init_cache_domain(int cpu, int leaf)
> +{
> +        struct cpu_cacheinfo *this_cpu_ci;
> +        struct cpumask *mask;
> +        unsigned int level;
> +        struct cacheinfo *this_leaf;
> +        int domain;
> +
> +        this_cpu_ci = get_cpu_cacheinfo(cpu);
> +        this_leaf = this_cpu_ci->info_list + leaf;
> +        cache_domains[leaf].level = this_leaf->level;
> +        mask = &this_leaf->shared_cpu_map;
> +        for (domain = 0; domain < MAX_CACHE_DOMAINS; domain++) {
> +                if (cpumask_test_cpu(cpu,
> +                    &cache_domains[leaf].shared_cpu_map[domain]))
> +                        return;
> +        }
> +        if (domain == MAX_CACHE_DOMAINS) {
> +                domain = cache_domains[leaf].max_cache_domains_num++;
> +
> +                cache_domains[leaf].shared_cpu_map[domain] = *mask;
> +
> +                level = cache_domains[leaf].level;
> +                cache_domains[leaf].shared_cache_id[domain] =
> +                        get_shared_cache_id(cpu, level);

I've seen similar code in the other file. Why do we need two incarnations
of that? Can't we have a shared cache domain information storage where
all info is kept for both the control and the user space interface?

> +        }
> +}
> +
> +static __init void init_cache_domains(void)
> +{
> +        int cpu;
> +        int leaf;
> +
> +        for (leaf = 0; leaf < get_cpu_cacheinfo(0)->num_leaves; leaf++) {
> +                for_each_online_cpu(cpu)
> +                        init_cache_domain(cpu, leaf);

What updates this stuff on hotplug?

> +        }
> +}
> +
> +void rdtgroup_exit(struct task_struct *tsk)
> +{
> +
> +        if (!list_empty(&tsk->rg_list)) {

I told you last time that rg_list is a misnomer ....

> +                struct rdtgroup *rdtgrp = tsk->rdtgroup;
> +
> +                list_del_init(&tsk->rg_list);
> +                tsk->rdtgroup = NULL;
> +                atomic_dec(&rdtgrp->refcount);

And there is still no sign of documentation on how that list is used and
protected.

> +        }
> +}
> +
> +static void rdtgroup_destroy_locked(struct rdtgroup *rdtgrp)
> +        __releases(&rdtgroup_mutex) __acquires(&rdtgroup_mutex)

Where?
> +{
> +        int shared_domain;
> +        int closid;
> +
> +        lockdep_assert_held(&rdtgroup_mutex);
> +
> +        /* free closid occupied by this rdtgroup. */
> +        for_each_cache_domain(shared_domain, 0, shared_domain_num) {
> +                closid = rdtgrp->resource.closid[shared_domain];
> +                closid_put(closid, shared_domain);
> +        }
> +
> +        list_del_init(&rdtgrp->rdtgroup_list);
> +
> +        /*
> +         * Remove @rdtgrp directory along with the base files. @rdtgrp has an
> +         * extra ref on its kn.
> +         */
> +        kernfs_remove(rdtgrp->kn);
> +}
> +
> +static int
> +rdtgroup_move_task_all(struct rdtgroup *src_rdtgrp, struct rdtgroup *dst_rdtgrp)
> +{
> +        struct list_head *tasks;
> +
> +        tasks = &src_rdtgrp->pset.tasks;
> +        while (!list_empty(tasks)) {

list_for_each_entry_safe() ??? (sketch further below)

> +                struct task_struct *tsk;
> +                struct list_head *pos;
> +                pid_t pid;
> +                int ret;
> +
> +                pos = tasks->next;
> +                tsk = list_entry(pos, struct task_struct, rg_list);
> +                pid = tsk->pid;
> +                ret = rdtgroup_move_task(pid, dst_rdtgrp, false, NULL);
> +                if (ret)
> +                        return ret;
> +        }
> +
> +        return 0;
> +}
> +
> +/*
> + * Forcibly remove all of subdirectories under root.
> + */
> +static void rmdir_all_sub(void)
> +{
> +        struct rdtgroup *rdtgrp;
> +        int cpu;
> +        struct list_head *l;
> +        struct task_struct *p;
> +
> +        /* Move all tasks from sub rdtgroups to default */
> +        rcu_read_lock();
> +        for_each_process(p) {
> +                if (p->rdtgroup)
> +                        p->rdtgroup = NULL;
> +        }
> +        rcu_read_unlock();

And how is that protected against concurrent forks?

> +
> +        while (!list_is_last(&root_rdtgrp->rdtgroup_list, &rdtgroup_lists)) {
> +                l = rdtgroup_lists.next;
> +                if (l == &root_rdtgrp->rdtgroup_list)
> +                        l = l->next;
> +
> +                rdtgrp = list_entry(l, struct rdtgroup, rdtgroup_list);
> +                if (rdtgrp == root_rdtgrp)
> +                        continue;
> +
> +                for_each_cpu(cpu, &rdtgrp->cpu_mask)
> +                        per_cpu(cpu_rdtgroup, cpu) = root_rdtgrp;
> +
> +                rdtgroup_destroy_locked(rdtgrp);
> +        }
> +}
> +static void *rdtgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
> +{
> +        return seq_rft(seq)->seq_start(seq, ppos);
> +}
> +
> +static void *rdtgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
> +{
> +        return seq_rft(seq)->seq_next(seq, v, ppos);
> +}
> +
> +static void rdtgroup_seqfile_stop(struct seq_file *seq, void *v)
> +{
> +        seq_rft(seq)->seq_stop(seq, v);
> +}
> +
> +static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
> +{
> +        struct rftype *rft = seq_rft(m);
> +
> +        if (rft->seq_show)
> +                return rft->seq_show(m, arg);
> +        return 0;
> +}
> +
> +static struct kernfs_ops rdtgroup_kf_ops = {
> +        .atomic_write_len = PAGE_SIZE,
> +        .write = rdtgroup_file_write,
> +        .seq_start = rdtgroup_seqfile_start,
> +        .seq_next = rdtgroup_seqfile_next,
> +        .seq_stop = rdtgroup_seqfile_stop,
> +        .seq_show = rdtgroup_seqfile_show,
> +};

And once more nothing uses this at all. So why is it there?
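On the list_for_each_entry_safe() remark above, something along these
lines is presumably what is meant (untested sketch, identifiers taken
from the quoted patch; like the original loop it assumes
rdtgroup_move_task() unlinks the task from src_rdtgrp->pset.tasks):

        struct task_struct *tsk, *tmp;
        int ret;

        list_for_each_entry_safe(tsk, tmp, &src_rdtgrp->pset.tasks, rg_list) {
                ret = rdtgroup_move_task(tsk->pid, dst_rdtgrp, false, NULL);
                if (ret)
                        return ret;
        }
        return 0;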
> +static struct kernfs_ops rdtgroup_kf_single_ops = {
> +        .atomic_write_len = PAGE_SIZE,
> +        .write = rdtgroup_file_write,
> +        .seq_show = rdtgroup_seqfile_show,
> +};
> +
> +static void rdtgroup_exit_rftypes(struct rftype *rfts)
> +{
> +        struct rftype *rft;
> +
> +        for (rft = rfts; rft->name[0] != '\0'; rft++) {
> +                /* free copy for custom atomic_write_len, see init_cftypes() */
> +                if (rft->max_write_len && rft->max_write_len != PAGE_SIZE)
> +                        kfree(rft->kf_ops);
> +                rft->kf_ops = NULL;
> +
> +                /* revert flags set by rdtgroup core while adding @cfts */
> +                rft->flags &= ~(__RFTYPE_ONLY_ON_DFL | __RFTYPE_NOT_ON_DFL);
> +        }
> +}
> +
> +static int rdtgroup_init_rftypes(struct rftype *rfts)
> +{
> +        struct rftype *rft;
> +
> +        for (rft = rfts; rft->name[0] != '\0'; rft++) {
> +                struct kernfs_ops *kf_ops;
> +
> +                if (rft->seq_start)
> +                        kf_ops = &rdtgroup_kf_ops;
> +                else
> +                        kf_ops = &rdtgroup_kf_single_ops;

Ditto.

> +
> +                /*
> +                 * Ugh... if @cft wants a custom max_write_len, we need to
> +                 * make a copy of kf_ops to set its atomic_write_len.
> +                 */
> +                if (rft->max_write_len && rft->max_write_len != PAGE_SIZE) {
> +                        kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
> +                        if (!kf_ops) {
> +                                rdtgroup_exit_rftypes(rfts);
> +                                return -ENOMEM;
> +                        }
> +                        kf_ops->atomic_write_len = rft->max_write_len;

No user either. Copy and paste once more ?

> +                }
> +
> +                rft->kf_ops = kf_ops;
> +        }
> +
> +        return 0;
> +}
> +
> +/*
> + * rdtgroup_init - rdtgroup initialization
> + *
> + * Register rdtgroup filesystem, and initialize any subsystems that didn't
> + * request early init.
> + */
> +int __init rdtgroup_init(void)
> +{
> +        int cpu;
> +
> +        WARN_ON(rdtgroup_init_rftypes(rdtgroup_root_base_files));
> +
> +        WARN_ON(rdtgroup_init_rftypes(res_info_files));
> +        WARN_ON(rdtgroup_init_rftypes(info_files));
> +
> +        WARN_ON(rdtgroup_init_rftypes(rdtgroup_partition_base_files));
> +        mutex_lock(&rdtgroup_mutex);
> +
> +        init_rdtgroup_root(&rdtgrp_dfl_root);
> +        WARN_ON(rdtgroup_setup_root(&rdtgrp_dfl_root, 0));
> +
> +        mutex_unlock(&rdtgroup_mutex);
> +
> +        WARN_ON(sysfs_create_mount_point(fs_kobj, "resctrl"));
> +        WARN_ON(register_filesystem(&rdt_fs_type));
> +        init_cache_domains();
> +
> +        INIT_LIST_HEAD(&rdtgroups);
> +
> +        for_each_online_cpu(cpu)
> +                per_cpu(cpu_rdtgroup, cpu) = root_rdtgrp;

Another for-each-cpu loop. Where is the hotplug update happening?

Thanks,

        tglx