[PATCH v2 12/16] mm: multigenerational lru: eviction
The eviction consumes old generations. Given an lruvec, the eviction scans the pages on the per-zone lists indexed by either of min_seq[2]. It first tries to select a type based on the values of min_seq[2]. When anon and file types are both available from the same generation, it selects the one that has a lower refault rate. During a scan, the eviction sorts pages according to their generation numbers, if the aging has found them referenced. It also moves pages from the tiers that have higher refault rates than tier 0 to the next generation. When it finds all the per-zone lists of a selected type are empty, the eviction increments min_seq[2] indexed by this selected type. Signed-off-by: Yu Zhao --- mm/vmscan.c | 341 1 file changed, 341 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 31e1b4155677..6239b1acd84f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5468,6 +5468,347 @@ static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq, return true; } +/** + * the eviction + **/ + +static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate) +{ + bool success; + int gen = page_lru_gen(page); + int file = page_is_file_lru(page); + int zone = page_zonenum(page); + int tier = lru_tier_from_usage(page_tier_usage(page)); + struct lrugen *lrugen = &lruvec->evictable; + + VM_BUG_ON_PAGE(gen == -1, page); + VM_BUG_ON_PAGE(tier_to_isolate < 0, page); + + /* a lazy-free page that has been written into? */ + if (file && PageDirty(page) && PageAnon(page)) { + success = lru_gen_deletion(page, lruvec); + VM_BUG_ON_PAGE(!success, page); + SetPageSwapBacked(page); + add_page_to_lru_list_tail(page, lruvec); + return true; + } + + /* page_update_gen() has updated the page? 
*/ + if (gen != lru_gen_from_seq(lrugen->min_seq[file])) { + list_move(&page->lru, &lrugen->lists[gen][file][zone]); + return true; + } + + /* activate the page if its tier has a higher refault rate */ + if (tier_to_isolate < tier) { + int sid = sid_from_seq_or_gen(gen); + + page_inc_gen(page, lruvec, false); + WRITE_ONCE(lrugen->activated[sid][file][tier - 1], + lrugen->activated[sid][file][tier - 1] + thp_nr_pages(page)); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); + return true; + } + + /* +* A page can't be immediately evicted, and page_inc_gen() will mark it +* for reclaim and hopefully writeback will write it soon if it's dirty. +*/ + if (PageLocked(page) || PageWriteback(page) || (file && PageDirty(page))) { + page_inc_gen(page, lruvec, true); + return true; + } + + return false; +} + +static bool should_skip_page(struct page *page, struct scan_control *sc) +{ + if (!sc->may_unmap && page_mapped(page)) + return true; + + if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && + (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page + return true; + + if (!get_page_unless_zero(page)) + return true; + + if (!TestClearPageLRU(page)) { + put_page(page); + return true; + } + + return false; +} + +static void isolate_page(struct page *page, struct lruvec *lruvec) +{ + bool success; + + success = lru_gen_deletion(page, lruvec); + VM_BUG_ON_PAGE(!success, page); + + if (PageActive(page)) { + ClearPageActive(page); + /* make sure shrink_page_list() rejects this page */ + SetPageReferenced(page); + return; + } + + /* make sure shrink_page_list() doesn't try to write this page */ + ClearPageReclaim(page); + /* make sure shrink_page_list() doesn't reject this page */ + ClearPageReferenced(page); +} + +static int scan_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc, + long *nr_to_scan, int file, int tier, + struct list_head *list) +{ + bool success; + int gen, zone; + enum vm_event_item item; + int sorted = 0; + int scanned = 0; + int 
isolated = 0; + int batch_size = 0; + struct lrugen *lrugen = &lruvec->evictable; + + VM_BUG_ON(!list_empty(list)); + + if (get_nr_gens(lruvec, file) == MIN_NR_GENS) + return -ENOENT; + + gen = lru_gen_from_seq(lrugen->min_seq[file]); + + for (zone = sc->reclaim_idx; zone >= 0;
[PATCH v2 10/16] mm: multigenerational lru: mm_struct list
In order to scan page tables, we add an infrastructure to maintain either a system-wide mm_struct list or per-memcg mm_struct lists. Multiple threads can concurrently work on the same mm_struct list, and each of them will be given a different mm_struct. This infrastructure also tracks whether an mm_struct is being used on any CPUs or has been used since the last time a worker looked at it. In other words, workers will not be given an mm_struct that belongs to a process that has been sleeping. Signed-off-by: Yu Zhao --- fs/exec.c | 2 + include/linux/memcontrol.h | 6 + include/linux/mm_types.h | 117 ++ include/linux/mmzone.h | 2 - kernel/exit.c | 1 + kernel/fork.c | 10 ++ kernel/kthread.c | 1 + kernel/sched/core.c| 2 + mm/memcontrol.c| 28 mm/vmscan.c| 316 + 10 files changed, 483 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 18594f11c31f..c691d4d7720c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm) active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; + lru_gen_add_mm(mm); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for @@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm) if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); activate_mm(active_mm, mm); + lru_gen_switch_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); tsk->mm->vmacache_seqnum = 0; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f13dc02cf277..cff95ed1ee2b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -212,6 +212,8 @@ struct obj_cgroup { }; }; +struct lru_gen_mm_list; + /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. 
We would eventually like to provide @@ -335,6 +337,10 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_LRU_GEN + struct lru_gen_mm_list *mm_list; +#endif + struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6613b26a8894..f8a239fbb958 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include @@ -383,6 +385,8 @@ struct core_state { struct completion startup; }; +#define ANON_AND_FILE 2 + struct kioctx_table; struct mm_struct { struct { @@ -561,6 +565,22 @@ struct mm_struct { #ifdef CONFIG_IOMMU_SUPPORT u32 pasid; +#endif +#ifdef CONFIG_LRU_GEN + struct { + /* the node of a global or per-memcg mm_struct list */ + struct list_head list; +#ifdef CONFIG_MEMCG + /* points to memcg of the owner task above */ + struct mem_cgroup *memcg; +#endif + /* whether this mm_struct has been used since the last walk */ + nodemask_t nodes[ANON_AND_FILE]; +#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* the number of CPUs using this mm_struct */ + atomic_t nr_cpus; +#endif + } lrugen; #endif } __randomize_layout; @@ -588,6 +608,103 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } +#ifdef CONFIG_LRU_GEN + +void lru_gen_init_mm(struct mm_struct *mm); +void lru_gen_add_mm(struct mm_struct *mm); +void lru_gen_del_mm(struct mm_struct *mm); +#ifdef CONFIG_MEMCG +int lru_gen_alloc_mm_list(struct mem_cgroup *memcg); +void lru_gen_free_mm_list(struct mem_cgroup *memcg); +void lru_gen_migrate_mm(struct mm_struct *mm); +#endif + +/* + * Track the usage so mm_struct's that haven't been used since the last walk can + * be skipped. This function adds a theoretical overhead to each context switch, + * which hasn't been measurable. 
+ */ +static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new) +{ + int file; + + /* exclude init_mm, efi_mm, etc. */ + if (!core_kernel_data((unsigned long)old)) { + VM_BUG_ON(old == &init_mm); + + for (file = 0; file < ANON_AND_FILE; file++) + nodes_setall(old->lrugen.nodes[file]); + +#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + atomic_dec(&old->lrugen.nr_cpus); + VM_BUG_ON_MM(atomic_read(&old->lrugen.nr_cpus) < 0, old); +#endif
[PATCH v2 11/16] mm: multigenerational lru: aging
The aging produces young generations. Given an lruvec, the aging walks the mm_struct list associated with this lruvec to scan page tables for referenced pages. Upon finding one, the aging updates the generation number of this page to max_seq. After each round of scanning, the aging increments max_seq. The aging is due when both values of min_seq[2] reach max_seq-1, assuming both anon and file types are reclaimable. The aging uses the following optimizations when scanning page tables: 1) It will not scan page tables from processes that have been sleeping since the last scan. 2) It will not scan PTE tables under non-leaf PMD entries that do not have the accessed bit set, when CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG=y. 3) It will not zigzag between the PGD table and the same PMD or PTE table spanning multiple VMAs. In other words, it finishes all the VMAs within the range of the same PMD or PTE table before it returns to the PGD table. This optimizes workloads that have large numbers of tiny VMAs, especially when CONFIG_PGTABLE_LEVELS=5. 
Signed-off-by: Yu Zhao --- mm/vmscan.c | 700 1 file changed, 700 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index d67dfd1e3930..31e1b4155677 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -4771,6 +4772,702 @@ static bool get_next_mm(struct mm_walk_args *args, int swappiness, struct mm_str return last; } +/** + * the aging + **/ + +static void update_batch_size(struct page *page, int old_gen, int new_gen, + struct mm_walk_args *args) +{ + int file = page_is_file_lru(page); + int zone = page_zonenum(page); + int delta = thp_nr_pages(page); + + VM_BUG_ON(old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen >= MAX_NR_GENS); + + args->batch_size++; + + args->nr_pages[old_gen][file][zone] -= delta; + args->nr_pages[new_gen][file][zone] += delta; +} + +static void reset_batch_size(struct lruvec *lruvec, struct mm_walk_args *args) +{ + int gen, file, zone; + struct lrugen *lrugen = &lruvec->evictable; + + args->batch_size = 0; + + spin_lock_irq(&lruvec->lru_lock); + + for_each_gen_type_zone(gen, file, zone) { + enum lru_list lru = LRU_FILE * file; + int total = args->nr_pages[gen][file][zone]; + + if (!total) + continue; + + args->nr_pages[gen][file][zone] = 0; + WRITE_ONCE(lrugen->sizes[gen][file][zone], + lrugen->sizes[gen][file][zone] + total); + + if (lru_gen_is_active(lruvec, gen)) + lru += LRU_ACTIVE; + update_lru_size(lruvec, lru, zone, total); + } + + spin_unlock_irq(&lruvec->lru_lock); +} + +static int page_update_gen(struct page *page, int new_gen) +{ + int old_gen; + unsigned long old_flags, new_flags; + + VM_BUG_ON(new_gen >= MAX_NR_GENS); + + do { + old_flags = READ_ONCE(page->flags); + + old_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + if (old_gen < 0) + new_flags = old_flags | BIT(PG_referenced); + else + new_flags = (old_flags & ~(LRU_GEN_MASK | LRU_USAGE_MASK | +LRU_TIER_FLAGS)) | ((new_gen + 1UL) << LRU_GEN_PGOFF); + + if (old_flags == new_flags) + break; + } while 
(cmpxchg(&page->flags, old_flags, new_flags) != old_flags); + + return old_gen; +} + +static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + struct mm_walk_args *args = walk->private; + + if (!vma_is_accessible(vma) || is_vm_hugetlb_page(vma) || + (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))) + return true; + + if (vma_is_anonymous(vma)) + return !args->should_walk[0]; + + if (vma_is_shmem(vma)) + return !args->should_walk[0] || + mapping_unevictable(vma->vm_file->f_mapping); + + return !args->should_walk[1] || vma_is_dax(vma) || + vma == get_gate_vma(vma->vm_mm) || + mapping_unevictable(vma->vm_file->f_mapping); +} + +/* + * Some userspace memory allocators create many single-page VMAs. So instead of + * returning back to the PGD table for each of such VMAs, we finish at least an + * entire PMD table and therefore avoid many zigzags. This optimizes page table + * walks for workloads that have large numbers of tiny VMAs. + * + * We scan PMD tables in two pass. Th
[PATCH v2 08/16] mm: multigenerational lru: groundwork
For each lruvec, evictable pages are divided into multiple generations. The youngest generation number is stored in max_seq for both anon and file types as they are aged on an equal footing. The oldest generation numbers are stored in min_seq[2] separately for anon and file types as clean file pages can be evicted regardless of may_swap or may_writepage. Generation numbers are truncated into order_base_2(MAX_NR_GENS+1) bits in order to fit into page->flags. The sliding window technique is used to prevent truncated generation numbers from overlapping. Each truncated generation number is an index into lruvec->evictable.lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]. Evictable pages are added to the per-zone lists indexed by max_seq or min_seq[2] (modulo MAX_NR_GENS), depending on whether they are being faulted in. The workflow comprises two conceptually independent functions: the aging and the eviction. The aging produces young generations. Given an lruvec, the aging scans page tables for referenced pages of this lruvec. Upon finding one, the aging updates its generation number to max_seq. After each round of scanning, the aging increments max_seq. The aging is due when both values of min_seq[2] reach max_seq-1, assuming both anon and file types are reclaimable. The eviction consumes old generations. Given an lruvec, the eviction scans the pages on the per-zone lists indexed by either of min_seq[2]. It tries to select a type based on the values of min_seq[2] and swappiness. During a scan, the eviction sorts pages according to their generation numbers, if the aging has found them referenced. When it finds all the per-zone lists of a selected type are empty, the eviction increments min_seq[2] indexed by this selected type. 
Signed-off-by: Yu Zhao --- fs/fuse/dev.c | 3 +- include/linux/mm.h| 2 + include/linux/mm_inline.h | 193 +++ include/linux/mmzone.h| 110 +++ include/linux/page-flags-layout.h | 20 +- include/linux/page-flags.h| 4 +- kernel/bounds.c | 6 + mm/huge_memory.c | 3 +- mm/mm_init.c | 16 +- mm/mmzone.c | 2 + mm/swapfile.c | 4 + mm/vmscan.c | 305 ++ 12 files changed, 656 insertions(+), 12 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c0fee830a34e..27c83f557794 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -784,7 +784,8 @@ static int fuse_check_page(struct page *page) 1 << PG_lru | 1 << PG_active | 1 << PG_reclaim | - 1 << PG_waiters))) { + 1 << PG_waiters | + LRU_GEN_MASK | LRU_USAGE_MASK))) { dump_page(page, "fuse: trying to steal weird page"); return 1; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8ba434287387..2c8a2db78ce9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1070,6 +1070,8 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define ZONES_PGOFF(NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF(LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) +#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) +#define LRU_USAGE_PGOFF(LRU_GEN_PGOFF - LRU_USAGE_WIDTH) /* * Define the bit shifts to access each section. 
For non-existent diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355ea1ee32bd..2bf910eb3dd7 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -79,11 +79,198 @@ static __always_inline enum lru_list page_lru(struct page *page) return lru; } +#ifdef CONFIG_LRU_GEN + +#ifdef CONFIG_LRU_GEN_ENABLED +DECLARE_STATIC_KEY_TRUE(lru_gen_static_key); +#define lru_gen_enabled() static_branch_likely(&lru_gen_static_key) +#else +DECLARE_STATIC_KEY_FALSE(lru_gen_static_key); +#define lru_gen_enabled() static_branch_unlikely(&lru_gen_static_key) +#endif + +/* We track at most MAX_NR_GENS generations using the sliding window technique. */ +static inline int lru_gen_from_seq(unsigned long seq) +{ + return seq % MAX_NR_GENS; +} + +/* Return a proper index regardless whether we keep a full history of stats. */ +static inline int sid_from_seq_or_gen(int seq_or_gen) +{ + return seq_or_gen % NR_STAT_GENS; +} + +/* The youngest and the second youngest generations are considered active. */ +static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +{ + unsigned long max_seq = READ_ONCE(lruvec->evictable.max_seq); + + VM_BUG_ON(!max_seq); + VM_BUG_ON(gen >= MAX_NR_GENS); + + return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); +} + +/* Update the sizes of the multigenerational lru. */ +static inline void lru_gen_update_size(struct page
[PATCH v2 09/16] mm: multigenerational lru: activation
For pages accessed multiple times via file descriptors, instead of activating them upon their second access, we activate them based on the refault rates of their tiers. Pages accessed N times via file descriptors belong to tier order_base_2(N). Pages from tier 0, i.e., those read ahead, those accessed once via file descriptors, and those accessed only via page tables, are evicted regardless of the refault rate. Pages from other tiers will be moved to the next generation, i.e., activated, if the refault rates of their tiers are higher than that of tier 0. Each generation contains at most MAX_NR_TIERS tiers, and they require MAX_NR_TIERS-2 additional bits in page->flags. This feedback model has a few advantages over the current feedforward model: 1) It has a negligible overhead in the access path because activations are done in the reclaim path. 2) It takes mapped pages into account and avoids overprotecting pages accessed multiple times via file descriptors. 3) More tiers offer better protection to pages accessed more than twice when buffered-I/O-intensive workloads are under memory pressure. For pages mapped upon page faults, the accessed bit is set and they must be properly aged. We add them to the per-zone lists indexed by max_seq, i.e., the youngest generation. For pages not in page cache or swap cache, this can be done easily in the page fault path: we rename lru_cache_add_inactive_or_unevictable() to lru_cache_add_page_vma() and add a new parameter, which is set to true for pages mapped upon page faults. For pages in page cache or swap cache, we cannot differentiate the page fault path from the read ahead path at the time we call lru_cache_add() in add_to_page_cache_lru() and __read_swap_cache_async(). So we add a new function lru_gen_activation(), which is essentially activate_page(), to move pages to the per-zone lists indexed by max_seq at a later time. 
Hopefully we would find those pages in lru_pvecs.lru_add and simply set PageActive() on them without having to actually move them. Finally, we need to be compatible with the existing notion of active and inactive. We cannot use PageActive() because it is not set on active pages unless they are isolated, in order to spare the aging the trouble of clearing it when an active generation becomes inactive. A new function page_is_active() compares the generation number of a page with max_seq and max_seq-1 (modulo MAX_NR_GENS), which are considered active and protected from the eviction. Other generations, which may or may not exist, are considered inactive. Signed-off-by: Yu Zhao --- fs/proc/task_mmu.c| 3 +- include/linux/mm_inline.h | 101 + include/linux/swap.h | 4 +- kernel/events/uprobes.c | 2 +- mm/huge_memory.c | 2 +- mm/khugepaged.c | 2 +- mm/memory.c | 14 +-- mm/migrate.c | 2 +- mm/swap.c | 26 +++--- mm/swapfile.c | 2 +- mm/userfaultfd.c | 2 +- mm/vmscan.c | 91 ++- mm/workingset.c | 179 +++--- 13 files changed, 371 insertions(+), 59 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e862cab69583..d292f20c4e3d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1718,7 +1719,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, if (PageSwapCache(page)) md->swapcache += nr_pages; - if (PageActive(page) || PageUnevictable(page)) + if (PageUnevictable(page) || page_is_active(compound_head(page), NULL)) md->active += nr_pages; if (PageWriteback(page)) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 2bf910eb3dd7..5eb4b12972ec 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -95,6 +95,12 @@ static inline int lru_gen_from_seq(unsigned long seq) return seq % MAX_NR_GENS; } +/* Convert the level of usage to a tier. See the comment on MAX_NR_TIERS. 
*/ +static inline int lru_tier_from_usage(int usage) +{ + return order_base_2(usage + 1); +} + /* Return a proper index regardless whether we keep a full history of stats. */ static inline int sid_from_seq_or_gen(int seq_or_gen) { @@ -238,12 +244,93 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec) return true; } +/* Activate a page from page cache or swap cache after it's mapped. */ +static inline void lru_gen_activation(struct page *page, struct vm_area_struct *vma) +{ + if (!lru_gen_enabled()) + return; + + if (PageActive(page) || PageUnevictable(page) || vma_is_dax(vma) || + (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))) + return; + /* +* TODO: pass vm_fault to add_to_page_cache_lru() and +* __read_swa
[PATCH v2 01/16] include/linux/memcontrol.h: do not warn in page_memcg_rcu() if !CONFIG_MEMCG
page_memcg_rcu() warns on !rcu_read_lock_held() regardless of CONFIG_MEMCG. The following code is legit, but it triggers the warning when !CONFIG_MEMCG, since lock_page_memcg() and unlock_page_memcg() are empty for this config. memcg = lock_page_memcg(page1) (rcu_read_lock() if CONFIG_MEMCG=y) do something to page1 if (page_memcg_rcu(page2) == memcg) do something to page2 too as it cannot be migrated away from the memcg either. unlock_page_memcg(page1) (rcu_read_unlock() if CONFIG_MEMCG=y) Locking/unlocking rcu consistently for both configs is rigorous but it also forces unnecessary locking upon users who have no interest in CONFIG_MEMCG. This patch removes the assertion for !CONFIG_MEMCG, because page_memcg_rcu() has a few callers and there are no concerns regarding their correctness at the moment. Signed-off-by: Yu Zhao --- include/linux/memcontrol.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0c04d39a7967..f13dc02cf277 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1077,7 +1077,6 @@ static inline struct mem_cgroup *page_memcg(struct page *page) static inline struct mem_cgroup *page_memcg_rcu(struct page *page) { - WARN_ON_ONCE(!rcu_read_lock_held()); return NULL; } -- 2.31.1.295.g9ea45b61b8-goog
[PATCH v2 04/16] include/linux/cgroup.h: export cgroup_mutex
cgroup_mutex is needed to synchronize with memcg creations. Signed-off-by: Yu Zhao --- include/linux/cgroup.h | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4f2f79de083e..bd5744360cfa 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -704,6 +715,8 @@ struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, -- 2.31.1.295.g9ea45b61b8-goog
[PATCH v2 06/16] mm, x86: support the accessed bit on non-leaf PMD entries
Some architectures support the accessed bit on non-leaf PMD entries (parents) in addition to leaf PTE entries (children) where pages are mapped, e.g., x86_64 sets the accessed bit on a parent when using it as part of linear-address translation [1]. Page table walkers who are interested in the accessed bit on children can take advantage of this: they do not need to search the children when the accessed bit is not set on a parent, given that they have previously cleared the accessed bit on this parent. [1]: Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3 (October 2019), section 4.8 Signed-off-by: Yu Zhao --- arch/Kconfig | 9 + arch/x86/Kconfig | 1 + arch/x86/include/asm/pgtable.h | 2 +- arch/x86/mm/pgtable.c | 5 - include/linux/pgtable.h| 4 ++-- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index ecfd3520b676..cbd7f66734ee 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -782,6 +782,15 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD bool +config HAVE_ARCH_PARENT_PMD_YOUNG + bool + depends on PGTABLE_LEVELS > 2 + help + Architectures that select this are able to set the accessed bit on + non-leaf PMD entries in addition to leaf PTE entries where pages are + mapped. For them, page table walkers that clear the accessed bit may + stop at non-leaf PMD entries when they do not see the accessed bit. 
+ config HAVE_ARCH_HUGE_VMAP bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2792879d398e..b5972eb82337 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -163,6 +163,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 + select HAVE_ARCH_PARENT_PMD_YOUNG if X86_64 select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_WITHIN_STACK_FRAMES diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a02c67291cfc..a6b5cfe1fc5a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -846,7 +846,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return ((pmd_flags(pmd) | _PAGE_ACCESSED) & ~_PAGE_USER) != _KERNPG_TABLE; } static inline unsigned long pages_to_mb(unsigned long npg) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index f6a9e2e36642..1c27e6f43f80 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { @@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5e772392a379..08dd9b8c055a 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -193,7 +193,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -214,7 +214,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, BUILD_BUG(); return 0; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -- 2.31.1.295.g9ea45b61b8-goog
[PATCH v2 05/16] mm/swap.c: export activate_page()
activate_page() is needed to activate pages that are already on lru or queued in lru_pvecs.lru_add. The exported function is a merger between the existing activate_page() and __lru_cache_activate_page(). Signed-off-by: Yu Zhao --- include/linux/swap.h | 1 + mm/swap.c| 28 +++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 4cc6ec3bf0ab..de2bbbf181ba 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -344,6 +344,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); +extern void activate_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); diff --git a/mm/swap.c b/mm/swap.c index 31b844d4ed94..f20ed56ebbbf 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -334,7 +334,7 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void activate_page(struct page *page) +static void activate_page_on_lru(struct page *page) { page = compound_head(page); if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { @@ -354,7 +354,7 @@ static inline void activate_page_drain(int cpu) { } -static void activate_page(struct page *page) +static void activate_page_on_lru(struct page *page) { struct lruvec *lruvec; @@ -368,11 +368,22 @@ static void activate_page(struct page *page) } #endif -static void __lru_cache_activate_page(struct page *page) +/* + * If the page is on the LRU, queue it for activation via + * lru_pvecs.activate_page. Otherwise, assume the page is on a + * pagevec, mark it active and it'll be moved to the active + * LRU on the next drain. 
+ */ +void activate_page(struct page *page) { struct pagevec *pvec; int i; + if (PageLRU(page)) { + activate_page_on_lru(page); + return; + } + local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); @@ -421,16 +432,7 @@ void mark_page_accessed(struct page *page) * evictable page accessed has no effect. */ } else if (!PageActive(page)) { - /* -* If the page is on the LRU, queue it for activation via -* lru_pvecs.activate_page. Otherwise, assume the page is on a -* pagevec, mark it active and it'll be moved to the active -* LRU on the next drain. -*/ - if (PageLRU(page)) - activate_page(page); - else - __lru_cache_activate_page(page); + activate_page(page); ClearPageReferenced(page); workingset_activation(page); } -- 2.31.1.295.g9ea45b61b8-goog
[PATCH v2 03/16] include/linux/huge_mm.h: define is_huge_zero_pmd() if !CONFIG_TRANSPARENT_HUGEPAGE
Currently is_huge_zero_pmd() only exists when CONFIG_TRANSPARENT_HUGEPAGE=y. This patch adds the function for !CONFIG_TRANSPARENT_HUGEPAGE. Signed-off-by: Yu Zhao --- include/linux/huge_mm.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ba973efcd369..0ba7b3f9029c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -443,6 +443,11 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline bool is_huge_zero_pmd(pmd_t pmd) +{ + return false; +} + static inline bool is_huge_zero_pud(pud_t pud) { return false; -- 2.31.1.295.g9ea45b61b8-goog
[PATCH v2 02/16] include/linux/nodemask.h: define next_memory_node() if !CONFIG_NUMA
Currently next_memory_node only exists when CONFIG_NUMA=y. This patch adds the macro for !CONFIG_NUMA. Signed-off-by: Yu Zhao --- include/linux/nodemask.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index ac398e143c9a..89fe4e3592f9 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) +#define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U -- 2.31.1.295.g9ea45b61b8-goog
[PATCH v2 07/16] mm/vmscan.c: refactor shrink_node()
Heuristics that determine scan balance between anon and file LRUs are rather independent. Move them into a separate function to improve readability. Signed-off-by: Yu Zhao --- mm/vmscan.c | 186 +++- 1 file changed, 98 insertions(+), 88 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 562e87cbd7a1..1a24d2e0a4cb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2224,6 +2224,103 @@ enum scan_balance { SCAN_FILE, }; +static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long file; + struct lruvec *target_lruvec; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + + /* +* Determine the scan balance between anon and file LRUs. +*/ + spin_lock_irq(&target_lruvec->lru_lock); + sc->anon_cost = target_lruvec->anon_cost; + sc->file_cost = target_lruvec->file_cost; + spin_unlock_irq(&target_lruvec->lru_lock); + + /* +* Target desirable inactive:active list ratios for the anon +* and file LRU lists. +*/ + if (!sc->force_deactivate) { + unsigned long refaults; + + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + sc->may_deactivate |= DEACTIVATE_ANON; + else + sc->may_deactivate &= ~DEACTIVATE_ANON; + + /* +* When refaults are being observed, it means a new +* workingset is being established. Deactivate to get +* rid of any stale active pages quickly. +*/ + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; + else + sc->may_deactivate &= ~DEACTIVATE_FILE; + } else + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; + + /* +* If we have plenty of inactive file pages that aren't +* thrashing, try to reclaim those first before touching +* anonymous pages. 
+*/ + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) + sc->cache_trim_mode = 1; + else + sc->cache_trim_mode = 0; + + /* +* Prevent the reclaimer from falling into the cache trap: as +* cache pages start out inactive, every cache fault will tip +* the scan balance towards the file LRU. And as the file LRU +* shrinks, so does the window for rotation from references. +* This means we have a runaway feedback loop where a tiny +* thrashing file LRU becomes infinitely more attractive than +* anon pages. Try to detect this based on file LRU size. +*/ + if (!cgroup_reclaim(sc)) { + unsigned long total_high_wmark = 0; + unsigned long free, anon; + int z; + + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + file = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + /* +* Consider anon: if that's low too, this isn't a +* runaway file reclaim problem, but rather just +* extreme pressure. Reclaim as per usual then. +*/ + anon = node_page_state(pgdat, NR_INACTIVE_ANON); + + sc->file_is_tiny = + file + free <= total_high_wmark && + !(sc->may_deactivate & DEACTIVATE_ANON) && + anon >> sc->priority; + } +} + /* * Determine how aggressively the anon and file LRU lists should be * scanned. The relative value of each set of LRU lists is determined @@ -2669,7 +2766,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long nr_reclaimed, nr_scanned; struct lruvec *target_lruvec; bool reclaimable = false; - unsigned long file; target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); @@ -2679,93 +2775,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control
[PATCH v2 00/16] Multigenerational LRU Framework
What's new in v2 Special thanks to Jens Axboe for reporting a regression in buffered I/O and helping test the fix. This version includes the support of tiers, which represent levels of usage from file descriptors only. Pages accessed N times via file descriptors belong to tier order_base_2(N). Each generation contains at most MAX_NR_TIERS tiers, and they require additional MAX_NR_TIERS-2 bits in page->flags. In contrast to moving across generations which requires the lru lock, moving across tiers only involves an atomic operation on page->flags and therefore has a negligible cost. A feedback loop modeled after the well-known PID controller monitors the refault rates across all tiers and decides when to activate pages from which tiers, on the reclaim path. This feedback model has a few advantages over the current feedforward model: 1) It has a negligible overhead in the buffered I/O access path because activations are done in the reclaim path. 2) It takes mapped pages into account and avoids overprotecting pages accessed multiple times via file descriptors. 3) More tiers offer better protection to pages accessed more than twice when buffered-I/O-intensive workloads are under memory pressure. The fio/io_uring benchmark shows 14% improvement in IOPS when randomly accessing Samsung PM981a in the buffered I/O mode. Highlights from the discussions on v1 = Thanks to Ying Huang and Dave Hansen for the comments and suggestions on page table scanning. A simple worst-case scenario test did not find page table scanning underperforms the rmap because of the following optimizations: 1) It will not scan page tables from processes that have been sleeping since the last scan. 2) It will not scan PTE tables under non-leaf PMD entries that do not have the accessed bit set, when CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG=y. 3) It will not zigzag between the PGD table and the same PMD or PTE table spanning multiple VMAs. 
In other words, it finishes all the VMAs with the range of the same PMD or PTE table before it returns to the PGD table. This optimizes workloads that have large numbers of tiny VMAs, especially when CONFIG_PGTABLE_LEVELS=5. TLDR The current page reclaim is too expensive in terms of CPU usage and often making poor choices about what to evict. We would like to offer an alternative framework that is performant, versatile and straightforward. Repo git fetch https://linux-mm.googlesource.com/page-reclaim refs/changes/73/1173/1 Gerrit https://linux-mm-review.googlesource.com/c/page-reclaim/+/1173 Background == DRAM is a major factor in total cost of ownership, and improving memory overcommit brings a high return on investment. Over the past decade of research and experimentation in memory overcommit, we observed a distinct trend across millions of servers and clients: the size of page cache has been decreasing because of the growing popularity of cloud storage. Nowadays anon pages account for more than 90% of our memory consumption and page cache contains mostly executable pages. Problems Notion of active/inactive - For servers equipped with hundreds of gigabytes of memory, the granularity of the active/inactive is too coarse to be useful for job scheduling. False active/inactive rates are relatively high, and thus the assumed savings may not materialize. For phones and laptops, executable pages are frequently evicted despite the fact that there are many less recently used anon pages. Major faults on executable pages cause "janks" (slow UI renderings) and negatively impact user experience. For lruvecs from different memcgs or nodes, comparisons are impossible due to the lack of a common frame of reference. Incremental scans via rmap -- Each incremental scan picks up at where the last scan left off and stops after it has found a handful of unreferenced pages. 
For workloads using a large amount of anon memory, incremental scans lose the advantage under sustained memory pressure due to high ratios of the number of scanned pages to the number of reclaimed pages. In our case, the average ratio of pgscan to pgsteal is above 7. On top of that, the rmap has poor memory locality due to its complex data structures. The combined effects typically result in a high amount of CPU usage in the reclaim path. For example, with zram, a typical kswapd profile on v5.11 looks like: 31.03% page_vma_mapped_walk 25.59% lzo1x_1_do_compress 4.63% do_raw_spin_lock 3.89% vma_interval_tree_iter_next 3.33% vma_interval_tree_subtree_search And with real swap, it looks like: 45.16% page_vma_mapped_walk 7.61% do_raw_spin_lock 5.69% vma_interval_tree_iter_next 4.91% vma_interval_tree_subtree_search 3.71% page_referenced_one Solutions = Notion of generation numbers The notion of generation numbers introduces a q
Re: [PATCH 4/7] mm: Introduce verify_page_range()
On Mon, Apr 12, 2021 at 01:05:09PM -0700, Kees Cook wrote: > On Mon, Apr 12, 2021 at 10:00:16AM +0200, Peter Zijlstra wrote: > > +struct vpr_data { > > + int (*fn)(pte_t pte, unsigned long addr, void *data); > > + void *data; > > +}; > > Eeerg. This is likely to become an attack target itself. Stored function > pointer with stored (3rd) argument. You got some further reading on that? How exactly are those exploited?
Re: Re: [PATCH] phy: nxp-c45: add driver for tja1103
Hi Andrew, On 4/12/2021 6:52 PM, Andrew Lunn wrote: So what you are saying is, you don't care if the IP is completely different, it all goes in one driver. So let's put this driver into nxp-tja11xx.c. And then we avoid all the naming issues. Andrew As this seems to be a key question, let me try and shed some more light on this. The original series of BASE-T1 PHYs includes TJA1100, TJA1101, and TJA1102. They are covered by the existing driver, which has the unfortunate naming TJA11xx. Unfortunate, because the use of wildcards is a bit too generous. E.g. the naming would also include a TJA1145, which is a high-speed CAN transceiver. The truth is, extrapolating wildcards in product names doesn't work as there is no guarantee of future product names. The mentioned TJA1100/1/2 are *fairly* software-compatible, which is why it makes sense to have a shared driver. When it gets to TJA1103, there is no SW compatibility, which is why we decided to create a new driver. We want to support all future Ethernet PHY devices with this codebase, and that is why the naming is that generic. The common denominator of the devices is that they are NXP products and use clause 45 addressing. When you say we don't care that the IP is different, that doesn't quite fit. Just because the MDI is different, the register map does not need to change much, so it will be easy to support future PHYs also when using different PHY technology. Moving the code into TJA11xx is creating more issues, as it assumes that the devices which are managed by the driver are always TJA... devices which may not be true. Christian
Re: [PATCH] mm: optimize memory allocation
On Mon 12-04-21 15:49:53, ultrac...@163.com wrote: > From: Chen Xiaoguang > > Check memory cgroup limit before allocating real memory. This may > improve performance especially in slow path when memory allocation > exceeds cgroup limitation. I would be really curious about any actual numbers because I have really hard times to see scenarios when this would lead to an improvement. Effectively only non-oom allocations would benefit theoretically (e.g. atomic or GFP_NORETRY etc). All others will trigger the memcg oom killer to help forward progress. Besides that I really dislike kmem and LRU pages to be handled differently so for that reason Nacked-by: Michal Hocko If the optimization really can be proven then the patch would require to be much more invasive. > Signed-off-by: Chen Xiaoguang > Signed-off-by: Chen He > --- > include/linux/memcontrol.h | 30 ++ > mm/memcontrol.c| 34 -- > mm/page_alloc.c| 24 +--- > 3 files changed, 55 insertions(+), 33 deletions(-) > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 0c04d39..59bb3ba 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -1583,8 +1583,9 @@ static inline void memcg_set_shrinker_bit(struct > mem_cgroup *memcg, > #endif > > #ifdef CONFIG_MEMCG_KMEM > -int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); > -void __memcg_kmem_uncharge_page(struct page *page, int order); > +int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp, int order); > +void __memcg_kmem_uncharge_page(struct page *page, int order, > + struct mem_cgroup *memcg); > > struct obj_cgroup *get_obj_cgroup_from_current(void); > > @@ -1610,18 +1611,30 @@ static inline bool memcg_kmem_enabled(void) > return static_branch_likely(&memcg_kmem_enabled_key); > } > > +extern struct mem_cgroup *get_mem_cgroup_from_current(void); > + > static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, >int order) > { > - if (memcg_kmem_enabled()) > - return
__memcg_kmem_charge_page(page, gfp, order); > - return 0; > + struct mem_cgroup *memcg; > + int ret = 0; > + > + memcg = get_mem_cgroup_from_current(); > + if (memcg && memcg_kmem_enabled() && !mem_cgroup_is_root(memcg)) { > + ret = __memcg_kmem_charge_page(memcg, gfp, order); > + if (!ret) { > + page->memcg_data = (unsigned long)memcg | > MEMCG_DATA_KMEM; > + return 0; > + } > + css_put(&memcg->css); > + } > + return ret; > } > > static inline void memcg_kmem_uncharge_page(struct page *page, int order) > { > if (memcg_kmem_enabled()) > - __memcg_kmem_uncharge_page(page, order); > + __memcg_kmem_uncharge_page(page, order, NULL); > } > > /* > @@ -1647,13 +1660,14 @@ static inline void memcg_kmem_uncharge_page(struct > page *page, int order) > { > } > > -static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, > +static inline int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t > gfp, > int order) > { > return 0; > } > > -static inline void __memcg_kmem_uncharge_page(struct page *page, int order) > +static inline void __memcg_kmem_uncharge_page(struct page *page, int order, > + struct mem_cgroup *memcg) > { > } > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index e064ac0d..8df57b7 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -1085,7 +1085,7 @@ static __always_inline bool memcg_kmem_bypass(void) > /** > * If active memcg is set, do not fallback to current->mm->memcg. > */ > -static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void) > +struct mem_cgroup *get_mem_cgroup_from_current(void) > { > if (memcg_kmem_bypass()) > return NULL; > @@ -3113,21 +3113,11 @@ static void __memcg_kmem_uncharge(struct mem_cgroup > *memcg, unsigned int nr_page > * > * Returns 0 on success, an error code on failure. 
> */ > -int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) > +int __memcg_kmem_charge_page(struct mem_cgroup *memcg, gfp_t gfp, int order) > { > - struct mem_cgroup *memcg; > - int ret = 0; > + int ret; > > - memcg = get_mem_cgroup_from_current(); > - if (memcg && !mem_cgroup_is_root(memcg)) { > - ret = __memcg_kmem_charge(memcg, gfp, 1 << order); > - if (!ret) { > - page->memcg_data = (unsigned long)memcg | > - MEMCG_DATA_KMEM; > - return 0; > - } > - css_put(&memcg->css); > - } > + ret = __memcg_kmem_charge(memcg, gfp, 1 << order); >
Re: [PATCH] kunit: add unit test for filtering suites by names
Hi Daniel, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on 1678e493d530e7977cce34e59a86bb86f3c5631e] url: https://github.com/0day-ci/linux/commits/Daniel-Latypov/kunit-add-unit-test-for-filtering-suites-by-names/20210413-080913 base: 1678e493d530e7977cce34e59a86bb86f3c5631e config: microblaze-randconfig-r014-20210413 (attached as .config) compiler: microblaze-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/756df216f1586cecdf02f278fbed232fb25fa3f7 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Daniel-Latypov/kunit-add-unit-test-for-filtering-suites-by-names/20210413-080913 git checkout 756df216f1586cecdf02f278fbed232fb25fa3f7 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=microblaze If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): In file included from lib/kunit/executor.c:119: lib/kunit/executor_test.c: In function 'alloc_fake_suite': >> lib/kunit/executor_test.c:129:2: warning: 'strncpy' specified bound 256 >> equals destination size [-Wstringop-truncation] 129 | strncpy((char *)suite->name, suite_name, sizeof(suite->name)); | ^ vim +/strncpy +129 lib/kunit/executor_test.c 121 122 static struct kunit_suite *alloc_fake_suite(struct kunit *test, 123 const char *suite_name) 124 { 125 struct kunit_suite *suite; 126 127 /* We normally never expect to allocate suites, hence the non-const cast. 
*/ 128 suite = kunit_kzalloc(test, sizeof(*suite), GFP_KERNEL); > 129 strncpy((char *)suite->name, suite_name, sizeof(suite->name)); --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip
Re: [PATCH 6/7] i915: Convert to verify_page_range()
On Mon, Apr 12, 2021 at 01:08:38PM -0700, Kees Cook wrote: > On Mon, Apr 12, 2021 at 10:00:18AM +0200, Peter Zijlstra wrote: > > @@ -1249,14 +1249,14 @@ static int check_absent_pte(pte_t *pte, > > > > static int check_present(unsigned long addr, unsigned long len) > > { > > - return apply_to_page_range(current->mm, addr, len, > > - check_present_pte, (void *)addr); > > + return verify_page_range(current->mm, addr, len, > > +check_present_pte, (void *)addr); > > For example, switch to returning bad addr through verify_page_range(), > or have a by-reference value, etc: > > unsigned long failed; > > failed = verify_page_range(current->mm< addr, len, check_present_pte); > if (failed) { > pr_err("missing PTE:%lx\n", > (addr - failed) >> PAGE_SHIFT); OK, lemme try that.
Re: [Outreachy kernel] Subject: [PATCH v2] staging: media: meson: vdec: declare u32 as static const appropriately
On Tue, 13 Apr 2021, Mitali Borkar wrote: > Declared 32 bit unsigned int as static constant inside a function > appropriately. I don't think that the description matches what is done. Perhaps all the meaning is intended to be in the word "appropriately", but that is not very clear. The message makes it look like static const is the new part, but it is already there. julia > > Reported-by: kernel test robot > Signed-off-by: Mitali Borkar > --- > > Changes from v1:- Rectified the mistake by declaring u32 as static const > properly. > > drivers/staging/media/meson/vdec/codec_h264.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/staging/media/meson/vdec/codec_h264.c > b/drivers/staging/media/meson/vdec/codec_h264.c > index ea86e9e1c447..80141b89a9f6 100644 > --- a/drivers/staging/media/meson/vdec/codec_h264.c > +++ b/drivers/staging/media/meson/vdec/codec_h264.c > @@ -287,8 +287,8 @@ static void codec_h264_resume(struct amvdec_session *sess) > struct amvdec_core *core = sess->core; > struct codec_h264 *h264 = sess->priv; > u32 mb_width, mb_height, mb_total; > - static const u32[] canvas3 = { ANCO_CANVAS_ADDR, 0 }; > - static const u32[] canvas4 = { 24, 0 }; > + static const u32 canvas3[] = { ANCO_CANVAS_ADDR, 0 }; > + static const u32 canvas4[] = { 24, 0 }; > > amvdec_set_canvases(sess, canvas3, canvas4); > > -- > 2.30.2 > > -- > You received this message because you are subscribed to the Google Groups > "outreachy-kernel" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to outreachy-kernel+unsubscr...@googlegroups.com. > To view this discussion on the web visit > https://groups.google.com/d/msgid/outreachy-kernel/YHU56OM%2BC2zY34VP%40kali. >
Re: [PATCH 3/3] sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
On Mon, Apr 12, 2021 at 06:22:42PM +0100, Valentin Schneider wrote: > On 12/04/21 14:03, Peter Zijlstra wrote: > > On Thu, Mar 11, 2021 at 03:13:04PM +, Valentin Schneider wrote: > >> Peter Zijlstra writes: > >> > @@ -7910,6 +7908,14 @@ int sched_cpu_deactivate(unsigned int cp > >> >} > >> >rq_unlock_irqrestore(rq, &rf); > >> > > >> > +/* > >> > + * From this point forward, this CPU will refuse to run any > >> > task that > >> > + * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will > >> > actively > >> > + * push those tasks away until this gets cleared, see > >> > + * sched_cpu_dying(). > >> > + */ > >> > +balance_push_set(cpu, true); > >> > + > >> > >> AIUI with cpu_dying_mask being flipped before even entering > >> sched_cpu_deactivate(), we don't need this to be before the > >> synchronize_rcu() anymore; is there more than that to why you're punting it > >> back this side of it? > > > > I think it does need to be like this, we need to clearly separate > > the active=true and balance_push_set(). If we were to somehow observe > > both balance_push_set() and active==false, we'd be in trouble. > > > > I'm afraid I don't follow; we're replacing a read of rq->balance_push with > cpu_dying(), and those are still written on the same side of the > synchronize_rcu(). What am I missing? Yeah, I'm not sure anymore either; I tried to work out why I'd done that but upon closer examination everything fell flat. Let me try again today :-) > Oooh, I can't read, only the boot CPU gets its callback uninstalled in > sched_init()! So secondaries keep push_callback installed up until > sched_cpu_activate(), but as you said it's not effective unless a rollback > happens. > > Now, doesn't that mean we should *not* uninstall the callback in > sched_cpu_dying()? AFAIK it's possible for the initial secondary CPU > boot to go fine, but the next offline+online cycle fails while going up - > that would need to rollback with push_callback installed.
Quite; I removed that shortly after sending this; when I tried to write a comment and found it.
Re: [PATCH] MIPS: fix memory reservation for non-usermem setups
On Mon, Apr 12, 2021 at 11:45 PM Ilya Lipnitskiy wrote: > > Hi Thomas, > > On Tue, Apr 6, 2021 at 6:18 AM Thomas Bogendoerfer > wrote: > > > > On Sat, Apr 03, 2021 at 07:02:13PM -0700, Ilya Lipnitskiy wrote: > > > Hi Mike, > > > > > > On Tue, Mar 16, 2021 at 11:33 PM Mike Rapoport wrote: > > > > > > > > Hi Ilya, > > > > > > > > On Tue, Mar 16, 2021 at 10:10:09PM -0700, Ilya Lipnitskiy wrote: > > > > > Hi Thomas, > > > > > > > > > > On Fri, Mar 12, 2021 at 7:19 AM Thomas Bogendoerfer > > > > > wrote: > > > > > > > > > > > > On Sun, Mar 07, 2021 at 11:40:30AM -0800, Ilya Lipnitskiy wrote: > > > > > > > From: Tobias Wolf > > > > > > > > > > > > > > Commit 67a3ba25aa95 ("MIPS: Fix incorrect mem=X@Y handling") > > > > > > > introduced a new > > > > > > > issue for rt288x where "PHYS_OFFSET" is 0x0 but the calculated > > > > > > > "ramstart" is > > > > > > > not. As the prerequisite of custom memory map has been removed, > > > > > > > this results > > > > > > > in the full memory range of 0x0 - 0x800 to be marked as > > > > > > > reserved for this > > > > > > > platform. > > > > > > > > > > > > and where is the problem here ? > > > > > Turns out this was already attempted to be upstreamed - not clear why > > > > > it wasn't merged. Context: > > > > > https://lore.kernel.org/linux-mips/6504517.U6H5IhoIOn@loki/ > > > > > > > > > > I hope the thread above helps you understand the problem. > > > > > > > > The memory initialization was a bit different then. Do you still see the > > > > same problem? > > > Thanks for asking. I obtained a RT2880 device and gave it a try. It > > > hangs at boot without this patch, however selecting > > > > can you provide debug logs with memblock=debug for both good and bad > > kernels ? I'm curious what's the reason for failing allocation... > Sorry for taking a while to respond. See attached. 
> FWIW, it seems these are the lines that stand out in hang.log: > [0.00] memblock_reserve: [0x-0x07ff] > setup_arch+0x214/0x5d8 > [0.00] Wasting 1048576 bytes for tracking 32768 unused pages > ... > [0.00] reserved[0x0][0x-0x087137aa], 0x087137ab > bytes flags: 0x0 Just to be clear, good.log is mips-next tip (dbd815c0dcca) and hang.log is the same with MIPS_AUTO_PFN_OFFSET _NOT_ selected. Ilya
Re: [PATCH] vfio/pci: Add missing range check in vfio_pci_mmap
On Mon, 12 Apr 2021 23:41:24 +0200 "Christian A. Ehrhardt" wrote: > When mmaping an extra device region verify that the region index > derived from the mmap offset is valid. > > Fixes: a15b1883fee1 ("vfio_pci: Allow mapping extra regions") > Cc: sta...@vger.kernel.org > Signed-off-by: Christian A. Ehrhardt > --- > drivers/vfio/pci/vfio_pci.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) Reviewed-by: Cornelia Huck
Re: [PATCH v1 6/7] mfd: lpc_ich: Add support for pinctrl in non-ACPI system
Am Mon, 12 Apr 2021 20:34:45 +0300 schrieb Andy Shevchenko : > On Mon, Apr 12, 2021 at 07:16:53PM +0200, Henning Schild wrote: > > Am Mon, 12 Apr 2021 19:59:05 +0300 > > schrieb Andy Shevchenko : > > > On Mon, Apr 12, 2021 at 06:40:01PM +0200, Henning Schild wrote: > > > > Tan or Andy, > > > > > > > > maybe you can point me to a user of that patch. I guess there > > > > might be an out-of-tree driver or userland code on how to use > > > > the GPIOs from there. > > > > > > I'm confused. User of this patch is pinctrl-broxton driver. > > > It's in upstream. > > > > Should this appear in /sys/class/gpio as chip so that pins can be > > exported? > > No. Sysfs interface is deprecated. It should appear as /dev/gpiochip0 > or so. Ok, just found that there is a null pointer deref in the probe function of the pinctrl driver, looking into that. Meanwhile i think i will need a similar patch for pinctrl-sunrisepoint.c for that wdt, do you happen to have that as well? Or a spec where to find all the magic numbers. regards, Henning > > > That is what i tried and failed with. > > > > > Using GPIOs from it is something as done in a few drivers already > > > (Assuming we have no resources described in the ACPI). I.e. you > > > need to register in board file the GPIO mapping table with help of > > > devm_acpi_dev_add_driver_gpios() and use one of gpiod_get() > > > family of functions to request it. > > > > > > In case of LEDs you simple describe GPIO device name in lookup > > > table and that's it. The drivers/platform/x86/pcengines-apuv2.c > > > not the best but will give you an idea how to use "leds-gpio" > > > driver in board files. > > > > I am aware of that driver and had a look at it. In order to figure > > out the arguments for the macros/functions i was hoping for > > userland gpio "export", but maybe that does not work here ... > > For now i will assume that it does not show up in sysfs and can > > maybe still be used, and try to build on top. 
> > Just switch to use libgpiod and associated tools / bindings in user > space. Sysfs ABI is not being developed anymore. >
Re: [PATCH] MIPS: fix memory reservation for non-usermem setups
Hi Thomas, On Tue, Apr 6, 2021 at 6:18 AM Thomas Bogendoerfer wrote: > > On Sat, Apr 03, 2021 at 07:02:13PM -0700, Ilya Lipnitskiy wrote: > > Hi Mike, > > > > On Tue, Mar 16, 2021 at 11:33 PM Mike Rapoport wrote: > > > > > > Hi Ilya, > > > > > > On Tue, Mar 16, 2021 at 10:10:09PM -0700, Ilya Lipnitskiy wrote: > > > > Hi Thomas, > > > > > > > > On Fri, Mar 12, 2021 at 7:19 AM Thomas Bogendoerfer > > > > wrote: > > > > > > > > > > On Sun, Mar 07, 2021 at 11:40:30AM -0800, Ilya Lipnitskiy wrote: > > > > > > From: Tobias Wolf > > > > > > > > > > > > Commit 67a3ba25aa95 ("MIPS: Fix incorrect mem=X@Y handling") > > > > > > introduced a new > > > > > > issue for rt288x where "PHYS_OFFSET" is 0x0 but the calculated > > > > > > "ramstart" is > > > > > > not. As the prerequisite of custom memory map has been removed, > > > > > > this results > > > > > > in the full memory range of 0x0 - 0x800 to be marked as > > > > > > reserved for this > > > > > > platform. > > > > > > > > > > and where is the problem here ? > > > > Turns out this was already attempted to be upstreamed - not clear why > > > > it wasn't merged. Context: > > > > https://lore.kernel.org/linux-mips/6504517.U6H5IhoIOn@loki/ > > > > > > > > I hope the thread above helps you understand the problem. > > > > > > The memory initialization was a bit different then. Do you still see the > > > same problem? > > Thanks for asking. I obtained a RT2880 device and gave it a try. It > > hangs at boot without this patch, however selecting > > can you provide debug logs with memblock=debug for both good and bad > kernels ? I'm curious what's the reason for failing allocation... Sorry for taking a while to respond. See attached. FWIW, it seems these are the lines that stand out in hang.log: [0.00] memblock_reserve: [0x-0x07ff] setup_arch+0x214/0x5d8 [0.00] Wasting 1048576 bytes for tracking 32768 unused pages ... 
[0.00] reserved[0x0][0x-0x087137aa], 0x087137ab bytes flags: 0x0 Ilya [0.00] Linux version 5.12.0-rc2+ (builder@buildhost) (mipsel-openwrt-linux-musl-gcc (OpenWrt GCC 7.5.0 r4-7145a72d3ce2) 7.5.0, GNU ld (GNU Binutils) 2.31.1) #4 Mon Apr 12 23:41:18 PDT 2021 [0.00] SoC Type: Ralink RT2880 id:2 rev:1 [0.00] printk: bootconsole [early0] enabled [0.00] CPU0 revision is: 0001906c (MIPS 4KEc) [0.00] MIPS: machine is Belkin F5D8235 v1 [0.00] memblock_reserve: [0x085d84a8-0x085d9f5e] setup_arch+0x14c/0x5c0 [0.00] memblock_reserve: [0x0800-0x0871378f] setup_arch+0x220/0x5c0 [0.00] Initrd not found or empty - disabling initrd [0.00] memblock_alloc_try_nid: 6839 bytes align=0x40 nid=-1 from=0x max_addr=0x early_init_dt_alloc_memory_arch+0x40/0x84 [0.00] memblock_reserve: [0x087137c0-0x08715276] memblock_alloc_range_nid+0xf0/0x184 [0.00] memblock_alloc_try_nid: 21180 bytes align=0x4 nid=-1 from=0x max_addr=0x early_init_dt_alloc_memory_arch+0x40/0x84 [0.00] memblock_reserve: [0x08715278-0x0871a533] memblock_alloc_range_nid+0xf0/0x184 [0.00] memblock_alloc_try_nid: 27 bytes align=0x4 nid=-1 from=0x max_addr=0x early_init_dt_alloc_memory_arch+0x40/0x84 [0.00] memblock_reserve: [0x08713790-0x087137aa] memblock_alloc_range_nid+0xf0/0x184 [0.00] memblock_reserve: [0x08526000-0x08525fff] setup_arch+0x390/0x5c0 [0.00] memblock_alloc_try_nid: 32 bytes align=0x10 nid=-1 from=0x max_addr=0x setup_arch+0x4ec/0x5c0 [0.00] memblock_reserve: [0x0871a540-0x0871a55f] memblock_alloc_range_nid+0xf0/0x184 [0.00] Primary instruction cache 16kB, VIPT, 4-way, linesize 16 bytes. 
[0.00] Primary data cache 16kB, 4-way, VIPT, no aliases, linesize 16 bytes [0.00] Zone ranges: [0.00] Normal [mem 0x0800-0x09ff] [0.00] Movable zone start for each node [0.00] Early memory node ranges [0.00] node 0: [mem 0x0800-0x09ff] [0.00] Initmem setup node 0 [mem 0x0800-0x09ff] [0.00] memblock_alloc_try_nid: 262144 bytes align=0x10 nid=0 from=0x max_addr=0x alloc_node_mem_map.constprop.145+0x6c/0xd0 [0.00] memblock_reserve: [0x0871a560-0x0875a55f] memblock_alloc_range_nid+0xf0/0x184 [0.00] memblock_alloc_try_nid: 4 bytes align=0x10 nid=0 from=0x max_addr=0x setup_usemap+0x64/0x98 [0.00] memblock_reserve: [0x087137b0-0x087137b3] memblock_alloc_range_nid+0xf0/0x184 [0.00] MEMBLOCK configuration: [0.00] memory size = 0x0200 reserved size = 0x0075b542 [0.00] memory.cnt = 0x1 [0.00] memory[0x0] [0x0800-0x09ff], 0x0200 bytes flags: 0x0 [0.00] reserved.cnt = 0x6 [0.00] reserved[0x0] [0x-0x0fff], 0x1000 bytes flags: 0x0 [0.00] reserved[0x1] [0x0800-0x08
Re: [PATCH v2 resend] mm/memory_hotplug: Make unpopulated zones PCP structures unreachable during hot remove
On Mon 12-04-21 14:40:18, Vlastimil Babka wrote: > On 4/12/21 2:08 PM, Mel Gorman wrote: > > zone_pcp_reset allegedly protects against a race with drain_pages > > using local_irq_save but this is bogus. local_irq_save only operates > > on the local CPU. If memory hotplug is running on CPU A and drain_pages > > is running on CPU B, disabling IRQs on CPU A does not affect CPU B and > > offers no protection. > > > > This patch deletes IRQ disable/enable on the grounds that IRQs protect > > nothing and assumes the existing hotplug paths guarantees the PCP cannot be > > used after zone_pcp_enable(). That should be the case already because all > > the pages have been freed and there is no page to put on the PCP lists. > > > > Signed-off-by: Mel Gorman > > Yeah the irq disabling here is clearly bogus, so: > > Acked-by: Vlastimil Babka > > But I think Michal has a point that we might best leave the pagesets around, > by > a future change. I have some doubts that even with your reordering of the > reset/destroy after zonelist rebuild in v1 they can't be reachable. We have > no > protection between zonelist rebuild and zonelist traversal, and that's why we > just leave pgdats around. > > So I can imagine a task racing with memory hotremove might see watermarks as > ok > in get_page_from_freelist() for the zone and proceeds to try_this_zone:, then > gets stalled/scheduled out while hotremove rebuilds the zonelist and destroys > the pcplists, then the first task is resumed and proceeds with > rmqueue_pcplist(). > > So that's very rare thus not urgent, and this patch doesn't make it less rare > so > not a reason to block it. Completely agreed here. Not an urgent thing to work on but something to look into long term. -- Michal Hocko SUSE Labs
Re: [PATCH v5] lib: add basic KUnit test for lib/math
On Tue, Apr 13, 2021 at 3:07 AM Daniel Latypov wrote: > > Add basic test coverage for files that don't require any config options: > * part of math.h (what seem to be the most commonly used macros) > * gcd.c > * lcm.c > * int_sqrt.c > * reciprocal_div.c > (Ignored int_pow.c since it's a simple textbook algorithm.) > > These tests aren't particularly interesting, but they > * provide short and simple examples of parameterized tests > * provide a place to add tests for any new files in this dir > * are written so adding new test cases to cover edge cases should be easy > * looking at code coverage, we hit all the branches in the .c files > > Signed-off-by: Daniel Latypov This looks good to me. A few comments/observations below, but nothing that I think should actually block this. Reviewed-by: David Gow -- David > --- > Changes since v4: > * add in test cases for some math.h macros (abs, round_up/round_down, > div_round_down/closest) > * use parameterized testing less to keep things terser > > Changes since v3: > * fix `checkpatch.pl --strict` warnings > * add test cases for gcd(0,0) and lcm(0,0) > * minor: don't test both gcd(a,b) and gcd(b,a) when a == b > > Changes since v2: mv math_test.c => math_kunit.c > > Changes since v1: > * Rebase and rewrite to use the new parameterized testing support. > * misc: fix overflow in literal and inline int_sqrt format string. > * related: commit 1f0e943df68a ("Documentation: kunit: provide guidance > for testing many inputs") was merged explaining the patterns shown here. > * there's an in-flight patch to update it for parameterized testing. 
> > v1: https://lore.kernel.org/lkml/20201019224556.3536790-1-dlaty...@google.com/ > --- > lib/math/Kconfig | 5 + > lib/math/Makefile | 2 + > lib/math/math_kunit.c | 264 ++ > 3 files changed, 271 insertions(+) > create mode 100644 lib/math/math_kunit.c > > diff --git a/lib/math/Kconfig b/lib/math/Kconfig > index f19bc9734fa7..6ba8680439c1 100644 > --- a/lib/math/Kconfig > +++ b/lib/math/Kconfig > @@ -15,3 +15,8 @@ config PRIME_NUMBERS > > config RATIONAL > bool > + > +config MATH_KUNIT_TEST > + tristate "KUnit test for lib/math" if !KUNIT_ALL_TESTS > + default KUNIT_ALL_TESTS > + depends on KUNIT This could have a description of the test and KUnit here, as mentioned in the style guide doc: https://www.kernel.org/doc/html/latest/dev-tools/kunit/style.html#test-kconfig-entries (I think it's sufficiently self explanatory that it's not essential, but it could be nice to have a more detailed description of the things being tested than just "lib/math".) > diff --git a/lib/math/Makefile b/lib/math/Makefile > index be6909e943bd..30abb7a8d564 100644 > --- a/lib/math/Makefile > +++ b/lib/math/Makefile > @@ -4,3 +4,5 @@ obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o > reciprocal_div.o > obj-$(CONFIG_CORDIC) += cordic.o > obj-$(CONFIG_PRIME_NUMBERS)+= prime_numbers.o > obj-$(CONFIG_RATIONAL) += rational.o > + > +obj-$(CONFIG_MATH_KUNIT_TEST) += math_kunit.o > diff --git a/lib/math/math_kunit.c b/lib/math/math_kunit.c > new file mode 100644 > index ..80a087a32884 > --- /dev/null > +++ b/lib/math/math_kunit.c > @@ -0,0 +1,264 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Simple KUnit suite for math helper funcs that are always enabled. > + * > + * Copyright (C) 2020, Google LLC. > + * Author: Daniel Latypov > + */ > + > +#include > +#include > +#include > +#include > +#include > + > +static void abs_test(struct kunit *test) > +{ There's something weird about taking the absolute values of char literals. 
I'm not sure if it's better to case integer literals (like with 'short' below), or keep it as-is. > + KUNIT_EXPECT_EQ(test, abs('\0'), '\0'); > + KUNIT_EXPECT_EQ(test, abs('a'), 'a'); > + KUNIT_EXPECT_EQ(test, abs(-'a'), 'a'); > + > + /* The expression in the macro is actually promoted to an int. */ > + KUNIT_EXPECT_EQ(test, abs((short)0), 0); > + KUNIT_EXPECT_EQ(test, abs((short)42), 42); > + KUNIT_EXPECT_EQ(test, abs((short)-42), 42); > + > + KUNIT_EXPECT_EQ(test, abs(0), 0); > + KUNIT_EXPECT_EQ(test, abs(42), 42); > + KUNIT_EXPECT_EQ(test, abs(-42), 42); > + > + KUNIT_EXPECT_EQ(test, abs(0L), 0L); > + KUNIT_EXPECT_EQ(test, abs(42L), 42L); > + KUNIT_EXPECT_EQ(test, abs(-42L), 42L); > + > + KUNIT_EXPECT_EQ(test, abs(0LL), 0LL); > + KUNIT_EXPECT_EQ(test, abs(42LL), 42LL); > + KUNIT_EXPECT_EQ(test, abs(-42LL), 42LL); > + > + /* Unsigned types get casted to signed. */ > + KUNIT_EXPECT_EQ(test, abs(0ULL), 0LL); > + KUNIT_EXPECT_EQ(test, abs(42ULL), 42LL); A part of me is curious what the result is for -0x8000, but I guess that's not defined, so shouldn't be tested. :-) > +} > + > +static void int_sqrt_test(struct kunit *test) > +{ > + KUNIT_EXPECT_EQ(test, int_sqrt(0U
Re: [PATCH v2 resend] mm/memory_hotplug: Make unpopulated zones PCP structures unreachable during hot remove
On Mon 12-04-21 13:08:42, Mel Gorman wrote: > zone_pcp_reset allegedly protects against a race with drain_pages > using local_irq_save but this is bogus. local_irq_save only operates > on the local CPU. If memory hotplug is running on CPU A and drain_pages > is running on CPU B, disabling IRQs on CPU A does not affect CPU B and > offers no protection. > > This patch deletes IRQ disable/enable on the grounds that IRQs protect > nothing and assumes the existing hotplug paths guarantees the PCP cannot be > used after zone_pcp_enable(). That should be the case already because all > the pages have been freed and there is no page to put on the PCP lists. Yes, that is the case since ec6e8c7e0314 ("mm, page_alloc: disable pcplists during memory offline"). Prior to this commit the behavior was undefined but full zone/node hotremove is rare enough that an existing race was likely never observed. Acked-by: Michal Hocko Thanks! > Signed-off-by: Mel Gorman > --- > Resending for email address correction and adding lists > > Changelog since v1 > o Minimal fix > > mm/page_alloc.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 5e8aedb64b57..9bf0db982f14 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -8952,12 +8952,9 @@ void zone_pcp_enable(struct zone *zone) > > void zone_pcp_reset(struct zone *zone) > { > - unsigned long flags; > int cpu; > struct per_cpu_pageset *pset; > > - /* avoid races with drain_pages() */ > - local_irq_save(flags); > if (zone->pageset != &boot_pageset) { > for_each_online_cpu(cpu) { > pset = per_cpu_ptr(zone->pageset, cpu); > @@ -8966,7 +8963,6 @@ void zone_pcp_reset(struct zone *zone) > free_percpu(zone->pageset); > zone->pageset = &boot_pageset; > } > - local_irq_restore(flags); > } > > #ifdef CONFIG_MEMORY_HOTREMOVE -- Michal Hocko SUSE Labs
[PATCH] stm class: remove useless function
Fix the following clang warning: drivers/hwtracing/stm/policy.c:60:21: warning: unused function 'stp_policy_node_name' [-Wunused-function]. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong --- drivers/hwtracing/stm/policy.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c index 603b4a99..42103c3 100644 --- a/drivers/hwtracing/stm/policy.c +++ b/drivers/hwtracing/stm/policy.c @@ -57,11 +57,6 @@ void stp_policy_node_get_ranges(struct stp_policy_node *policy_node, *cend = policy_node->last_channel; } -static inline char *stp_policy_node_name(struct stp_policy_node *policy_node) -{ - return policy_node->group.cg_item.ci_name ? : ""; -} - static inline struct stp_policy *to_stp_policy(struct config_item *item) { return item ? -- 1.8.3.1
RE: [PATCH v7 1/2] platform/x86: dell-privacy: Add support for Dell hardware privacy
Hi , > -Original Message- > From: Amadeusz Sławiński > Sent: 2021年4月12日 18:40 > To: Yuan, Perry; po...@protonmail.com; pierre- > louis.boss...@linux.intel.com; oder_ch...@realtek.com; pe...@perex.cz; > ti...@suse.com; hdego...@redhat.com; mgr...@linux.intel.com > Cc: alsa-de...@alsa-project.org; linux-kernel@vger.kernel.org; > lgirdw...@gmail.com; platform-driver-...@vger.kernel.org; > broo...@kernel.org; Dell Client Kernel; mario.limoncie...@outlook.com > Subject: Re: [PATCH v7 1/2] platform/x86: dell-privacy: Add support for Dell > hardware privacy > > > [EXTERNAL EMAIL] > > On 4/12/2021 11:19 AM, Perry Yuan wrote: > > From: Perry Yuan > > > > (...) > > > diff --git a/drivers/platform/x86/dell/dell-laptop.c > > b/drivers/platform/x86/dell/dell-laptop.c > > index 70edc5bb3a14..e7ffc0b81208 100644 > > --- a/drivers/platform/x86/dell/dell-laptop.c > > +++ b/drivers/platform/x86/dell/dell-laptop.c > > @@ -31,6 +31,8 @@ > > #include "dell-rbtn.h" > > #include "dell-smbios.h" > > > > +#include "dell-privacy-wmi.h" > > + > > struct quirk_entry { > > bool touchpad_led; > > bool kbd_led_not_present; > > @@ -90,6 +92,7 @@ static struct rfkill *wifi_rfkill; > > static struct rfkill *bluetooth_rfkill; > > static struct rfkill *wwan_rfkill; > > static bool force_rfkill; > > +static bool has_privacy; > > > > module_param(force_rfkill, bool, 0444); > > MODULE_PARM_DESC(force_rfkill, "enable rfkill on non whitelisted > > models"); @@ -2206,10 +2209,16 @@ static int __init dell_init(void) > > > > if (dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE) && > > dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE)) { > > - micmute_led_cdev.brightness = > ledtrig_audio_get(LED_AUDIO_MICMUTE); > > - ret = led_classdev_register(&platform_device->dev, > &micmute_led_cdev); > > - if (ret < 0) > > - goto fail_led; > > + if (dell_privacy_present()) > > + has_privacy = true; > > + else > > + has_privacy = false; > > Bit, of nitpicking, but you can write above shorter: > has_privacy = 
dell_privacy_present(); Good point, changed the code as you suggested. Thank you. Perry.
[PATCH v8] RISC-V: enable XIP
From: Vitaly Wool Introduce XIP (eXecute In Place) support for RISC-V platforms. It allows code to be executed directly from non-volatile storage directly addressable by the CPU, such as QSPI NOR flash which can be found on many RISC-V platforms. This makes way for significant optimization of RAM footprint. The XIP kernel is not compressed since it has to run directly from flash, so it will occupy more space on the non-volatile storage. The physical flash address used to link the kernel object files and for storing it has to be known at compile time and is represented by a Kconfig option. XIP on RISC-V will for the time being only work on MMU-enabled kernels. Signed-off-by: Alexandre Ghiti [ Rebase on top of "Move kernel mapping outside the linear mapping" ] Signed-off-by: Vitaly Wool --- arch/riscv/Kconfig | 55 +++- arch/riscv/Makefile | 8 +- arch/riscv/boot/Makefile| 13 +++ arch/riscv/include/asm/page.h | 21 + arch/riscv/include/asm/pgtable.h| 25 +- arch/riscv/kernel/head.S| 46 +- arch/riscv/kernel/head.h| 3 + arch/riscv/kernel/setup.c | 10 ++- arch/riscv/kernel/vmlinux-xip.lds.S | 133 arch/riscv/kernel/vmlinux.lds.S | 6 ++ arch/riscv/mm/init.c| 115 ++-- 11 files changed, 418 insertions(+), 17 deletions(-) create mode 100644 arch/riscv/kernel/vmlinux-xip.lds.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8ea60a0a19ae..7c7efdd67a10 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -28,7 +28,7 @@ config RISCV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY - select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT @@ -441,7 +441,7 @@ config EFI_STUB config EFI bool "UEFI runtime support" - depends on OF + depends on OF && !XIP_KERNEL select LIBFDT select UCS2_STRING select EFI_PARAMS_FROM_FDT @@ 
-465,11 +465,60 @@ config STACKPROTECTOR_PER_TASK def_bool y depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS +config PHYS_RAM_BASE_FIXED + bool "Explicitly specified physical RAM address" + default n + +config PHYS_RAM_BASE + hex "Platform Physical RAM address" + depends on PHYS_RAM_BASE_FIXED + default "0x8000" + help + This is the physical address of RAM in the system. It has to be + explicitly specified to run early relocations of read-write data + from flash to RAM. + +config XIP_KERNEL + bool "Kernel Execute-In-Place from ROM" + depends on MMU && SPARSEMEM + select PHYS_RAM_BASE_FIXED + help + Execute-In-Place allows the kernel to run from non-volatile storage + directly addressable by the CPU, such as NOR flash. This saves RAM + space since the text section of the kernel is not loaded from flash + to RAM. Read-write sections, such as the data section and stack, + are still copied to RAM. The XIP kernel is not compressed since + it has to run directly from flash, so it will take more space to + store it. The flash address used to link the kernel object files, + and for storing it, is configuration dependent. Therefore, if you + say Y here, you must know the proper physical address where to + store the kernel image depending on your own flash memory usage. + + Also note that the make target becomes "make xipImage" rather than + "make zImage" or "make Image". The final kernel binary to put in + ROM memory will be arch/riscv/boot/xipImage. + + SPARSEMEM is required because the kernel text and rodata that are + flash resident are not backed by memmap, then any attempt to get + a struct page on those regions will trigger a fault. + + If unsure, say N. + +config XIP_PHYS_ADDR + hex "XIP Kernel Physical Location" + depends on XIP_KERNEL + default "0x2100" + help + This is the physical address in your flash memory the kernel will + be linked for and stored to. This address is dependent on your + own flash usage. 
+ endmenu config BUILTIN_DTB - def_bool n + bool depends on OF + default y if XIP_KERNEL menu "Power management options" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1368d943f1f3..8fcbec03974d 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -82,7 +82,11 @@ CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) # Default target when executing plain make boot :=
Re: [PATCH RESEND v1 0/4] powerpc/vdso: Add support for time namespaces
Thomas Gleixner writes: > On Wed, Mar 31 2021 at 16:48, Christophe Leroy wrote: >> [Sorry, resending with complete destination list, I used the wrong script on >> the first delivery] >> >> This series adds support for time namespaces on powerpc. >> >> All timens selftests are successful. > > If PPC people want to pick up the whole lot, no objections from my side. Thanks, will do. cheers
Re: [PATCH for-next v3 0/2] Introduce rdma_set_min_rnr_timer() and use it in RDS
On Mon, Apr 12, 2021 at 07:58:47PM -0300, Jason Gunthorpe wrote: > On Wed, Mar 31, 2021 at 08:43:12PM +0200, Håkon Bugge wrote: > > ib_modify_qp() is an expensive operation on some HCAs running > > virtualized. This series removes two ib_modify_qp() calls from RDS. > > > > I am sending this as a v3, even though it is the first sent to > > net. This is because the IB Core commit has reached v3. > > > > Håkon Bugge (2): > > IB/cma: Introduce rdma_set_min_rnr_timer() > > rds: ib: Remove two ib_modify_qp() calls > > Applied to rdma for-next, thanks Jason, It should be + WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT); and not + if (WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT)) + return -EINVAL; Thanks > > Jason
Re: [PATCH 1/4] dt-bindings: Add bindings for aspeed pwm-tach.
Hi, Best Regards, Billy Tsai On 2021/4/12, 8:55 PM, Uwe Kleine-König wrote: > Hello, On Mon, Apr 12, 2021 at 05:54:54PM +0800, Billy Tsai wrote: > + - Billy Tsai > I object because the MTA at aspeedtech.com doesn't know this email > address. This is a typo; my email address is billy_t...@aspeedtech.com I will fix it in v2. > Best regards > Uwe > -- > Pengutronix e.K. | Uwe Kleine-König| > Industrial Linux Solutions | https://www.pengutronix.de/ |
arch/mips/n64/init.c:57:38: sparse: sparse: incorrect type in argument 2 (different address spaces)
tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 89698becf06d341a700913c3d89ce2a914af69a2 commit: baec970aa5ba11099ad7a91773350c91fb2113f0 mips: Add N64 machine type date: 3 months ago config: mips-randconfig-s032-20210413 (attached as .config) compiler: mips64-linux-gcc (GCC) 9.3.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.3-280-g2cd6d34e-dirty # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=baec970aa5ba11099ad7a91773350c91fb2113f0 git remote add linus https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git git fetch --no-tags linus master git checkout baec970aa5ba11099ad7a91773350c91fb2113f0 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=mips If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot sparse warnings: (new ones prefixed by >>) command-line: note: in included file: builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_ACQUIRE redefined builtin:0:0: sparse: this was the original definition builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_SEQ_CST redefined builtin:0:0: sparse: this was the original definition builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_ACQ_REL redefined builtin:0:0: sparse: this was the original definition builtin:1:9: sparse: sparse: preprocessor token __ATOMIC_RELEASE redefined builtin:0:0: sparse: this was the original definition >> arch/mips/n64/init.c:57:38: sparse: sparse: incorrect type in argument 2 >> (different address spaces) @@ expected void volatile [noderef] __iomem >> *mem @@ got unsigned int [usertype] * @@ arch/mips/n64/init.c:57:38: sparse: expected void volatile [noderef] __iomem *mem arch/mips/n64/init.c:57:38: sparse: got 
unsigned int [usertype] * vim +57 arch/mips/n64/init.c 54 55 static void __init n64rdp_write_reg(const u8 reg, const u32 value) 56 { > 57 __raw_writel(value, REG_BASE + reg); 58 } 59 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip
Subject: [PATCH v2] staging: media: meson: vdec: declare u32 as static const appropriately
Declared 32 bit unsigned int as static constant inside a function appropriately. Reported-by: kernel test robot Signed-off-by: Mitali Borkar --- Changes from v1:- Rectified the mistake by declaring u32 as static const properly. drivers/staging/media/meson/vdec/codec_h264.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/meson/vdec/codec_h264.c b/drivers/staging/media/meson/vdec/codec_h264.c index ea86e9e1c447..80141b89a9f6 100644 --- a/drivers/staging/media/meson/vdec/codec_h264.c +++ b/drivers/staging/media/meson/vdec/codec_h264.c @@ -287,8 +287,8 @@ static void codec_h264_resume(struct amvdec_session *sess) struct amvdec_core *core = sess->core; struct codec_h264 *h264 = sess->priv; u32 mb_width, mb_height, mb_total; - static const u32[] canvas3 = { ANCO_CANVAS_ADDR, 0 }; - static const u32[] canvas4 = { 24, 0 }; + static const u32 canvas3[] = { ANCO_CANVAS_ADDR, 0 }; + static const u32 canvas4[] = { 24, 0 }; amvdec_set_canvases(sess, canvas3, canvas4); -- 2.30.2
[PATCH] kernel:irq:manage: request threaded irq with a specified priority
In general, irq handler thread will be assigned a default priority which is MAX_RT_PRIO/2, as a result, no one can preempt others. Here is the case I found in a real project, an interrupt int_a is coming, wakes up its handler handler_a and handler_a wakes up a userspace RT process task_a. However, if another irq handler handler_b which has nothing to do with any RT tasks is running when int_a is coming, handler_a can't preempt handler_b, as a result, task_a can't be waken up immediately as expected until handler_b gives up cpu voluntarily. In this case, determinism breaks. Therefore, this patch introduce a new api to give drivers a chance to assign expected priorities to their irq handler thread. Signed-off-by: Song Chen --- include/linux/interrupt.h | 7 + include/linux/sched.h | 1 + include/linux/sched/prio.h | 1 + kernel/irq/manage.c| 64 +++--- kernel/sched/core.c| 11 5 files changed, 80 insertions(+), 4 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 967e257..5ab9169 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -121,6 +121,7 @@ struct irqaction { unsigned long thread_mask; const char *name; struct proc_dir_entry *dir; + int prio; } cacheline_internodealigned_in_smp; extern irqreturn_t no_action(int cpl, void *dev_id); @@ -136,6 +137,12 @@ extern irqreturn_t no_action(int cpl, void *dev_id); #define IRQ_NOTCONNECTED (1U << 31) extern int __must_check +request_threaded_irq_with_prio(unsigned int irq, irq_handler_t handler, +irq_handler_t thread_fn, +unsigned long flags, const char *name, void *dev, +int prio); + +extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long flags, const char *name, void *dev); diff --git a/include/linux/sched.h b/include/linux/sched.h index ef00bb2..50edae9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1711,6 +1711,7 @@ extern int sched_setscheduler(struct task_struct *, int, const 
struct sched_para extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern void sched_set_fifo(struct task_struct *p); extern void sched_set_fifo_low(struct task_struct *p); +extern void sched_set_fifo_with_prio(struct task_struct *p, int prio); extern void sched_set_normal(struct task_struct *p, int nice); extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index ab83d85..1e1186e 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -15,6 +15,7 @@ #define MAX_RT_PRIO100 +#define DEFAULT_RT_PRIO(MAX_RT_PRIO / 2) #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 21ea370..111b8ce 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1394,7 +1394,7 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) if (IS_ERR(t)) return PTR_ERR(t); - sched_set_fifo(t); + sched_set_fifo_with_prio(t, new->prio); /* * We keep the reference to the task struct even if @@ -2032,7 +2032,7 @@ const void *free_nmi(unsigned int irq, void *dev_id) } /** - * request_threaded_irq - allocate an interrupt line + * request_threaded_irq_with_prio - allocate an interrupt line * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * Primary handler for threaded interrupts @@ -2043,6 +2043,7 @@ const void *free_nmi(unsigned int irq, void *dev_id) * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device * @dev_id: A cookie passed back to the handler function + * @prio: priority of the irq handler thread * * This call allocates interrupt resources and enables the * interrupt line and IRQ handling. 
From the point this @@ -2067,15 +2068,18 @@ const void *free_nmi(unsigned int irq, void *dev_id) * If your interrupt is shared you must pass a non NULL dev_id * as this is required when freeing the interrupt. * + * If you want to assign a priority for your irq handler thread + * instead of default value, you need to supply @prio. + * * Flags: * * IRQF_SHARED Interrupt is shared * IRQF_TRIGGER_* Specify active edge(s) or level * */ -int request_threaded_irq(unsigned int irq, irq_handler_t handler, +int request_threaded_irq_with_prio(uns
[PATCH] x86: Accelerate copy_page with non-temporal in X86
I'm using AEP with dax_kmem driver, and AEP is exported as a NUMA node in my system. I will move cold pages from DRAM node to AEP node with move_pages system call. With old "rep movsq", it costs 2030ms to move 1 GB pages. With "movnti", it only cost about 890ms to move 1GB pages. I also test move 1GB pages from AEP node to DRAM node. But the result is unexpected. "rep movsq" cost about 372 ms while "movnti" cost about 477ms. As said in X86 , "movnti" could avoid "polluting the caches" in this situation. I don't know if it's general result or just happening in my machine. Hardware information is as follows: CPU: Intel(R) Xeon(R) Gold 6266C CPU @ 3.00GHz DRAM: Memory Device Array Handle: 0x0035 Error Information Handle: Not Provided Total Width: 72 bits Data Width: 64 bits Size: 64 GB Form Factor: DIMM Set: None Locator: DIMM130 J40 Bank Locator: _Node1_Channel3_Dimm0 Type: DDR4 Type Detail: Synchronous Registered (Buffered) Speed: 2933 MT/s Manufacturer: Samsung Serial Number: 03B71EB0 Asset Tag: 1950 Part Number: M393A8G40MB2-CVF Rank: 2 Configured Memory Speed: 2666 MT/s Minimum Voltage: 1.2 V Maximum Voltage: 1.2 V Configured Voltage: 1.2 V Memory Technology: DRAM Memory Operating Mode Capability: Volatile memory Firmware Version: Module Manufacturer ID: Bank 1, Hex 0xCE Module Product ID: Unknown Memory Subsystem Controller Manufacturer ID: Unknown Memory Subsystem Controller Product ID: Unknown Non-Volatile Size: None Volatile Size: 64 GB Cache Size: None Logical Size: None AEP: Memory Device Array Handle: 0x0035 Error Information Handle: Not Provided Total Width: 72 bits Data Width: 64 bits Size: 128 GB Form Factor: DIMM Set: None Locator: DIMM131 J41 Bank Locator: _Node1_Channel3_Dimm1 Type: Logical non-volatile device Type Detail: Synchronous Non-Volatile LRDIMM Speed: 2666 MT/s Manufacturer: Intel Serial Number: 6803 Asset Tag: 1949 Part Number: NMA1XXD128GPS Rank: 1 Configured Memory Speed: 2666 MT/s Minimum Voltage: 1.2 V Maximum Voltage: 1.2 V Configured
Voltage: 1.2 V Memory Technology: Intel persistent memory Memory Operating Mode Capability: Volatile memory Byte-accessible persistent memory Firmware Version: 5355 Module Manufacturer ID: Bank 1, Hex 0x89 Module Product ID: 0x0556 Memory Subsystem Controller Manufacturer ID: Bank 1, Hex 0x89 Memory Subsystem Controller Product ID: 0x097A Non-Volatile Size: 126 GB Volatile Size: None Cache Size: None Logical Size: None Memory dimm topoloygy: AEP | DRAMDRAMDRAM | | | |---|---| CPU |---|---| | | | DRAMDRAMDRAM Signed-off-by: Kemeng Shi --- arch/x86/lib/copy_page_64.S | 73 - 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 2402d4c489d2..69389b4aeeed 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -14,7 +14,8 @@ */ ALIGN SYM_FUNC_START(copy_page) - ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD + ALTERNATIVE_2 "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD, \ + "jmp copy_page_nt", X86_FEATURE_XMM2 movl$4096/8, %ecx rep movsq ret @@ -87,3 +88,73 @@ SYM_FUNC_START_LOCAL(copy_page_regs) addq$2*8, %rsp ret SYM_FUNC_END(copy_page_regs) + +SYM_FUNC_START_LOCAL(copy_page_nt) + subq$2*8, %rsp + movq%rbx, (%rsp) + movq%r12, 1*8(%rsp) + + movl$(4096/64)-5, %ecx + .p2align 4 +.LoopNT64: + decl%ecx + + movq0x8*0(%rsi), %rax + movq0x8*1(%rsi), %rbx + movq0x8*2(%rsi), %rdx + movq0x8*3(%rsi), %r8 + movq0x8*4(%rsi), %r9 + movq0x8*5(%rsi), %r10 + movq0x8*6(%rsi), %r11 + movq0x8*7(%rsi), %r12 + + prefetcht0 5*64(%rsi) + + movnti %rax, 0x8*0(%rdi) + movnti %rbx, 0x8*1(%rdi) + movnti %rdx, 0x8*2(%rdi) + movnti %r8, 0x8*3(%rdi) + movnti %r9, 0x8*4(%rdi) + movnti %r10, 0x8*5(%rdi) + movnti %r11, 0x8*6(%rdi) + movnti %r12, 0x8*7(%rdi) + + leaq64(%rdi), %rdi + leaq64(%rsi), %rsi + jnz .LoopNT64 + + movl$5, %ecx + .p2align 4 +.LoopNT2: + decl%ecx + + movq0x8*0(%rsi), %rax + movq0x8*1(%rsi), %rbx + movq0x8*2(%rsi), %rdx + movq0x8*3(%rsi), %r8
[PATCH 5/8] MIPS: pci-legacy: stop using of_pci_range_to_resource
Mirror commit aeba3731b150 ("powerpc/pci: Fix IO space breakage after of_pci_range_to_resource() change"). Most MIPS platforms do not define PCI_IOBASE, nor implement pci_address_to_pio(). Moreover, IO_SPACE_LIMIT is 0x for most MIPS platforms. of_pci_range_to_resource passes the _start address_ of the IO range into pci_address_to_pio, which then checks it against IO_SPACE_LIMIT and fails, because for MIPS platforms that use pci-legacy (pci-lantiq, pci-rt3883, pci-mt7620), IO ranges start much higher than 0x. In fact, pci-mt7621 in staging already works around this problem, see commit 09dd629eeabb ("staging: mt7621-pci: fix io space and properly set resource limits") So just stop using of_pci_range_to_resource, which does not work for MIPS. Fixes PCI errors like: pci_bus :00: root bus resource [io 0x] Fixes: 0b0b0893d49b ("of/pci: Fix the conversion of IO ranges into IO resources") Signed-off-by: Ilya Lipnitskiy Cc: Liviu Dudau --- arch/mips/pci/pci-legacy.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 39052de915f3..3a909194284a 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -166,8 +166,13 @@ void pci_load_of_ranges(struct pci_controller *hose, struct device_node *node) res = hose->mem_resource; break; } - if (res != NULL) - of_pci_range_to_resource(&range, node, res); + if (res != NULL) { + res->name = node->full_name; + res->flags = range.flags; + res->start = range.cpu_addr; + res->end = range.cpu_addr + range.size - 1; + res->parent = res->child = res->sibling = NULL; + } } } -- 2.31.1
[PATCH 8/8] MIPS: pci-legacy: use generic pci_enable_resources
Follow the reasoning from commit 842de40d93e0 ("PCI: add generic pci_enable_resources()"): The only functional difference from the MIPS version is that the generic one uses "!r->parent" to check for resource collisions instead of "!r->start && r->end". That should have no effect on any pci-legacy driver. Suggested-by: Bjorn Helgaas Signed-off-by: Ilya Lipnitskiy --- arch/mips/pci/pci-legacy.c | 40 ++ 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 78c22987bef0..c24226ea0a6e 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -241,47 +241,11 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); -static int pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx < PCI_NUM_RESOURCES; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - pci_err(dev, - "can't enable device: resource collisions\n"); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { - int err; + int err = pci_enable_resources(dev, mask); - if ((err = pcibios_enable_resources(dev, mask)) < 0) + if (err < 0) return err; return pcibios_plat_dev_init(dev); -- 2.31.1
[PATCH 1/8] MIPS: pci-rt2880: fix slot 0 configuration
pci_fixup_irqs() used to call pcibios_map_irq on every PCI device, which for RT2880 included bus 0 slot 0. After pci_fixup_irqs() got removed, only slots/funcs with devices attached would be called. While arguably the right thing, that left no chance for this driver to ever initialize slot 0, effectively bricking PCI and USB on RT2880 devices such as the Belkin F5D8235-4 v1. Slot 0 configuration needs to happen after PCI bus enumeration, but before any device at slot 0x11 (func 0 or 1) is talked to. That was determined empirically by testing on a Belkin F5D8235-4 v1 device. A minimal BAR 0 config write followed by read, then setting slot 0 PCI_COMMAND to MASTER | IO | MEMORY is all that seems to be required for proper functionality. Tested by ensuring that full- and high-speed USB devices get enumerated on the Belkin F5D8235-4 v1 (with an out of tree DTS file from OpenWrt). Fixes: 04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Signed-off-by: Ilya Lipnitskiy Cc: Lorenzo Pieralisi Cc: Tobias Wolf Cc: # v4.14+ --- arch/mips/pci/pci-rt2880.c | 50 +- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index e1f12e398136..19f7860fb28b 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -66,9 +66,13 @@ static int rt2880_pci_config_read(struct pci_bus *bus, unsigned int devfn, unsigned long flags; u32 address; u32 data; + int busn = 0; - address = rt2880_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn), -PCI_FUNC(devfn), where); + if (bus) + busn = bus->number; + + address = rt2880_pci_get_cfgaddr(busn, PCI_SLOT(devfn), PCI_FUNC(devfn), +where); spin_lock_irqsave(&rt2880_pci_lock, flags); rt2880_pci_reg_write(address, RT2880_PCI_REG_CONFIG_ADDR); @@ -96,9 +100,13 @@ static int rt2880_pci_config_write(struct pci_bus *bus, unsigned int devfn, unsigned long flags; u32 address; u32 data; + int busn = 0; + + if (bus) + busn = bus->number; - 
address = rt2880_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn), -PCI_FUNC(devfn), where); + address = rt2880_pci_get_cfgaddr(busn, PCI_SLOT(devfn), PCI_FUNC(devfn), +where); spin_lock_irqsave(&rt2880_pci_lock, flags); rt2880_pci_reg_write(address, RT2880_PCI_REG_CONFIG_ADDR); @@ -180,7 +188,6 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val) int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - u16 cmd; int irq = -1; if (dev->bus->number != 0) @@ -188,8 +195,6 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) switch (PCI_SLOT(dev->devfn)) { case 0x00: - rt2880_pci_write_u32(PCI_BASE_ADDRESS_0, 0x0800); - (void) rt2880_pci_read_u32(PCI_BASE_ADDRESS_0); break; case 0x11: irq = RT288X_CPU_IRQ_PCI; @@ -201,16 +206,6 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) break; } - pci_write_config_byte((struct pci_dev *) dev, - PCI_CACHE_LINE_SIZE, 0x14); - pci_write_config_byte((struct pci_dev *) dev, PCI_LATENCY_TIMER, 0xFF); - pci_read_config_word((struct pci_dev *) dev, PCI_COMMAND, &cmd); - cmd |= PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_INVALIDATE | PCI_COMMAND_FAST_BACK | - PCI_COMMAND_SERR | PCI_COMMAND_WAIT | PCI_COMMAND_PARITY; - pci_write_config_word((struct pci_dev *) dev, PCI_COMMAND, cmd); - pci_write_config_byte((struct pci_dev *) dev, PCI_INTERRUPT_LINE, - dev->irq); return irq; } @@ -251,6 +246,27 @@ static int rt288x_pci_probe(struct platform_device *pdev) int pcibios_plat_dev_init(struct pci_dev *dev) { + static bool slot0_init; + + /* +* Nobody seems to initialize slot 0, but this platform requires it, so +* do it once when some other slot is being enabled. The PCI subsystem +* should configure other slots properly, so no need to do anything +* special for those. 
+*/ + if (!slot0_init) { + u32 cmd; + + slot0_init = true; + + rt2880_pci_write_u32(PCI_BASE_ADDRESS_0, 0x0800); + (void) rt2880_pci_read_u32(PCI_BASE_ADDRESS_0); + + rt2880_pci_config_read(NULL, 0, PCI_COMMAND, 2, &cmd); + cmd |= PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY; + rt2880_pci_config_write(NULL, 0, PCI_COMMAND, 2, cmd); + } + return 0; } -- 2.31.1
[PATCH 2/8] MIPS: pci-rt2880: remove unneeded locks
Mirror pci-rt3883 fix from commit e5067c718b3a ("MIPS: pci-rt3883: Remove odd locking in PCI config space access code"). pci-rt2880 shares the driver layout with pci-rt3883 and the same reasons apply. Caller (generic PCI code) already does proper locking, so no need to add another one here. Local PCI read/write functions are never called simultaneously, also they do not require synchronization with the PCI controller ops, since they are used before the controller registration. Suggested-by: Sergey Ryazanov Signed-off-by: Ilya Lipnitskiy --- arch/mips/pci/pci-rt2880.c | 13 - 1 file changed, 13 deletions(-) diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index 19f7860fb28b..b4ee07cbcf2a 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -41,7 +41,6 @@ #define RT2880_PCI_REG_ARBCTL 0x80 static void __iomem *rt2880_pci_base; -static DEFINE_SPINLOCK(rt2880_pci_lock); static u32 rt2880_pci_reg_read(u32 reg) { @@ -63,7 +62,6 @@ static inline u32 rt2880_pci_get_cfgaddr(unsigned int bus, unsigned int slot, static int rt2880_pci_config_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - unsigned long flags; u32 address; u32 data; int busn = 0; @@ -74,10 +72,8 @@ static int rt2880_pci_config_read(struct pci_bus *bus, unsigned int devfn, address = rt2880_pci_get_cfgaddr(busn, PCI_SLOT(devfn), PCI_FUNC(devfn), where); - spin_lock_irqsave(&rt2880_pci_lock, flags); rt2880_pci_reg_write(address, RT2880_PCI_REG_CONFIG_ADDR); data = rt2880_pci_reg_read(RT2880_PCI_REG_CONFIG_DATA); - spin_unlock_irqrestore(&rt2880_pci_lock, flags); switch (size) { case 1: @@ -97,7 +93,6 @@ static int rt2880_pci_config_read(struct pci_bus *bus, unsigned int devfn, static int rt2880_pci_config_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - unsigned long flags; u32 address; u32 data; int busn = 0; @@ -108,7 +103,6 @@ static int rt2880_pci_config_write(struct pci_bus *bus, unsigned int devfn, 
address = rt2880_pci_get_cfgaddr(busn, PCI_SLOT(devfn), PCI_FUNC(devfn), where); - spin_lock_irqsave(&rt2880_pci_lock, flags); rt2880_pci_reg_write(address, RT2880_PCI_REG_CONFIG_ADDR); data = rt2880_pci_reg_read(RT2880_PCI_REG_CONFIG_DATA); @@ -127,7 +121,6 @@ static int rt2880_pci_config_write(struct pci_bus *bus, unsigned int devfn, } rt2880_pci_reg_write(data, RT2880_PCI_REG_CONFIG_DATA); - spin_unlock_irqrestore(&rt2880_pci_lock, flags); return PCIBIOS_SUCCESSFUL; } @@ -159,31 +152,25 @@ static struct pci_controller rt2880_pci_controller = { static inline u32 rt2880_pci_read_u32(unsigned long reg) { - unsigned long flags; u32 address; u32 ret; address = rt2880_pci_get_cfgaddr(0, 0, 0, reg); - spin_lock_irqsave(&rt2880_pci_lock, flags); rt2880_pci_reg_write(address, RT2880_PCI_REG_CONFIG_ADDR); ret = rt2880_pci_reg_read(RT2880_PCI_REG_CONFIG_DATA); - spin_unlock_irqrestore(&rt2880_pci_lock, flags); return ret; } static inline void rt2880_pci_write_u32(unsigned long reg, u32 val) { - unsigned long flags; u32 address; address = rt2880_pci_get_cfgaddr(0, 0, 0, reg); - spin_lock_irqsave(&rt2880_pci_lock, flags); rt2880_pci_reg_write(address, RT2880_PCI_REG_CONFIG_ADDR); rt2880_pci_reg_write(val, RT2880_PCI_REG_CONFIG_DATA); - spin_unlock_irqrestore(&rt2880_pci_lock, flags); } int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -- 2.31.1
Re: linux-next: Tree for Apr 9 (x86 boot problem)
On 4/12/21 11:06 PM, Mike Rapoport wrote: > Hi Randy, > > On Mon, Apr 12, 2021 at 01:53:34PM -0700, Randy Dunlap wrote: >> On 4/12/21 10:01 AM, Mike Rapoport wrote: >>> On Mon, Apr 12, 2021 at 08:49:49AM -0700, Randy Dunlap wrote: >>> >>> I thought about adding some prints to see what's causing the hang, the >>> reservations or their absence. Can you replace the debug patch with this >>> one: >>> >>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >>> index 776fc9b3fafe..a10ac252dbcc 100644 >>> --- a/arch/x86/kernel/setup.c >>> +++ b/arch/x86/kernel/setup.c >>> @@ -600,10 +600,13 @@ static bool __init snb_gfx_workaround_needed(void) >>> return false; >>> >>> vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); >>> + devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); >>> + >>> + pr_info("%s: vendor: %x, device: %x\n", __func__, vendor, device); >> >> s/device)/devid)/ > > Oh, sorry. > >>> + >>> if (vendor != 0x8086) >>> return false; >>> >>> - devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); >>> for (i = 0; i < ARRAY_SIZE(snb_ids); i++) >>> if (devid == snb_ids[i]) >>> return true; >> >> That prints: >> >> [0.00] snb_gfx_workaround_needed: vendor: 8086, device: 126 >> [0.00] early_reserve_memory: snb_gfx: 1 >> ... >> [0.014061] snb_gfx_workaround_needed: vendor: 8086, device: 126 >> [0.014064] reserving inaccessible SNB gfx pages >> >> >> The full boot log is attached. > > Can you please send the log with memblock=debug added to the kernel command > line? > > Probably should have started from this... > It's attached. -- ~Randy {bedtime} boot0409-memblk-debug.log.gz Description: application/gzip
[PATCH 3/8] MIPS: pci-rt3883: trivial: remove unused variable
Fixes the following compiler warning: warning: unused variable 'flags' [-Wunused-variable] Fixes: e5067c718b3a ("MIPS: pci-rt3883: Remove odd locking in PCI config space access code") Signed-off-by: Ilya Lipnitskiy Cc: Sergey Ryazanov Cc: triv...@kernel.org --- arch/mips/pci/pci-rt3883.c | 4 1 file changed, 4 deletions(-) diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index 0ac6346026d0..e422f78db5bc 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -100,7 +100,6 @@ static u32 rt3883_pci_read_cfg32(struct rt3883_pci_controller *rpc, unsigned bus, unsigned slot, unsigned func, unsigned reg) { - unsigned long flags; u32 address; u32 ret; @@ -116,7 +115,6 @@ static void rt3883_pci_write_cfg32(struct rt3883_pci_controller *rpc, unsigned bus, unsigned slot, unsigned func, unsigned reg, u32 val) { - unsigned long flags; u32 address; address = rt3883_pci_get_cfgaddr(bus, slot, func, reg); @@ -229,7 +227,6 @@ static int rt3883_pci_config_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { struct rt3883_pci_controller *rpc; - unsigned long flags; u32 address; u32 data; @@ -263,7 +260,6 @@ static int rt3883_pci_config_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { struct rt3883_pci_controller *rpc; - unsigned long flags; u32 address; u32 data; -- 2.31.1
[PATCH 4/8] MIPS: pci-rt3883: more accurate DT error messages
Existing strings do not make sense: one is always NULL and the other refers to the wrong parent node. Signed-off-by: Ilya Lipnitskiy --- arch/mips/pci/pci-rt3883.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index e422f78db5bc..aebd4964ea34 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -431,8 +431,7 @@ static int rt3883_pci_probe(struct platform_device *pdev) if (!rpc->intc_of_node) { dev_err(dev, "%pOF has no %s child node", - rpc->intc_of_node, - "interrupt controller"); + np, "interrupt controller"); return -EINVAL; } @@ -446,8 +445,7 @@ static int rt3883_pci_probe(struct platform_device *pdev) if (!rpc->pci_controller.of_node) { dev_err(dev, "%pOF has no %s child node", - rpc->intc_of_node, - "PCI host bridge"); + np, "PCI host bridge"); err = -EINVAL; goto err_put_intc_node; } -- 2.31.1
[PATCH 7/8] MIPS: pci-legacy: remove busn_resource field
No drivers set the busn_resource field in the pci_controller struct. Commit 7ee214b540d9 ("MIPS: PCI: Remove unused busn_offset") almost removed it over 3 years ago. Remove it for good to free up memory and eliminate messages like: pci_bus 0000:00: root bus resource [??? 0x00000000 flags 0x0] Signed-off-by: Ilya Lipnitskiy Cc: Bjorn Helgaas --- arch/mips/include/asm/pci.h | 1 - arch/mips/pci/pci-legacy.c | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 6f48649201c5..9ffc8192adae 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -38,7 +38,6 @@ struct pci_controller { struct resource *io_resource; unsigned long io_offset; unsigned long io_map_base; - struct resource *busn_resource; #ifndef CONFIG_PCI_DOMAINS_GENERIC unsigned int index; diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index ec3f52ade72d..78c22987bef0 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -89,7 +89,6 @@ static void pcibios_scanbus(struct pci_controller *hose) hose->mem_resource, hose->mem_offset); pci_add_resource_offset(&resources, hose->io_resource, hose->io_offset); - pci_add_resource(&resources, hose->busn_resource); list_splice_init(&resources, &bridge->windows); bridge->dev.parent = NULL; bridge->sysdata = hose; -- 2.31.1
[PATCH 6/8] MIPS: pci-legacy: remove redundant info messages
Remove the following pci-legacy message: PCI host bridge /pci@440000/host-bridge ranges: MEM 0x0000000020000000..0x000000002fffffff IO 0x0000000000460000..0x000000000046ffff It is followed shortly by the same data from pci_register_host_bridge: PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [mem 0x20000000-0x2fffffff] pci_bus 0000:00: root bus resource [io 0x460000-0x46ffff] Signed-off-by: Ilya Lipnitskiy --- arch/mips/pci/pci-legacy.c | 7 --- 1 file changed, 7 deletions(-) diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 3a909194284a..ec3f52ade72d 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -140,7 +140,6 @@ void pci_load_of_ranges(struct pci_controller *hose, struct device_node *node) struct of_pci_range range; struct of_pci_range_parser parser; - pr_info("PCI host bridge %pOF ranges:\n", node); hose->of_node = node; if (of_pci_range_parser_init(&parser, node)) @@ -151,18 +150,12 @@ void pci_load_of_ranges(struct pci_controller *hose, struct device_node *node) switch (range.flags & IORESOURCE_TYPE_BITS) { case IORESOURCE_IO: - pr_info(" IO 0x%016llx..0x%016llx\n", - range.cpu_addr, - range.cpu_addr + range.size - 1); hose->io_map_base = (unsigned long)ioremap(range.cpu_addr, range.size); res = hose->io_resource; break; case IORESOURCE_MEM: - pr_info(" MEM 0x%016llx..0x%016llx\n", - range.cpu_addr, - range.cpu_addr + range.size - 1); res = hose->mem_resource; break; } -- 2.31.1
[PATCH 0/8] MIPS: Fixes for PCI legacy drivers (rt2880, rt3883)
One major fix for rt2880-pci in the first patch - fixes breakage that existed since v4.14. Other more minor fixes, cleanups, and improvements that either free up memory, make dmesg messages clearer, or remove redundant dmesg output. Ilya Lipnitskiy (8): MIPS: pci-rt2880: fix slot 0 configuration MIPS: pci-rt2880: remove unneeded locks MIPS: pci-rt3883: trivial: remove unused variable MIPS: pci-rt3883: more accurate DT error messages MIPS: pci-legacy: stop using of_pci_range_to_resource MIPS: pci-legacy: remove redundant info messages MIPS: pci-legacy: remove busn_resource field MIPS: pci-legacy: use generic pci_enable_resources arch/mips/include/asm/pci.h | 1 - arch/mips/pci/pci-legacy.c | 57 ++--- arch/mips/pci/pci-rt2880.c | 63 +++-- arch/mips/pci/pci-rt3883.c | 10 ++ 4 files changed, 44 insertions(+), 87 deletions(-) -- 2.31.1
Re: [PATCH 1/1] arm: topology: parse the topology from the dt
Dietmar Eggemann 于2021年4月12日周一 下午8:40写道: > > On 12/04/2021 14:20, Ruifeng Zhang wrote: > > Valentin Schneider 于2021年4月12日周一 下午7:32写道: > >> > >> > >> Hi, > >> > >> On 12/04/21 15:08, Ruifeng Zhang wrote: > >>> From: Ruifeng Zhang > >>> > >>> The arm topology still parse from the MPIDR, but it is incomplete. When > >>> the armv8.3 cpu runs in aarch32 mode, it will parse out the wrong > >>> topology. > >>> > >>> armv7 (A7) mpidr is: > >>> [11:8] [7:2] [1:0] > >>> cluster reservedcpu > >>> > >>> armv8.3 (A55) mpidr is: > >>> [23:16] [15:8] [7:0] > >>> cluster cpu thread > >>> > >>> For compatibility to keep the function of get capacity from default > >>> cputype, renamed arm parse_dt_topology to get_cputype_capacity and delete > >>> related logic of parse from dt. > >>> Arm using the same parse_dt_topology function as arm64. > >>> > >>> The arm device boot step is to look for the default cputype and get cpu > >>> capacity firstly. Then parse the topology and capacity from dt to replace > >>> default values. > >>> > >> > >> I'm afraid I don't get it. > >> > >> CONFIG_COMPAT lets you run 32-bit stuff at EL0, but the kernel is still > >> arm64. So if you take your armv8.3 system, the topology parsed by the > >> kernel will be the same regardless of CONFIG_COMPAT. > >> > >> Could you elaborate on what problem you are trying to fix here? > > > > There is a armv8.3 cpu which should work normally both on aarch64 and > > aarch32. > > The MPIDR has been written to the chip register in armv8.3 format. > > For example, > > core0: 8000 > > core1: 8100 > > core2: 8200 > > ... > > > > Its cpu topology can be parsed normally on aarch64 mode (both > > userspace and kernel work on arm64). > > > > The problem is when it working on aarch32 mode (both userspace and > > kernel work on arm 32-bit), the cpu topology > > will parse error because of the format is different between armv7 and > > armv8.3. 
> > The arm 32-bit driver, arch/arm/kernel/topology will parse the MPIDR > > and store to the topology with armv7, > > and the result is all cpu core_id is 0, the bit[1:0] of armv7 MPIDR format. > > > > In addition, I think arm should also allow customers to configure cpu > > topologies via DT. > > This patch ruins the CPU capacity detection based on capacity-dmips-mhz > (Documentation/devicetree/bindings/arm/cpu-capacity.txt) on my TC2 [L B > B L L] (armv7). > > tip/sched/core with *mainline* multi_v7_defconfig: > > root@linaro-nano:~# cat /sys/devices/system/cpu/cpu*/cpu_capacity > 516 > 1024 > 1024 > 516 > 516 > > your patch with mainline multi_v7_defconfig: > > root@linaro-nano:~# cat /sys/devices/system/cpu/cpu*/cpu_capacity > 1024 > 1024 > 1024 > 1024 > 1024 > > > There are 2 capacity detection mechanism in arch/arm/kernel/topology.c: > > (1) cpu_efficiency (only for armv7 a15 and a7) based, relies on > clock-frequency dt property > > (2) capacity-dmips-mhz dt property based > > I currently don't see how this different MPIDR layout leads to you code > changes. Thanks for your test, I will update patch-V2 to solve this problem. > >
[PATCH] ASoC: ak5558: correct reset polarity
Reset (aka power off) happens when the reset gpio is made active. Change function name to ak5558_reset to match devicetree property "reset-gpios". Signed-off-by: Shengjiu Wang --- sound/soc/codecs/ak5558.c | 30 ++ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c index 8e4dca753f0b..5c3f15827423 100644 --- a/sound/soc/codecs/ak5558.c +++ b/sound/soc/codecs/ak5558.c @@ -318,29 +318,19 @@ static struct snd_soc_dai_driver ak5552_dai = { .ops = &ak5558_dai_ops, }; -static void ak5558_power_off(struct ak5558_priv *ak5558) +static void ak5558_reset(struct ak5558_priv *ak5558, bool active) { - if (!ak5558->reset_gpiod) - return; - - gpiod_set_value_cansleep(ak5558->reset_gpiod, 0); - usleep_range(1000, 2000); -} - -static void ak5558_power_on(struct ak5558_priv *ak5558) -{ - if (!ak5558->reset_gpiod) - return; - - gpiod_set_value_cansleep(ak5558->reset_gpiod, 1); - usleep_range(1000, 2000); + if (ak5558->reset_gpiod) { + gpiod_set_value_cansleep(ak5558->reset_gpiod, active); + usleep_range(1000, 2000); + } } static int ak5558_probe(struct snd_soc_component *component) { struct ak5558_priv *ak5558 = snd_soc_component_get_drvdata(component); - ak5558_power_on(ak5558); + ak5558_reset(ak5558, false); return ak5558_set_mcki(component); } @@ -348,7 +338,7 @@ static void ak5558_remove(struct snd_soc_component *component) { struct ak5558_priv *ak5558 = snd_soc_component_get_drvdata(component); - ak5558_power_off(ak5558); + ak5558_reset(ak5558, true); } static int __maybe_unused ak5558_runtime_suspend(struct device *dev) @@ -356,7 +346,7 @@ static int __maybe_unused ak5558_runtime_suspend(struct device *dev) struct ak5558_priv *ak5558 = dev_get_drvdata(dev); regcache_cache_only(ak5558->regmap, true); - ak5558_power_off(ak5558); + ak5558_reset(ak5558, true); regulator_bulk_disable(ARRAY_SIZE(ak5558->supplies), ak5558->supplies); @@ -375,8 +365,8 @@ static int __maybe_unused ak5558_runtime_resume(struct device *dev) 
return ret; } - ak5558_power_off(ak5558); - ak5558_power_on(ak5558); + ak5558_reset(ak5558, true); + ak5558_reset(ak5558, false); regcache_cache_only(ak5558->regmap, false); regcache_mark_dirty(ak5558->regmap); -- 2.27.0
[PATCH] drm/i915/gvt: remove useless function
Fix the following clang warning: drivers/gpu/drm/i915/gvt/gtt.c:590:20: warning: unused function 'ppgtt_set_guest_root_entry' [-Wunused-function]. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong --- drivers/gpu/drm/i915/gvt/gtt.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 897c007..a01ff44 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -587,12 +587,6 @@ static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, entry, index, false, 0, mm->vgpu); } -static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm, - struct intel_gvt_gtt_entry *entry, unsigned long index) -{ - _ppgtt_set_root_entry(mm, entry, index, true); -} - static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_entry *entry, unsigned long index) { -- 1.8.3.1
[PATCH] irq: Fix missing IRQF_ONESHOT as only threaded handler
From: Guangqing Zhu Coccinelle noticed: kernel/irq/manage.c:2199:8-28: ERROR: Threaded IRQ with no primary handler requested without IRQF_ONESHOT. Signed-off-by: Guangqing Zhu --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 4c14356543d9..222816750048 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2197,7 +2197,7 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, if (irq_settings_is_nested_thread(desc)) { ret = request_threaded_irq(irq, NULL, handler, - flags, name, dev_id); + flags | IRQF_ONESHOT, name, dev_id); return !ret ? IRQC_IS_NESTED : ret; } -- 2.17.1
Re: [PATCH 5.10 000/188] 5.10.30-rc1 review
On 2021/4/12 16:38, Greg Kroah-Hartman wrote: This is the start of the stable review cycle for the 5.10.30 release. There are 188 patches in this series, all will be posted as a response to this one. If anyone has any issues with these being applied, please let me know. Responses should be made by Wed, 14 Apr 2021 08:39:44 +0000. Anything received after that time might be too late. The whole patch series can be found in one patch at: https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.30-rc1.gz or in the git tree and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y and the diffstat can be found below. thanks, greg k-h Tested on arm64 and x86 for 5.10.30-rc1, Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git Branch: linux-5.10.y Version: 5.10.30-rc1 Commit: 8ac4b1deedaa507b5d0f46316e7f32004dd99cd1 Compiler: gcc version 7.3.0 (GCC) arm64: Testcase Result Summary: total: 5264 passed: 5264 failed: 0 timeout: 0 x86: Testcase Result Summary: total: 5264 passed: 5264 failed: 0 timeout: 0 Tested-by: Hulk Robot
[RFC PATCH] delayacct: delayacct_stats[] can be static
Reported-by: kernel test robot Signed-off-by: kernel test robot --- delayacct.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index b8d719fbfc404..2505aa9f87f61 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -32,7 +32,7 @@ struct delayacct_stat { unsigned int idx; }; -struct delayacct_stat delayacct_stats[] = { +static struct delayacct_stat delayacct_stats[] = { {"blkio", DELAYACCT_BLKIO}, {"swapin", DELAYACCT_SWAPIN}, {"pagecache_thrashing", DELAYACCT_THRASHING},
Re: [RESEND PATCH 2/2] delayacct: Add a proc file to dump the delay info
Hi brookxu, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on linus/master] [also build test WARNING on v5.12-rc7 next-20210412] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/brookxu/delayacct-refactor-the-code-to-simplify-the-implementation/20210413-093934 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 89698becf06d341a700913c3d89ce2a914af69a2 config: x86_64-randconfig-s021-20210413 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.3-280-g2cd6d34e-dirty # https://github.com/0day-ci/linux/commit/7023a409dec95195a0e3360a36e8cb66363a9457 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review brookxu/delayacct-refactor-the-code-to-simplify-the-implementation/20210413-093934 git checkout 7023a409dec95195a0e3360a36e8cb66363a9457 # save the attached .config to linux build tree make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot sparse warnings: (new ones prefixed by >>) >> kernel/delayacct.c:35:23: sparse: sparse: symbol 'delayacct_stats' was not >> declared. Should it be static? Please review and possibly fold the followup patch. --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip
[tip:x86/sgx] BUILD SUCCESS 523caed9efbb049339706b124185c9358c1b6477
defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig arc defconfig sh allmodconfig parisc defconfig s390 allyesconfig s390 allmodconfig parisc allyesconfig s390defconfig sparcallyesconfig sparc defconfig i386defconfig mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig i386 randconfig-a003-20210412 i386 randconfig-a001-20210412 i386 randconfig-a006-20210412 i386 randconfig-a005-20210412 i386 randconfig-a004-20210412 i386 randconfig-a002-20210412 i386 randconfig-a003-20210413 i386 randconfig-a001-20210413 i386 randconfig-a006-20210413 i386 randconfig-a005-20210413 i386 randconfig-a004-20210413 i386 randconfig-a002-20210413 x86_64 randconfig-a014-20210412 x86_64 randconfig-a015-20210412 x86_64 randconfig-a011-20210412 x86_64 randconfig-a013-20210412 x86_64 randconfig-a012-20210412 x86_64 randconfig-a016-20210412 i386 randconfig-a015-20210412 i386 randconfig-a014-20210412 i386 randconfig-a013-20210412 i386 randconfig-a012-20210412 i386 randconfig-a016-20210412 i386 randconfig-a011-20210412 riscvnommu_k210_defconfig riscvnommu_virt_defconfig riscv defconfig riscv rv32_defconfig umallnoconfig um allyesconfig um defconfig x86_64rhel-8.3-kselftests x86_64 defconfig x86_64 rhel-8.3 x86_64 rhel-8.3-kbuiltin x86_64 kexec clang tested configs: x86_64 randconfig-a003-20210412 x86_64 randconfig-a002-20210412 x86_64 randconfig-a001-20210412 x86_64 randconfig-a005-20210412 x86_64 randconfig-a006-20210412 x86_64 randconfig-a004-20210412 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
[tip:x86/platform] BUILD SUCCESS 8f2aca40dd077f74e62982cd2669845f41ed0ac6
multi_v5_defconfig powerpc pq2fads_defconfig sh se7751_defconfig m68k amiga_defconfig arm vf610m4_defconfig arm lpc32xx_defconfig h8300alldefconfig powerpc ebony_defconfig pariscgeneric-32bit_defconfig ia64 allmodconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nios2 defconfig arc allyesconfig nds32 allnoconfig nds32 defconfig cskydefconfig alpha defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig arc defconfig sh allmodconfig parisc defconfig s390 allyesconfig s390 allmodconfig parisc allyesconfig s390defconfig sparcallyesconfig sparc defconfig i386defconfig mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig x86_64 randconfig-a003-20210413 x86_64 randconfig-a002-20210413 x86_64 randconfig-a001-20210413 x86_64 randconfig-a005-20210413 x86_64 randconfig-a006-20210413 x86_64 randconfig-a004-20210413 i386 randconfig-a003-20210412 i386 randconfig-a001-20210412 i386 randconfig-a006-20210412 i386 randconfig-a005-20210412 i386 randconfig-a004-20210412 i386 randconfig-a002-20210412 i386 randconfig-a003-20210413 i386 randconfig-a001-20210413 i386 randconfig-a006-20210413 i386 randconfig-a005-20210413 i386 randconfig-a004-20210413 i386 randconfig-a002-20210413 x86_64 randconfig-a014-20210412 x86_64 randconfig-a015-20210412 x86_64 randconfig-a011-20210412 x86_64 randconfig-a013-20210412 x86_64 randconfig-a012-20210412 x86_64 randconfig-a016-20210412 i386 randconfig-a015-20210412 i386 randconfig-a014-20210412 i386 randconfig-a013-20210412 i386 randconfig-a012-20210412 i386 randconfig-a016-20210412 i386 randconfig-a011-20210412 riscvnommu_k210_defconfig riscvnommu_virt_defconfig riscv defconfig riscv rv32_defconfig um allyesconfig x86_64rhel-8.3-kselftests x86_64 defconfig x86_64 rhel-8.3 x86_64 rhel-8.3-kbuiltin x86_64 kexec clang tested configs: x86_64 randconfig-a003-20210412 x86_64 randconfig-a002-20210412 x86_64 randconfig-a001-20210412 x86_64 randconfig-a005-20210412 x86_64 
randconfig-a006-20210412 x86_64 randconfig-a004-20210412 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org
Re: [PATCH 1/1] arm: topology: parse the topology from the dt
Valentin Schneider 于2021年4月12日周一 下午11:33写道: > > On 12/04/21 20:20, Ruifeng Zhang wrote: > > There is a armv8.3 cpu which should work normally both on aarch64 and > > aarch32. > > The MPIDR has been written to the chip register in armv8.3 format. > > For example, > > core0: 8000 > > core1: 8100 > > core2: 8200 > > ... > > > > Its cpu topology can be parsed normally on aarch64 mode (both > > userspace and kernel work on arm64). > > > > The problem is when it working on aarch32 mode (both userspace and > > kernel work on arm 32-bit), > > I didn't know using aarch32 elsewhere than EL0 was something actually being > used. Do you deploy this somewhere, or do you use it for testing purposes? In Unisoc, the sc9863a SoC which using cortex-a55, it has two software version, one of them is the kernel running on EL1 using aarch32. user(EL0)kernel(EL1) sc9863a_go aarch32 aarch32 sc9863aaarch64 aarch64 > > > the cpu topology > > will parse error because of the format is different between armv7 and > > armv8.3. > > The arm 32-bit driver, arch/arm/kernel/topology will parse the MPIDR > > and store to the topology with armv7, > > and the result is all cpu core_id is 0, the bit[1:0] of armv7 MPIDR format. > > > > I'm not fluent at all in armv7 (or most aarch32 compat mode stuff), but > I couldn't find anything about MPIDR format differences: > > DDI 0487G.a G8.2.113 > """ > AArch32 System register MPIDR bits [31:0] are architecturally mapped to > AArch64 System register MPIDR_EL1[31:0]. > """ > > Peeking at some armv7 doc and arm/kernel/topology.c the layout really looks > just the same, i.e. for both of them, with your example of: The cortex-a7 spec DDI0464F 4.3.5 https://developer.arm.com/documentation/ddi0464/f/?lang=en The current arch/arm/kernel/topology code parse the MPIDR with a armv7 format. the parse code is: void store_cpu_topology(unsigned int cpuid) { ... 
cpuid_topo->thread_id = -1; cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); ... } > > core0: 8000 > core1: 8100 > core2: 8200 > ... > > we'll get: > > | | aff2 | aff1 | aff0 | > |---+--+--+--| > | Core0 |0 |0 |0 | > | Core1 |0 |1 |0 | > | Core2 |0 |2 |0 | > ... > > Now, arm64 doesn't fallback to MPIDR for topology information anymore since > > 3102bc0e6ac7 ("arm64: topology: Stop using MPIDR for topology information") > > so without DT we would get: > | | package_id | core_id | > |---++-| > | Core0 | 0 | 0 | > | Core1 | 0 | 1 | > | Core2 | 0 | 2 | > > Whereas with an arm kernel we'll end up parsing MPIDR as: > | | package_id | core_id | > |---++-| > | Core0 | 0 | 0 | > | Core1 | 1 | 0 | > | Core2 | 2 | 0 | > > Did I get this right? Is this what you're observing? Yes, this is a problem if an armv8.2 or above cpu is running a 32-bit kernel on EL1. > > > In addition, I think arm should also allow customers to configure cpu > > topologies via DT.
Re: [syzbot] KASAN: slab-out-of-bounds Read in reiserfs_xattr_get
On Tue, Apr 13, 2021 at 7:55 AM syzbot wrote: > > Hello, > > syzbot found the following issue on: > > HEAD commit:3a229812 Merge tag 'arm-fixes-5.11-2' of git://git.kernel... > git tree: upstream > console output: https://syzkaller.appspot.com/x/log.txt?x=16b4d196d0 > kernel config: https://syzkaller.appspot.com/x/.config?x=f91155ccddaf919c > dashboard link: https://syzkaller.appspot.com/bug?extid=72ba979b6681c3369db4 > compiler: Debian clang version 11.0.1-2 > > Unfortunately, I don't have any reproducer for this issue yet. > > IMPORTANT: if you fix the issue, please add the following tag to the commit: > Reported-by: syzbot+72ba979b6681c3369...@syzkaller.appspotmail.com Maybe related to: https://lore.kernel.org/lkml/5f397905ba42a...@google.com/ ? there are some uninits involved in reiserfs attrs. > loop3: detected capacity change from 0 to 65534 > == > BUG: KASAN: slab-out-of-bounds in reiserfs_xattr_get+0xe0/0x590 > fs/reiserfs/xattr.c:681 > Read of size 8 at addr 888028983198 by task syz-executor.3/4211 > > CPU: 1 PID: 4211 Comm: syz-executor.3 Not tainted 5.12.0-rc6-syzkaller #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS > Google 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:79 [inline] > dump_stack+0x176/0x24e lib/dump_stack.c:120 > print_address_description+0x5f/0x3a0 mm/kasan/report.c:232 > __kasan_report mm/kasan/report.c:399 [inline] > kasan_report+0x15c/0x200 mm/kasan/report.c:416 > reiserfs_xattr_get+0xe0/0x590 fs/reiserfs/xattr.c:681 > reiserfs_get_acl+0x63/0x670 fs/reiserfs/xattr_acl.c:211 > get_acl+0x152/0x2e0 fs/posix_acl.c:141 > check_acl fs/namei.c:294 [inline] > acl_permission_check fs/namei.c:339 [inline] > generic_permission+0x2ed/0x5b0 fs/namei.c:392 > do_inode_permission fs/namei.c:446 [inline] > inode_permission+0x28e/0x500 fs/namei.c:513 > may_open+0x228/0x3e0 fs/namei.c:2985 > do_open fs/namei.c:3365 [inline] > path_openat+0x2697/0x3860 fs/namei.c:3500 > do_filp_open+0x1a3/0x3b0 fs/namei.c:3527 > 
do_sys_openat2+0xba/0x380 fs/open.c:1187 > do_sys_open fs/open.c:1203 [inline] > __do_sys_openat fs/open.c:1219 [inline] > __se_sys_openat fs/open.c:1214 [inline] > __x64_sys_openat+0x1c8/0x1f0 fs/open.c:1214 > do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 > entry_SYSCALL_64_after_hwframe+0x44/0xae > RIP: 0033:0x419544 > Code: 84 00 00 00 00 00 44 89 54 24 0c e8 96 f9 ff ff 44 8b 54 24 0c 44 89 e2 > 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 > 34 44 89 c7 89 44 24 0c e8 c8 f9 ff ff 8b 44 > RSP: 002b:7fa357a03f30 EFLAGS: 0293 ORIG_RAX: 0101 > RAX: ffda RBX: 2200 RCX: 00419544 > RDX: 0001 RSI: 2100 RDI: ff9c > RBP: 2100 R08: R09: 2000 > R10: R11: 0293 R12: 0001 > R13: 2100 R14: 7fa357a04000 R15: 20065600 > > Allocated by task 4210: > kasan_save_stack mm/kasan/common.c:38 [inline] > kasan_set_track mm/kasan/common.c:46 [inline] > set_alloc_info mm/kasan/common.c:427 [inline] > kasan_kmalloc+0xc2/0xf0 mm/kasan/common.c:506 > kasan_kmalloc include/linux/kasan.h:233 [inline] > kmem_cache_alloc_trace+0x21b/0x350 mm/slub.c:2934 > kmalloc include/linux/slab.h:554 [inline] > kzalloc include/linux/slab.h:684 [inline] > smk_fetch security/smack/smack_lsm.c:288 [inline] > smack_d_instantiate+0x65c/0xcc0 security/smack/smack_lsm.c:3411 > security_d_instantiate+0xa5/0x100 security/security.c:1987 > d_instantiate_new+0x61/0x110 fs/dcache.c:2025 > ext4_add_nondir+0x22b/0x290 fs/ext4/namei.c:2590 > ext4_symlink+0x8ce/0xe90 fs/ext4/namei.c:3417 > vfs_symlink+0x3a0/0x540 fs/namei.c:4178 > do_symlinkat+0x1c9/0x440 fs/namei.c:4208 > do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 > entry_SYSCALL_64_after_hwframe+0x44/0xae > > Freed by task 4210: > kasan_save_stack mm/kasan/common.c:38 [inline] > kasan_set_track+0x3d/0x70 mm/kasan/common.c:46 > kasan_set_free_info+0x1f/0x40 mm/kasan/generic.c:357 > kasan_slab_free+0x100/0x140 mm/kasan/common.c:360 > kasan_slab_free include/linux/kasan.h:199 [inline] > slab_free_hook mm/slub.c:1562 [inline] > 
slab_free_freelist_hook+0x171/0x270 mm/slub.c:1600 > slab_free mm/slub.c:3161 [inline] > kfree+0xcf/0x2d0 mm/slub.c:4213 > smk_fetch security/smack/smack_lsm.c:300 [inline] > smack_d_instantiate+0x6db/0xcc0 security/smack/smack_lsm.c:3411 > security_d_instantiate+0xa5/0x100 security/security.c:1987 > d_instantiate_new+0x61/0x110 fs/dcache.c:2025 > ext4_add_nondir+0x22b/0x290 fs/ext4/namei.c:2590 > ext4_symlink+0x8ce/0xe90 fs/ext4/namei.c:3417 > vfs_symlink+0x3a0/0x540 fs/namei.c:4178 > do_symlinkat+0x1c9/0x440 fs/namei.c:4208 > do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 > entry_SYSCALL_64_after_hwframe+0x44/0xae > > Las
Re: [PATCH v14 4/6] locking/qspinlock: Introduce starvation avoidance into CNA
Andi Kleen writes: > Alex Kogan writes: >> >> +numa_spinlock_threshold=[NUMA, PV_OPS] >> +Set the time threshold in milliseconds for the >> +number of intra-node lock hand-offs before the >> +NUMA-aware spinlock is forced to be passed to >> +a thread on another NUMA node. Valid values >> +are in the [1..100] range. Smaller values result >> +in a more fair, but less performant spinlock, >> +and vice versa. The default value is 10. > > ms granularity seems very coarse grained for this. Surely > at some point of spinning you can afford a ktime_get? But ok. Actually, thinking about it more, using jiffies is likely broken anyway: if interrupts are disabled on the CPU that services the main timer interrupt, jiffies won't increase. cpu_clock (better than ktime_get) or sched_clock would work. -Andi
/usr/bin/ld: ll_temac_main.c:undefined reference to `devm_of_iomap'
Hi Andre, FYI, the error/warning still remains. tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 89698becf06d341a700913c3d89ce2a914af69a2 commit: e8b6c54f6d57822e228027d41a1edb317034a08c net: xilinx: temac: Relax Kconfig dependencies date: 1 year, 1 month ago config: um-randconfig-r026-20210413 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e8b6c54f6d57822e228027d41a1edb317034a08c git remote add linus https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git git fetch --no-tags linus master git checkout e8b6c54f6d57822e228027d41a1edb317034a08c # save the attached .config to linux build tree make W=1 ARCH=um If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): /usr/bin/ld: drivers/net/ethernet/xilinx/ll_temac_main.o: in function `temac_probe': ll_temac_main.c:(.text+0xe9d): undefined reference to `devm_ioremap' >> /usr/bin/ld: ll_temac_main.c:(.text+0xf90): undefined reference to >> `devm_of_iomap' /usr/bin/ld: ll_temac_main.c:(.text+0x1159): undefined reference to `devm_ioremap' /usr/bin/ld: drivers/misc/altera-stapl/altera-lpt.o:(.altinstructions+0x8): undefined reference to `X86_FEATURE_XMM2' /usr/bin/ld: drivers/misc/altera-stapl/altera-lpt.o:(.altinstructions+0x15): undefined reference to `X86_FEATURE_XMM' /usr/bin/ld: drivers/misc/altera-stapl/altera-lpt.o:(.altinstructions+0x22): undefined reference to `X86_FEATURE_XMM' /usr/bin/ld: drivers/misc/altera-stapl/altera-lpt.o:(.altinstructions+0x2f): undefined reference to `X86_FEATURE_XMM2' /usr/bin/ld: drivers/misc/altera-stapl/altera-lpt.o:(.altinstructions+0x3c): undefined reference to `X86_FEATURE_XMM' /usr/bin/ld: drivers/misc/altera-stapl/altera-lpt.o:(.altinstructions+0x49): undefined reference to `X86_FEATURE_XMM' collect2: error: ld returned 1 exit status 
--- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org .config.gz Description: application/gzip
Re: [PATCH][next] scsi: aacraid: Replace one-element array with flexible-array member
Hi Martin, On 4/12/21 23:52, Martin K. Petersen wrote: > Silencing analyzer warnings shouldn't be done at the expense of human > readers. If it is imperative to switch to flex_array_size() to quiesce > checker warnings, please add a comment in the code explaining that the > size evaluates to nseg_new-1 sge_ieee1212 structs. Done: https://lore.kernel.org/lkml/20210413054032.GA276102@embeddedor/ Thanks! -- Gustavo
Re: [PATCH] KVM: arm/arm64: Fix KVM_VGIC_V3_ADDR_TYPE_REDIST read
On 2021/4/12 23:00, Eric Auger wrote: > When reading the base address of the a REDIST region > through KVM_VGIC_V3_ADDR_TYPE_REDIST we expect the > redistributor region list to be populated with a single > element. > > However list_first_entry() expects the list to be non empty. Indeed, list_first_entry() always returns a non-NULL pointer. If the list is empty, it will mistake the list head for the first element. > Instead we should use list_first_entry_or_null which effectively > returns NULL if the list is empty. > > Fixes: dbd9733ab674 ("KVM: arm/arm64: Replace the single rdist region by a > list") > Cc: # v4.18+ > Signed-off-by: Eric Auger > Reported-by: Gavin Shan > --- > arch/arm64/kvm/vgic/vgic-kvm-device.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c > b/arch/arm64/kvm/vgic/vgic-kvm-device.c > index 44419679f91a..5eaede3e3b5a 100644 > --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c > +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c > @@ -87,8 +87,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 > *addr, bool write) > r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); > goto out; > } > - rdreg = list_first_entry(&vgic->rd_regions, > - struct vgic_redist_region, list); > + rdreg = list_first_entry_or_null(&vgic->rd_regions, > + struct vgic_redist_region, > list); > if (!rdreg) > addr_ptr = &undef_value; > else >
Re: [PATCH v5 04/16] memory: mtk-smi: Add device-link between smi-larb and smi-common
On Sat, 2021-04-10 at 14:40 +0200, Krzysztof Kozlowski wrote: > On 10/04/2021 11:11, Yong Wu wrote: > > Normally, If the smi-larb HW need work, we should enable the smi-common > > HW power and clock firstly. > > This patch adds device-link between the smi-larb dev and the smi-common > > dev. then If pm_runtime_get_sync(smi-larb-dev), the pm_runtime_get_sync > > (smi-common-dev) will be called automatically. > > > > Also, Add DL_FLAG_STATELESS to avoid the smi-common clocks be gated when > > probe. > > > > CC: Matthias Brugger > > Suggested-by: Tomasz Figa > > Signed-off-by: Yong Wu > > --- > > drivers/memory/mtk-smi.c | 19 ++- > > 1 file changed, 10 insertions(+), 9 deletions(-) > > I understood this is a dependency for other patches, so: > Acked-by: Krzysztof Kozlowski > > If I am wrong and I can take it via memory tree, let me know. Hi Krzysztof, Thanks very much for your quick review. I think it is OK if it goes through the memory tree. In the original patch, we call pm_runtime_get(smi-common-dev) in the smi-larb's pm resume callback. This patch only uses device-link to do this; thus, this patch has no functional change. It only adjusts the SMI internal code flow. In addition, [14/16] expects your Acked-by, and that one should be merged with the others. About the other patches, I'm not sure which tree they should go through. They cross several trees: dt-binding/iommu/media/drm/dts. Not sure if Matthias could have time to review and give some suggestions. > > Best regards, > Krzysztof > > ___ > Linux-mediatek mailing list > linux-media...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-mediatek
Re: linux-next: Tree for Apr 9 (x86 boot problem)
Hi Randy, On Mon, Apr 12, 2021 at 01:53:34PM -0700, Randy Dunlap wrote: > On 4/12/21 10:01 AM, Mike Rapoport wrote: > > On Mon, Apr 12, 2021 at 08:49:49AM -0700, Randy Dunlap wrote: > > > > I thought about adding some prints to see what's causing the hang, the > > reservations or their absence. Can you replace the debug patch with this > > one: > > > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > > index 776fc9b3fafe..a10ac252dbcc 100644 > > --- a/arch/x86/kernel/setup.c > > +++ b/arch/x86/kernel/setup.c > > @@ -600,10 +600,13 @@ static bool __init snb_gfx_workaround_needed(void) > > return false; > > > > vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); > > + devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); > > + > > + pr_info("%s: vendor: %x, device: %x\n", __func__, vendor, device); > > s/device)/devid)/ Oh, sorry. > > + > > if (vendor != 0x8086) > > return false; > > > > - devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); > > for (i = 0; i < ARRAY_SIZE(snb_ids); i++) > > if (devid == snb_ids[i]) > > return true; > > That prints: > > [0.00] snb_gfx_workaround_needed: vendor: 8086, device: 126 > [0.00] early_reserve_memory: snb_gfx: 1 > ... > [0.014061] snb_gfx_workaround_needed: vendor: 8086, device: 126 > [0.014064] reserving inaccessible SNB gfx pages > > > The full boot log is attached. Can you please send the log with memblock=debug added to the kernel command line? Probably should have started from this... -- Sincerely yours, Mike.
[PATCH] hwmon: (nct6683) remove useless function
Fix the following clang warning: drivers/hwmon/nct6683.c:491:19: warning: unused function 'in_to_reg' [-Wunused-function]. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong --- drivers/hwmon/nct6683.c | 11 --- 1 file changed, 11 deletions(-) diff --git a/drivers/hwmon/nct6683.c b/drivers/hwmon/nct6683.c index a23047a..b886cf0 100644 --- a/drivers/hwmon/nct6683.c +++ b/drivers/hwmon/nct6683.c @@ -488,17 +488,6 @@ static inline long in_from_reg(u16 reg, u8 src) return reg * scale; } -static inline u16 in_to_reg(u32 val, u8 src) -{ - int scale = 16; - - if (src == MON_SRC_VCC || src == MON_SRC_VSB || src == MON_SRC_AVSB || - src == MON_SRC_VBAT) - scale <<= 1; - - return clamp_val(DIV_ROUND_CLOSEST(val, scale), 0, 127); -} - static u16 nct6683_read(struct nct6683_data *data, u16 reg) { int res; -- 1.8.3.1
Re: [PATCH v14 4/6] locking/qspinlock: Introduce starvation avoidance into CNA
Alex Kogan writes: > > + numa_spinlock_threshold=[NUMA, PV_OPS] > + Set the time threshold in milliseconds for the > + number of intra-node lock hand-offs before the > + NUMA-aware spinlock is forced to be passed to > + a thread on another NUMA node. Valid values > + are in the [1..100] range. Smaller values result > + in a more fair, but less performant spinlock, > + and vice versa. The default value is 10. ms granularity seems very coarse-grained for this. Surely at some point of spinning you can afford a ktime_get? But ok. Could you turn that into a moduleparm which can be changed at runtime? Would be strange to have to reboot just to play with this parameter. This would also make the code a lot shorter I guess. -Andi
[syzbot] KASAN: null-ptr-deref Write in rhashtable_free_and_destroy (2)
Hello, syzbot found the following issue on: HEAD commit:d93a0d43 Merge tag 'block-5.12-2021-04-02' of git://git.ke.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=12d81cfcd0 kernel config: https://syzkaller.appspot.com/x/.config?x=71a75beb62b62a34 dashboard link: https://syzkaller.appspot.com/bug?extid=860268315ba86ea6b96b compiler: Debian clang version 11.0.1-2 Unfortunately, I don't have any reproducer for this issue yet. IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+860268315ba86ea6b...@syzkaller.appspotmail.com == BUG: KASAN: null-ptr-deref in instrument_atomic_read_write include/linux/instrumented.h:101 [inline] BUG: KASAN: null-ptr-deref in test_and_set_bit include/asm-generic/bitops/instrumented-atomic.h:70 [inline] BUG: KASAN: null-ptr-deref in try_to_grab_pending+0xee/0xa50 kernel/workqueue.c:1257 Write of size 8 at addr 0088 by task kworker/0:3/4787 CPU: 0 PID: 4787 Comm: kworker/0:3 Not tainted 5.12.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events cfg80211_destroy_iface_wk Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x176/0x24e lib/dump_stack.c:120 __kasan_report mm/kasan/report.c:403 [inline] kasan_report+0x152/0x200 mm/kasan/report.c:416 check_region_inline mm/kasan/generic.c:135 [inline] kasan_check_range+0x2b5/0x2f0 mm/kasan/generic.c:186 instrument_atomic_read_write include/linux/instrumented.h:101 [inline] test_and_set_bit include/asm-generic/bitops/instrumented-atomic.h:70 [inline] try_to_grab_pending+0xee/0xa50 kernel/workqueue.c:1257 __cancel_work_timer+0x81/0x5b0 kernel/workqueue.c:3098 rhashtable_free_and_destroy+0x25/0x8b0 lib/rhashtable.c:1137 mesh_table_free net/mac80211/mesh_pathtbl.c:70 [inline] mesh_pathtbl_unregister+0x4b/0xa0 net/mac80211/mesh_pathtbl.c:812 unregister_netdevice_many+0x12ea/0x18e0 net/core/dev.c:10951 unregister_netdevice_queue+0x2a9/0x300 
net/core/dev.c:10868 unregister_netdevice include/linux/netdevice.h:2884 [inline] _cfg80211_unregister_wdev+0x17b/0x5b0 net/wireless/core.c:1127 ieee80211_if_remove+0x1cc/0x250 net/mac80211/iface.c:2020 ieee80211_del_iface+0x12/0x20 net/mac80211/cfg.c:144 rdev_del_virtual_intf net/wireless/rdev-ops.h:57 [inline] cfg80211_destroy_ifaces+0x182/0x250 net/wireless/core.c:341 cfg80211_destroy_iface_wk+0x30/0x40 net/wireless/core.c:354 process_one_work+0x789/0xfd0 kernel/workqueue.c:2275 worker_thread+0xac1/0x1300 kernel/workqueue.c:2421 kthread+0x39a/0x3c0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 == --- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkal...@googlegroups.com. syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
[PATCH v4 3/4] pinctrl: add drive for I2C related pins on MT8195
This patch provides the advanced drive raw data setting version for I2C used pins on MT8195. Signed-off-by: Zhiyong Tao --- drivers/pinctrl/mediatek/pinctrl-mt8195.c | 22 +++ .../pinctrl/mediatek/pinctrl-mtk-common-v2.c | 14 .../pinctrl/mediatek/pinctrl-mtk-common-v2.h | 5 + 3 files changed, 41 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c index 063f164d7c9b..a7500e18bb1d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c @@ -760,6 +760,25 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { PIN_FIELD_BASE(143, 143, 1, 0x020, 0x10, 24, 3), }; +static const struct mtk_pin_field_calc mt8195_pin_drv_adv_range[] = { + PIN_FIELD_BASE(8, 8, 4, 0x020, 0x10, 15, 3), + PIN_FIELD_BASE(9, 9, 4, 0x020, 0x10, 0, 3), + PIN_FIELD_BASE(10, 10, 4, 0x020, 0x10, 18, 3), + PIN_FIELD_BASE(11, 11, 4, 0x020, 0x10, 3, 3), + PIN_FIELD_BASE(12, 12, 4, 0x020, 0x10, 21, 3), + PIN_FIELD_BASE(13, 13, 4, 0x020, 0x10, 6, 3), + PIN_FIELD_BASE(14, 14, 4, 0x020, 0x10, 24, 3), + PIN_FIELD_BASE(15, 15, 4, 0x020, 0x10, 9, 3), + PIN_FIELD_BASE(16, 16, 4, 0x020, 0x10, 27, 3), + PIN_FIELD_BASE(17, 17, 4, 0x020, 0x10, 12, 3), + PIN_FIELD_BASE(29, 29, 2, 0x020, 0x10, 0, 3), + PIN_FIELD_BASE(30, 30, 2, 0x020, 0x10, 3, 3), + PIN_FIELD_BASE(34, 34, 1, 0x040, 0x10, 0, 3), + PIN_FIELD_BASE(35, 35, 1, 0x040, 0x10, 3, 3), + PIN_FIELD_BASE(44, 44, 1, 0x040, 0x10, 6, 3), + PIN_FIELD_BASE(45, 45, 1, 0x040, 0x10, 9, 3), +}; + static const struct mtk_pin_reg_calc mt8195_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_MODE] = MTK_RANGE(mt8195_pin_mode_range), [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8195_pin_dir_range), @@ -773,6 +792,7 @@ static const struct mtk_pin_reg_calc mt8195_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_PUPD] = MTK_RANGE(mt8195_pin_pupd_range), [PINCTRL_PIN_REG_R0] = MTK_RANGE(mt8195_pin_r0_range), [PINCTRL_PIN_REG_R1] = MTK_RANGE(mt8195_pin_r1_range), + 
[PINCTRL_PIN_REG_DRV_ADV] = MTK_RANGE(mt8195_pin_drv_adv_range), }; static const char * const mt8195_pinctrl_register_base_names[] = { @@ -801,6 +821,8 @@ static const struct mtk_pin_soc mt8195_data = { .bias_get_combo = mtk_pinconf_bias_get_combo, .drive_set = mtk_pinconf_drive_set_rev1, .drive_get = mtk_pinconf_drive_get_rev1, + .adv_drive_get = mtk_pinconf_adv_drive_get_raw, + .adv_drive_set = mtk_pinconf_adv_drive_set_raw, }; static const struct of_device_id mt8195_pinctrl_of_match[] = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 72f17f26acd8..2b51f4a9b860 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -1027,6 +1027,20 @@ int mtk_pinconf_adv_drive_get(struct mtk_pinctrl *hw, } EXPORT_SYMBOL_GPL(mtk_pinconf_adv_drive_get); +int mtk_pinconf_adv_drive_set_raw(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, u32 arg) +{ + return mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DRV_ADV, arg); +} +EXPORT_SYMBOL_GPL(mtk_pinconf_adv_drive_set_raw); + +int mtk_pinconf_adv_drive_get_raw(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, u32 *val) +{ + return mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DRV_ADV, val); +} +EXPORT_SYMBOL_GPL(mtk_pinconf_adv_drive_get_raw); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Sean Wang "); MODULE_DESCRIPTION("Pin configuration library module for mediatek SoCs"); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h index e2aae285b5fc..fd5ce9c5dcbd 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h @@ -66,6 +66,7 @@ enum { PINCTRL_PIN_REG_DRV_EN, PINCTRL_PIN_REG_DRV_E0, PINCTRL_PIN_REG_DRV_E1, + PINCTRL_PIN_REG_DRV_ADV, PINCTRL_PIN_REG_MAX, }; @@ -314,6 +315,10 @@ int mtk_pinconf_adv_drive_set(struct mtk_pinctrl *hw, const struct mtk_pin_desc *desc, u32 
arg); int mtk_pinconf_adv_drive_get(struct mtk_pinctrl *hw, const struct mtk_pin_desc *desc, u32 *val); +int mtk_pinconf_adv_drive_set_raw(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, u32 arg); +int mtk_pinconf_adv_drive_get_raw(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, u32 *val); bool mtk_is_virt_gpio(struct mtk_pinctrl *hw, unsigned int gpio_n); #endif /* __PINCTRL_MTK_COMMON_V2_H */ -- 2.18.0
[syzbot] KASAN: slab-out-of-bounds Read in __xfrm_decode_session (2)
Hello, syzbot found the following issue on: HEAD commit:1678e493 Merge tag 'lto-v5.12-rc6' of git://git.kernel.org.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=1565bf7cd0 kernel config: https://syzkaller.appspot.com/x/.config?x=71a75beb62b62a34 dashboard link: https://syzkaller.appspot.com/bug?extid=518a7b845c0083047e9c compiler: Debian clang version 11.0.1-2 Unfortunately, I don't have any reproducer for this issue yet. IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+518a7b845c0083047...@syzkaller.appspotmail.com == BUG: KASAN: slab-out-of-bounds in decode_session6 net/xfrm/xfrm_policy.c:3403 [inline] BUG: KASAN: slab-out-of-bounds in __xfrm_decode_session+0x1ba4/0x2720 net/xfrm/xfrm_policy.c:3495 Read of size 1 at addr 888013104540 by task syz-executor.3/16514 CPU: 0 PID: 16514 Comm: syz-executor.3 Not tainted 5.12.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x176/0x24e lib/dump_stack.c:120 print_address_description+0x5f/0x3a0 mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report+0x15c/0x200 mm/kasan/report.c:416 decode_session6 net/xfrm/xfrm_policy.c:3403 [inline] __xfrm_decode_session+0x1ba4/0x2720 net/xfrm/xfrm_policy.c:3495 vti_tunnel_xmit+0x1ea/0x1510 net/ipv4/ip_vti.c:286 __netdev_start_xmit include/linux/netdevice.h:4825 [inline] netdev_start_xmit include/linux/netdevice.h:4839 [inline] xmit_one net/core/dev.c:3605 [inline] dev_hard_start_xmit+0x20b/0x450 net/core/dev.c:3621 sch_direct_xmit+0x1f0/0xd30 net/sched/sch_generic.c:313 qdisc_restart net/sched/sch_generic.c:376 [inline] __qdisc_run+0xa4d/0x1a90 net/sched/sch_generic.c:384 __dev_xmit_skb net/core/dev.c:3855 [inline] __dev_queue_xmit+0x1141/0x2a50 net/core/dev.c:4162 neigh_output include/net/neighbour.h:510 [inline] ip6_finish_output2+0x10be/0x1460 
net/ipv6/ip6_output.c:117 dst_output include/net/dst.h:448 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ndisc_send_skb+0x93b/0xd50 net/ipv6/ndisc.c:508 addrconf_rs_timer+0x242/0x6f0 net/ipv6/addrconf.c:3877 call_timer_fn+0x91/0x160 kernel/time/timer.c:1431 expire_timers kernel/time/timer.c:1476 [inline] __run_timers+0x6c0/0x8a0 kernel/time/timer.c:1745 run_timer_softirq+0x63/0xf0 kernel/time/timer.c:1758 __do_softirq+0x318/0x714 kernel/softirq.c:345 invoke_softirq kernel/softirq.c:221 [inline] __irq_exit_rcu+0x1d8/0x200 kernel/softirq.c:422 irq_exit_rcu+0x5/0x20 kernel/softirq.c:434 sysvec_apic_timer_interrupt+0x91/0xb0 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632 RIP: 0010:__sanitizer_cov_trace_pc+0x56/0x60 kernel/kcov.c:205 Code: 2c 8b 91 10 15 00 00 83 fa 02 75 21 48 8b 91 18 15 00 00 48 8b 32 48 8d 7e 01 8b 89 14 15 00 00 48 39 cf 73 08 48 89 44 f2 08 <48> 89 3a c3 66 0f 1f 44 00 00 4c 8b 04 24 65 48 8b 14 25 80 ef 01 RSP: 0018:c90001acf9f0 EFLAGS: 0283 RAX: 821506a4 RBX: RCX: 0004 RDX: c9000f2df000 RSI: 2928 RDI: 2929 RBP: 192000359f57 R08: dc00 R09: f52000359f5e R10: f52000359f5e R11: R12: 111029006027 R13: 888034b67020 R14: 192000359f98 R15: 888034b67018 ext4_match fs/ext4/namei.c:1364 [inline] ext4_search_dir+0x2f4/0xa10 fs/ext4/namei.c:1395 search_dirblock fs/ext4/namei.c:1199 [inline] __ext4_find_entry+0x121c/0x1790 fs/ext4/namei.c:1553 ext4_find_entry fs/ext4/namei.c:1602 [inline] ext4_rmdir+0x347/0x1180 fs/ext4/namei.c:3132 vfs_rmdir+0x20a/0x3f0 fs/namei.c:3899 ovl_remove_upper fs/overlayfs/dir.c:825 [inline] ovl_do_remove+0x4d2/0xbe0 fs/overlayfs/dir.c:904 vfs_rmdir+0x20a/0x3f0 fs/namei.c:3899 do_rmdir+0x2a5/0x560 fs/namei.c:3962 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x466459 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 
01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:7f08cdd4a188 EFLAGS: 0246 ORIG_RAX: 0054 RAX: ffda RBX: 0056c008 RCX: 00466459 RDX: RSI: RDI: 20c0 RBP: 004bf9fb R08: R09: R10: R11: 0246 R12: 0056c008 R13: 7ffefaa401bf R14: 7f08cdd4a300 R15: 00022000 Allocated by task 8393: kasan_save_stack mm/kasan/common.c:38 [inline] kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:427 [inline] kasan_kmalloc+0xc2/0xf0 mm/kasan/common.c:506 kasan_kmalloc include/linux/kasan.h:233 [inline] __k
[PATCH v4 2/4] pinctrl: add pinctrl driver on mt8195
This commit includes pinctrl driver for mt8195. Signed-off-by: Zhiyong Tao --- drivers/pinctrl/mediatek/Kconfig |6 + drivers/pinctrl/mediatek/Makefile |1 + drivers/pinctrl/mediatek/pinctrl-mt8195.c | 828 drivers/pinctrl/mediatek/pinctrl-mtk-mt8195.h | 1669 + 4 files changed, 2504 insertions(+) create mode 100644 drivers/pinctrl/mediatek/pinctrl-mt8195.c create mode 100644 drivers/pinctrl/mediatek/pinctrl-mtk-mt8195.h diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index eef17f228669..90f0c8255eaf 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -147,6 +147,12 @@ config PINCTRL_MT8192 default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS +config PINCTRL_MT8195 + bool "Mediatek MT8195 pin control" + depends on OF + depends on ARM64 || COMPILE_TEST + select PINCTRL_MTK_PARIS + config PINCTRL_MT8516 bool "Mediatek MT8516 pin control" depends on OF diff --git a/drivers/pinctrl/mediatek/Makefile b/drivers/pinctrl/mediatek/Makefile index 01218bf4dc30..06fde993ace2 100644 --- a/drivers/pinctrl/mediatek/Makefile +++ b/drivers/pinctrl/mediatek/Makefile @@ -21,5 +21,6 @@ obj-$(CONFIG_PINCTRL_MT8167) += pinctrl-mt8167.o obj-$(CONFIG_PINCTRL_MT8173) += pinctrl-mt8173.o obj-$(CONFIG_PINCTRL_MT8183) += pinctrl-mt8183.o obj-$(CONFIG_PINCTRL_MT8192) += pinctrl-mt8192.o +obj-$(CONFIG_PINCTRL_MT8195)+= pinctrl-mt8195.o obj-$(CONFIG_PINCTRL_MT8516) += pinctrl-mt8516.o obj-$(CONFIG_PINCTRL_MT6397) += pinctrl-mt6397.o diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c new file mode 100644 index ..063f164d7c9b --- /dev/null +++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 MediaTek Inc. 
+ * + * Author: Zhiyong Tao + * + */ + +#include "pinctrl-mtk-mt8195.h" +#include "pinctrl-paris.h" + +/* MT8195 have multiple bases to program pin configuration listed as the below: + * iocfg[0]:0x10005000, iocfg[1]:0x11d1, iocfg[2]:0x11d3, + * iocfg[3]:0x11d4, iocfg[4]:0x11e2, iocfg[5]:0x11eb, + * iocfg[6]:0x11f4. + * _i_based could be used to indicate what base the pin should be mapped into. + */ + +#define PIN_FIELD_BASE(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits) \ + PIN_FIELD_CALC(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits, \ + 32, 0) + +#define PINS_FIELD_BASE(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits) \ + PIN_FIELD_CALC(s_pin, e_pin, i_base, s_addr, x_addrs, s_bit, x_bits, \ + 32, 1) + +static const struct mtk_pin_field_calc mt8195_pin_mode_range[] = { + PIN_FIELD(0, 144, 0x300, 0x10, 0, 4), +}; + +static const struct mtk_pin_field_calc mt8195_pin_dir_range[] = { + PIN_FIELD(0, 144, 0x0, 0x10, 0, 1), +}; + +static const struct mtk_pin_field_calc mt8195_pin_di_range[] = { + PIN_FIELD(0, 144, 0x200, 0x10, 0, 1), +}; + +static const struct mtk_pin_field_calc mt8195_pin_do_range[] = { + PIN_FIELD(0, 144, 0x100, 0x10, 0, 1), +}; + +static const struct mtk_pin_field_calc mt8195_pin_ies_range[] = { + PIN_FIELD_BASE(0, 0, 4, 0x040, 0x10, 0, 1), + PIN_FIELD_BASE(1, 1, 4, 0x040, 0x10, 1, 1), + PIN_FIELD_BASE(2, 2, 4, 0x040, 0x10, 2, 1), + PIN_FIELD_BASE(3, 3, 4, 0x040, 0x10, 3, 1), + PIN_FIELD_BASE(4, 4, 4, 0x040, 0x10, 4, 1), + PIN_FIELD_BASE(5, 5, 4, 0x040, 0x10, 5, 1), + PIN_FIELD_BASE(6, 6, 4, 0x040, 0x10, 6, 1), + PIN_FIELD_BASE(7, 7, 4, 0x040, 0x10, 7, 1), + PIN_FIELD_BASE(8, 8, 4, 0x040, 0x10, 13, 1), + PIN_FIELD_BASE(9, 9, 4, 0x040, 0x10, 8, 1), + PIN_FIELD_BASE(10, 10, 4, 0x040, 0x10, 14, 1), + PIN_FIELD_BASE(11, 11, 4, 0x040, 0x10, 9, 1), + PIN_FIELD_BASE(12, 12, 4, 0x040, 0x10, 15, 1), + PIN_FIELD_BASE(13, 13, 4, 0x040, 0x10, 10, 1), + PIN_FIELD_BASE(14, 14, 4, 0x040, 0x10, 16, 1), + PIN_FIELD_BASE(15, 15, 4, 0x040, 0x10, 11, 
1), + PIN_FIELD_BASE(16, 16, 4, 0x040, 0x10, 17, 1), + PIN_FIELD_BASE(17, 17, 4, 0x040, 0x10, 12, 1), + PIN_FIELD_BASE(18, 18, 2, 0x040, 0x10, 5, 1), + PIN_FIELD_BASE(19, 19, 2, 0x040, 0x10, 12, 1), + PIN_FIELD_BASE(20, 20, 2, 0x040, 0x10, 11, 1), + PIN_FIELD_BASE(21, 21, 2, 0x040, 0x10, 10, 1), + PIN_FIELD_BASE(22, 22, 2, 0x040, 0x10, 0, 1), + PIN_FIELD_BASE(23, 23, 2, 0x040, 0x10, 1, 1), + PIN_FIELD_BASE(24, 24, 2, 0x040, 0x10, 2, 1), + PIN_FIELD_BASE(25, 25, 2, 0x040, 0x10, 4, 1), + PIN_FIELD_BASE(26, 26, 2, 0x040, 0x10, 3, 1), + PIN_FIELD_BASE(27, 27, 2, 0x040, 0x10, 6, 1), + PIN_FIELD_BASE(28, 28, 2, 0x040, 0x10, 7, 1), + PIN_FIELD_BASE(29, 29, 2, 0x040, 0x10, 8, 1), + PIN_FIELD_BASE(30, 30, 2, 0x040, 0x10, 9, 1), + PIN_FIELD_BAS
[PATCH v4 4/4] pinctrl: add rsel setting on MT8195
This patch provides rsel setting on MT8195. Signed-off-by: Zhiyong Tao --- drivers/pinctrl/mediatek/pinctrl-mt8195.c | 22 +++ .../pinctrl/mediatek/pinctrl-mtk-common-v2.c | 14 .../pinctrl/mediatek/pinctrl-mtk-common-v2.h | 10 + drivers/pinctrl/mediatek/pinctrl-paris.c | 16 ++ 4 files changed, 62 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c index a7500e18bb1d..66608b8d346a 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c @@ -779,6 +779,25 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_adv_range[] = { PIN_FIELD_BASE(45, 45, 1, 0x040, 0x10, 9, 3), }; +static const struct mtk_pin_field_calc mt8195_pin_rsel_range[] = { + PIN_FIELD_BASE(8, 8, 4, 0x0c0, 0x10, 15, 3), + PIN_FIELD_BASE(9, 9, 4, 0x0c0, 0x10, 0, 3), + PIN_FIELD_BASE(10, 10, 4, 0x0c0, 0x10, 18, 3), + PIN_FIELD_BASE(11, 11, 4, 0x0c0, 0x10, 3, 3), + PIN_FIELD_BASE(12, 12, 4, 0x0c0, 0x10, 21, 3), + PIN_FIELD_BASE(13, 13, 4, 0x0c0, 0x10, 6, 3), + PIN_FIELD_BASE(14, 14, 4, 0x0c0, 0x10, 24, 3), + PIN_FIELD_BASE(15, 15, 4, 0x0c0, 0x10, 9, 3), + PIN_FIELD_BASE(16, 16, 4, 0x0c0, 0x10, 27, 3), + PIN_FIELD_BASE(17, 17, 4, 0x0c0, 0x10, 12, 3), + PIN_FIELD_BASE(29, 29, 2, 0x080, 0x10, 0, 3), + PIN_FIELD_BASE(30, 30, 2, 0x080, 0x10, 3, 3), + PIN_FIELD_BASE(34, 34, 1, 0x0e0, 0x10, 0, 3), + PIN_FIELD_BASE(35, 35, 1, 0x0e0, 0x10, 3, 3), + PIN_FIELD_BASE(44, 44, 1, 0x0e0, 0x10, 6, 3), + PIN_FIELD_BASE(45, 45, 1, 0x0e0, 0x10, 9, 3), +}; + static const struct mtk_pin_reg_calc mt8195_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_MODE] = MTK_RANGE(mt8195_pin_mode_range), [PINCTRL_PIN_REG_DIR] = MTK_RANGE(mt8195_pin_dir_range), @@ -793,6 +812,7 @@ static const struct mtk_pin_reg_calc mt8195_reg_cals[PINCTRL_PIN_REG_MAX] = { [PINCTRL_PIN_REG_R0] = MTK_RANGE(mt8195_pin_r0_range), [PINCTRL_PIN_REG_R1] = MTK_RANGE(mt8195_pin_r1_range), [PINCTRL_PIN_REG_DRV_ADV] = MTK_RANGE(mt8195_pin_drv_adv_range), + 
[PINCTRL_PIN_REG_RSEL] = MTK_RANGE(mt8195_pin_rsel_range), }; static const char * const mt8195_pinctrl_register_base_names[] = { @@ -823,6 +843,8 @@ static const struct mtk_pin_soc mt8195_data = { .drive_get = mtk_pinconf_drive_get_rev1, .adv_drive_get = mtk_pinconf_adv_drive_get_raw, .adv_drive_set = mtk_pinconf_adv_drive_set_raw, + .rsel_set = mtk_pinconf_rsel_set, + .rsel_get = mtk_pinconf_rsel_get, }; static const struct of_device_id mt8195_pinctrl_of_match[] = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 2b51f4a9b860..d1526d0c6248 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -1041,6 +1041,20 @@ int mtk_pinconf_adv_drive_get_raw(struct mtk_pinctrl *hw, } EXPORT_SYMBOL_GPL(mtk_pinconf_adv_drive_get_raw); +int mtk_pinconf_rsel_set(struct mtk_pinctrl *hw, +const struct mtk_pin_desc *desc, u32 arg) +{ + return mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_RSEL, arg); +} +EXPORT_SYMBOL_GPL(mtk_pinconf_rsel_set); + +int mtk_pinconf_rsel_get(struct mtk_pinctrl *hw, +const struct mtk_pin_desc *desc, u32 *val) +{ + return mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_RSEL, val); +} +EXPORT_SYMBOL_GPL(mtk_pinconf_rsel_get); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Sean Wang "); MODULE_DESCRIPTION("Pin configuration library module for mediatek SoCs"); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h index fd5ce9c5dcbd..570e8da7bf38 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h @@ -67,6 +67,7 @@ enum { PINCTRL_PIN_REG_DRV_E0, PINCTRL_PIN_REG_DRV_E1, PINCTRL_PIN_REG_DRV_ADV, + PINCTRL_PIN_REG_RSEL, PINCTRL_PIN_REG_MAX, }; @@ -237,6 +238,10 @@ struct mtk_pin_soc { const struct mtk_pin_desc *desc, u32 arg); int (*adv_drive_get)(struct mtk_pinctrl *hw, const struct mtk_pin_desc *desc, u32 *val); + 
int (*rsel_set)(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, u32 arg); + int (*rsel_get)(struct mtk_pinctrl *hw, + const struct mtk_pin_desc *desc, u32 *val); /* Specific driver data */ void*driver_data; @@ -320,5 +325,10 @@ int mtk_pinconf_adv_drive_set_raw(struct mtk_pinctrl *hw, int mtk_pinconf_adv_drive_get_raw(struct mtk_pinctrl *hw, const struct mtk_pin_desc *desc, u32 *val); +int mtk_pinconf_rse
[PATCH v4 1/4] dt-bindings: pinctrl: mt8195: add pinctrl file and binding document
1. This patch adds pinctrl file for mt8195. 2. This patch adds mt8195 compatible node in binding document. Signed-off-by: Zhiyong Tao --- .../bindings/pinctrl/pinctrl-mt8195.yaml | 151 +++ include/dt-bindings/pinctrl/mt8195-pinfunc.h | 962 ++ 2 files changed, 1113 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml create mode 100644 include/dt-bindings/pinctrl/mt8195-pinfunc.h diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml new file mode 100644 index ..2f12ec59eee5 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml @@ -0,0 +1,151 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/pinctrl-mt8195.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mediatek MT8195 Pin Controller + +maintainers: + - Sean Wang + +description: | + The Mediatek's Pin controller is used to control SoC pins. + +properties: + compatible: +const: mediatek,mt8195-pinctrl + + gpio-controller: true + + '#gpio-cells': +description: | + Number of cells in GPIO specifier. Since the generic GPIO binding is used, + the amount of cells must be specified as 2. See the below + mentioned gpio binding representation for description of particular cells. +const: 2 + + gpio-ranges: +description: gpio valid number range. +maxItems: 1 + + reg: +description: | + Physical address base for gpio base registers. There are 8 GPIO + physical address base in mt8195. +maxItems: 8 + + reg-names: +description: | + Gpio base register names. +maxItems: 8 + + interrupt-controller: true + + '#interrupt-cells': +const: 2 + + interrupts: +description: The interrupt outputs to sysirq. 
+maxItems: 1 + +#PIN CONFIGURATION NODES +patternProperties: + '-pins$': +type: object +description: | + A pinctrl node should contain at least one subnodes representing the + pinctrl groups available on the machine. Each subnode will list the + pins it needs, and how they should be configured, with regard to muxer + configuration, pullups, drive strength, input enable/disable and + input schmitt. + An example of using macro: + pincontroller { +/* GPIO0 set as multifunction GPIO0 */ +gpio_pin { + pinmux = ; +}; +/* GPIO8 set as multifunction SDA0 */ +i2c0_pin { + pinmux = ; +}; + }; +$ref: "pinmux-node.yaml" + +properties: + pinmux: +description: | + Integer array, represents gpio pin number and mux setting. + Supported pin number and mux varies for different SoCs, and are defined + as macros in dt-bindings/pinctrl/-pinfunc.h directly. + + drive-strength: +description: | + It can support some arguments which is from 0 to 7. It can only support + 2/4/6/8/10/12/14/16mA in mt8195. +enum: [0, 1, 2, 3, 4, 5, 6, 7] + + bias-pull-down: true + + bias-pull-up: true + + bias-disable: true + + output-high: true + + output-low: true + + input-enable: true + + input-disable: true + + input-schmitt-enable: true + + input-schmitt-disable: true + +required: + - pinmux + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + - interrupt-controller + - '#interrupt-cells' + - gpio-controller + - '#gpio-cells' + - gpio-ranges + +additionalProperties: false + +examples: + - | +#include +#include +pio: pinctrl@10005000 { +compatible = "mediatek,mt8195-pinctrl"; +reg = <0x10005000 0x1000>, + <0x11d1 0x1000>, + <0x11d3 0x1000>, + <0x11d4 0x1000>, + <0x11e2 0x1000>, + <0x11eb 0x1000>, + <0x11f4 0x1000>, + <0x1000b000 0x1000>; +reg-names = "iocfg0", "iocfg_bm", "iocfg_bl", + "iocfg_br", "iocfg_lm", "iocfg_rb", + "iocfg_tl", "eint"; +gpio-controller; +#gpio-cells = <2>; +gpio-ranges = <&pio 0 0 144>; +interrupt-controller; +interrupts = ; +#interrupt-cells = <2>; + 
+pio-pins { + pinmux = ; + output-low; +}; +}; diff --git a/include/dt-bindings/pinctrl/mt8195-pinfunc.h b/include/dt-bindings/pinctrl/mt8195-pinfunc.h new file mode 100644 index ..666331bb9b40 --- /dev/null +++ b
[PATCH v4 0/4] Mediatek pinctrl patch on mt8195
This series includes 4 patches: 1.add pinctrl file and binding document on mt8195. 2.add pinctrl driver on MT8195. 3.add pinctrl drive for I2C related pins on MT8195. 4.add pinctrl rsel setting on MT8195. Changes in patch v4: 1)fix pinctrl-mt8195.yaml warning error. 2)remove pinctrl device node patch which is based on "mt8195.dtsi". Changes in patch v3: 1)change '^pins' to '-pins$'. 2)change 'state_0_node_a' to 'gpio_pin' which is defined in dts. 3)change 'state_0_node_b' to 'i2c0_pin' which is defined in dts. 4)reorder this series patches. change pinctrl file and binding document together in one patch. There are no changes in v1 & v2. Zhiyong Tao (4): dt-bindings: pinctrl: mt8195: add pinctrl file and binding document pinctrl: add pinctrl driver on mt8195 pinctrl: add drive for I2C related pins on MT8195 pinctrl: add rsel setting on MT8195 .../bindings/pinctrl/pinctrl-mt8195.yaml | 151 ++ drivers/pinctrl/mediatek/Kconfig |6 + drivers/pinctrl/mediatek/Makefile |1 + drivers/pinctrl/mediatek/pinctrl-mt8195.c | 872 + .../pinctrl/mediatek/pinctrl-mtk-common-v2.c | 28 + .../pinctrl/mediatek/pinctrl-mtk-common-v2.h | 15 + drivers/pinctrl/mediatek/pinctrl-mtk-mt8195.h | 1669 + drivers/pinctrl/mediatek/pinctrl-paris.c | 16 + include/dt-bindings/pinctrl/mt8195-pinfunc.h | 962 ++ 9 files changed, 3720 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/pinctrl-mt8195.yaml create mode 100644 drivers/pinctrl/mediatek/pinctrl-mt8195.c create mode 100644 drivers/pinctrl/mediatek/pinctrl-mtk-mt8195.h create mode 100644 include/dt-bindings/pinctrl/mt8195-pinfunc.h -- 2.18.0
[syzbot] BUG: unable to handle kernel NULL pointer dereference in __lookup_slow (2)
Hello, syzbot found the following issue on: HEAD commit:d93a0d43 Merge tag 'block-5.12-2021-04-02' of git://git.ke.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=16519431d0 kernel config: https://syzkaller.appspot.com/x/.config?x=71a75beb62b62a34 dashboard link: https://syzkaller.appspot.com/bug?extid=11c49ce9d4e7896f3406 compiler: Debian clang version 11.0.1-2 Unfortunately, I don't have any reproducer for this issue yet. IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+11c49ce9d4e7896f3...@syzkaller.appspotmail.com REISERFS (device loop4): Using r5 hash to sort names BUG: kernel NULL pointer dereference, address: #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page PGD 6bb82067 P4D 6bb82067 PUD 6bb81067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 0 PID: 11072 Comm: syz-executor.4 Not tainted 5.12.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffd6. 
RSP: 0018:c90008f8fa20 EFLAGS: 00010246 RAX: 113872e8 RBX: dc00 RCX: 0004 RDX: RSI: 88802e9d9490 RDI: 88807f140190 RBP: 89c39740 R08: 81c9d4de R09: fbfff200a946 R10: fbfff200a946 R11: R12: R13: 88807f140190 R14: 111005d3b292 R15: 88802e9d9490 FS: 7f894af88700() GS:8880b9c0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: ffd6 CR3: 6bb83000 CR4: 001506f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: __lookup_slow+0x240/0x370 fs/namei.c:1626 lookup_one_len+0x10e/0x200 fs/namei.c:2649 reiserfs_lookup_privroot+0x85/0x1e0 fs/reiserfs/xattr.c:980 reiserfs_fill_super+0x2a69/0x3160 fs/reiserfs/super.c:2176 mount_bdev+0x26c/0x3a0 fs/super.c:1367 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1497 do_new_mount fs/namespace.c:2903 [inline] path_mount+0x188a/0x29a0 fs/namespace.c:3233 do_mount fs/namespace.c:3246 [inline] __do_sys_mount fs/namespace.c:3454 [inline] __se_sys_mount+0x28c/0x320 fs/namespace.c:3431 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x46797a Code: 48 c7 c2 bc ff ff ff f7 d8 64 89 02 b8 ff ff ff ff eb d2 e8 b8 04 00 00 0f 1f 84 00 00 00 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:7f894af87fa8 EFLAGS: 0206 ORIG_RAX: 00a5 RAX: ffda RBX: 2200 RCX: 0046797a RDX: 2000 RSI: 2100 RDI: 7f894af88000 RBP: 7f894af88040 R08: 7f894af88040 R09: 2000 R10: R11: 0206 R12: 2000 R13: 2100 R14: 7f894af88000 R15: 20011500 Modules linked in: CR2: ---[ end trace a1b8dbb111baf993 ]--- RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffd6. 
RSP: 0018:c90008f8fa20 EFLAGS: 00010246 RAX: 113872e8 RBX: dc00 RCX: 0004 RDX: RSI: 88802e9d9490 RDI: 88807f140190 RBP: 89c39740 R08: 81c9d4de R09: fbfff200a946 R10: fbfff200a946 R11: R12: R13: 88807f140190 R14: 111005d3b292 R15: 88802e9d9490 FS: 7f894af88700() GS:8880b9c0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: ffd6 CR3: 6bb83000 CR4: 001506f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 --- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkal...@googlegroups.com. syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
[syzbot] KASAN: slab-out-of-bounds Read in reiserfs_xattr_get
Hello, syzbot found the following issue on: HEAD commit:3a229812 Merge tag 'arm-fixes-5.11-2' of git://git.kernel... git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=16b4d196d0 kernel config: https://syzkaller.appspot.com/x/.config?x=f91155ccddaf919c dashboard link: https://syzkaller.appspot.com/bug?extid=72ba979b6681c3369db4 compiler: Debian clang version 11.0.1-2 Unfortunately, I don't have any reproducer for this issue yet. IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+72ba979b6681c3369...@syzkaller.appspotmail.com loop3: detected capacity change from 0 to 65534 == BUG: KASAN: slab-out-of-bounds in reiserfs_xattr_get+0xe0/0x590 fs/reiserfs/xattr.c:681 Read of size 8 at addr 888028983198 by task syz-executor.3/4211 CPU: 1 PID: 4211 Comm: syz-executor.3 Not tainted 5.12.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x176/0x24e lib/dump_stack.c:120 print_address_description+0x5f/0x3a0 mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report+0x15c/0x200 mm/kasan/report.c:416 reiserfs_xattr_get+0xe0/0x590 fs/reiserfs/xattr.c:681 reiserfs_get_acl+0x63/0x670 fs/reiserfs/xattr_acl.c:211 get_acl+0x152/0x2e0 fs/posix_acl.c:141 check_acl fs/namei.c:294 [inline] acl_permission_check fs/namei.c:339 [inline] generic_permission+0x2ed/0x5b0 fs/namei.c:392 do_inode_permission fs/namei.c:446 [inline] inode_permission+0x28e/0x500 fs/namei.c:513 may_open+0x228/0x3e0 fs/namei.c:2985 do_open fs/namei.c:3365 [inline] path_openat+0x2697/0x3860 fs/namei.c:3500 do_filp_open+0x1a3/0x3b0 fs/namei.c:3527 do_sys_openat2+0xba/0x380 fs/open.c:1187 do_sys_open fs/open.c:1203 [inline] __do_sys_openat fs/open.c:1219 [inline] __se_sys_openat fs/open.c:1214 [inline] __x64_sys_openat+0x1c8/0x1f0 fs/open.c:1214 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 
entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x419544 Code: 84 00 00 00 00 00 44 89 54 24 0c e8 96 f9 ff ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 89 44 24 0c e8 c8 f9 ff ff 8b 44 RSP: 002b:7fa357a03f30 EFLAGS: 0293 ORIG_RAX: 0101 RAX: ffda RBX: 2200 RCX: 00419544 RDX: 0001 RSI: 2100 RDI: ff9c RBP: 2100 R08: R09: 2000 R10: R11: 0293 R12: 0001 R13: 2100 R14: 7fa357a04000 R15: 20065600 Allocated by task 4210: kasan_save_stack mm/kasan/common.c:38 [inline] kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:427 [inline] kasan_kmalloc+0xc2/0xf0 mm/kasan/common.c:506 kasan_kmalloc include/linux/kasan.h:233 [inline] kmem_cache_alloc_trace+0x21b/0x350 mm/slub.c:2934 kmalloc include/linux/slab.h:554 [inline] kzalloc include/linux/slab.h:684 [inline] smk_fetch security/smack/smack_lsm.c:288 [inline] smack_d_instantiate+0x65c/0xcc0 security/smack/smack_lsm.c:3411 security_d_instantiate+0xa5/0x100 security/security.c:1987 d_instantiate_new+0x61/0x110 fs/dcache.c:2025 ext4_add_nondir+0x22b/0x290 fs/ext4/namei.c:2590 ext4_symlink+0x8ce/0xe90 fs/ext4/namei.c:3417 vfs_symlink+0x3a0/0x540 fs/namei.c:4178 do_symlinkat+0x1c9/0x440 fs/namei.c:4208 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 4210: kasan_save_stack mm/kasan/common.c:38 [inline] kasan_set_track+0x3d/0x70 mm/kasan/common.c:46 kasan_set_free_info+0x1f/0x40 mm/kasan/generic.c:357 kasan_slab_free+0x100/0x140 mm/kasan/common.c:360 kasan_slab_free include/linux/kasan.h:199 [inline] slab_free_hook mm/slub.c:1562 [inline] slab_free_freelist_hook+0x171/0x270 mm/slub.c:1600 slab_free mm/slub.c:3161 [inline] kfree+0xcf/0x2d0 mm/slub.c:4213 smk_fetch security/smack/smack_lsm.c:300 [inline] smack_d_instantiate+0x6db/0xcc0 security/smack/smack_lsm.c:3411 security_d_instantiate+0xa5/0x100 security/security.c:1987 d_instantiate_new+0x61/0x110 fs/dcache.c:2025 
ext4_add_nondir+0x22b/0x290 fs/ext4/namei.c:2590 ext4_symlink+0x8ce/0xe90 fs/ext4/namei.c:3417 vfs_symlink+0x3a0/0x540 fs/namei.c:4178 do_symlinkat+0x1c9/0x440 fs/namei.c:4208 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Last potentially related work creation: kasan_save_stack+0x27/0x50 mm/kasan/common.c:38 kasan_record_aux_stack+0xee/0x120 mm/kasan/generic.c:345 __call_rcu kernel/rcu/tree.c:3039 [inline] call_rcu+0x130/0x8e0 kernel/rcu/tree.c:3114 fib6_info_release include/net/ip6_fib.h:337 [inline] nsim_rt6_release drivers/net/netdevsim/fib.c:507 [inline] nsim_fib6_event_fini+0x100/0x1f0 drivers
Re: [PATCH] message/fusion: Use BUG_ON instead of if condition followed by BUG.
On Tue, 30 Mar 2021 05:46:01 -0700, zhouchuangao wrote: > BUG_ON() uses unlikely in if(), which can be optimized at compile time. Applied to 5.13/scsi-queue, thanks! [1/1] message/fusion: Use BUG_ON instead of if condition followed by BUG. https://git.kernel.org/mkp/scsi/c/4dec8004de29 -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH] scsi: bfa: Remove unnecessary struct declaration
On Thu, 1 Apr 2021 14:35:34 +0800, Wan Jiabing wrote: > struct bfa_fcs_s is declared twice. One is declared > at 50th line. Remove the duplicate. > struct bfa_fcs_fabric_s is defined at 175th line. > Remove unnecessary declaration. Applied to 5.13/scsi-queue, thanks! [1/1] scsi: bfa: Remove unnecessary struct declaration https://git.kernel.org/mkp/scsi/c/c3b0d087763f -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH][next] scsi: ufs: Fix out-of-bounds warnings in ufshcd_exec_raw_upiu_cmd
On Wed, 31 Mar 2021 17:43:38 -0500, Gustavo A. R. Silva wrote: > Fix the following out-of-bounds warnings by enclosing > some structure members into new structure objects upiu_req > and upiu_rsp: > > include/linux/fortify-string.h:20:29: warning: '__builtin_memcpy' offset [29, > 48] from the object at 'treq' is out of the bounds of referenced subobject > 'req_header' with type 'struct utp_upiu_header' at offset 16 [-Warray-bounds] > include/linux/fortify-string.h:20:29: warning: '__builtin_memcpy' offset [61, > 80] from the object at 'treq' is out of the bounds of referenced subobject > 'rsp_header' with type 'struct utp_upiu_header' at offset 48 [-Warray-bounds] > arch/m68k/include/asm/string.h:72:25: warning: '__builtin_memcpy' offset [29, > 48] from the object at 'treq' is out of the bounds of referenced subobject > 'req_header' with type 'struct utp_upiu_header' at offset 16 [-Warray-bounds] > arch/m68k/include/asm/string.h:72:25: warning: '__builtin_memcpy' offset [61, > 80] from the object at 'treq' is out of the bounds of referenced subobject > 'rsp_header' with type 'struct utp_upiu_header' at offset 48 [-Warray-bounds] > > [...] Applied to 5.13/scsi-queue, thanks! [1/1] scsi: ufs: Fix out-of-bounds warnings in ufshcd_exec_raw_upiu_cmd https://git.kernel.org/mkp/scsi/c/1352eec8c0da -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH -next] scsi: fnic: remove unnecessary spin_lock_init() and INIT_LIST_HEAD()
On Tue, 30 Mar 2021 20:59:11 +0800, Yang Yingliang wrote: > The spinlock and list head of fnic_list is initialized statically. > It is unnecessary to initialize by spin_lock_init() and INIT_LIST_HEAD(). Applied to 5.13/scsi-queue, thanks! [1/1] scsi: fnic: remove unnecessary spin_lock_init() and INIT_LIST_HEAD() https://git.kernel.org/mkp/scsi/c/aa6f2fccd711 -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v7 3/4] spmi: mediatek: Add support for MT6873/8192
Hi Maintainers, Gentle pin for this patch. Thanks. On Sun, 2021-03-14 at 02:00 +0800, Hsin-Hsiung Wang wrote: > Add spmi support for MT6873/8192. > > Signed-off-by: Hsin-Hsiung Wang > --- > changes since v6: > - remove unused spinlock. > - remove redundant check for slave id. > --- > drivers/spmi/Kconfig | 10 + > drivers/spmi/Makefile| 2 + > drivers/spmi/spmi-mtk-pmif.c | 465 +++ > 3 files changed, 477 insertions(+) > create mode 100644 drivers/spmi/spmi-mtk-pmif.c > > diff --git a/drivers/spmi/Kconfig b/drivers/spmi/Kconfig > index a53bad541f1a..692bac98a120 100644 > --- a/drivers/spmi/Kconfig > +++ b/drivers/spmi/Kconfig > @@ -25,4 +25,14 @@ config SPMI_MSM_PMIC_ARB > This is required for communicating with Qualcomm PMICs and > other devices that have the SPMI interface. > > +config SPMI_MTK_PMIF > + tristate "Mediatek SPMI Controller (PMIC Arbiter)" > + help > + If you say yes to this option, support will be included for the > + built-in SPMI PMIC Arbiter interface on Mediatek family > + processors. > + > + This is required for communicating with Mediatek PMICs and > + other devices that have the SPMI interface. > + > endif > diff --git a/drivers/spmi/Makefile b/drivers/spmi/Makefile > index 55a94cadeffe..76fb3b3ab510 100644 > --- a/drivers/spmi/Makefile > +++ b/drivers/spmi/Makefile > @@ -5,3 +5,5 @@ > obj-$(CONFIG_SPMI) += spmi.o > > obj-$(CONFIG_SPMI_MSM_PMIC_ARB) += spmi-pmic-arb.o > +obj-$(CONFIG_SPMI_MTK_PMIF) += spmi-mtk-pmif.o > + > diff --git a/drivers/spmi/spmi-mtk-pmif.c b/drivers/spmi/spmi-mtk-pmif.c > new file mode 100644 > index ..94c45d46ab0c > --- /dev/null > +++ b/drivers/spmi/spmi-mtk-pmif.c > @@ -0,0 +1,465 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// > +// Copyright (c) 2021 MediaTek Inc. 
> + > +#include > +#include > +#include > +#include > +#include > + > +#define SWINF_IDLE 0x00 > +#define SWINF_WFVLDCLR 0x06 > + > +#define GET_SWINF(x) (((x) >> 1) & 0x7) > + > +#define PMIF_CMD_REG_0 0 > +#define PMIF_CMD_REG 1 > +#define PMIF_CMD_EXT_REG 2 > +#define PMIF_CMD_EXT_REG_LONG3 > + > +#define PMIF_DELAY_US 10 > +#define PMIF_TIMEOUT_US (10 * 1000) > + > +#define PMIF_CHAN_OFFSET 0x5 > + > +#define PMIF_MAX_CLKS3 > + > +#define SPMI_OP_ST_BUSY 1 > + > +struct ch_reg { > + u32 ch_sta; > + u32 wdata; > + u32 rdata; > + u32 ch_send; > + u32 ch_rdy; > +}; > + > +struct pmif_data { > + const u32 *regs; > + const u32 *spmimst_regs; > + u32 soc_chan; > +}; > + > +struct pmif { > + void __iomem*base; > + void __iomem*spmimst_base; > + struct ch_reg chan; > + struct clk_bulk_data clks[PMIF_MAX_CLKS]; > + u32 nclks; > + const struct pmif_data *data; > +}; > + > +static const char * const pmif_clock_names[] = { > + "pmif_sys_ck", "pmif_tmr_ck", "spmimst_clk_mux", > +}; > + > +enum pmif_regs { > + PMIF_INIT_DONE, > + PMIF_INF_EN, > + PMIF_ARB_EN, > + PMIF_CMDISSUE_EN, > + PMIF_TIMER_CTRL, > + PMIF_SPI_MODE_CTRL, > + PMIF_IRQ_EVENT_EN_0, > + PMIF_IRQ_FLAG_0, > + PMIF_IRQ_CLR_0, > + PMIF_IRQ_EVENT_EN_1, > + PMIF_IRQ_FLAG_1, > + PMIF_IRQ_CLR_1, > + PMIF_IRQ_EVENT_EN_2, > + PMIF_IRQ_FLAG_2, > + PMIF_IRQ_CLR_2, > + PMIF_IRQ_EVENT_EN_3, > + PMIF_IRQ_FLAG_3, > + PMIF_IRQ_CLR_3, > + PMIF_IRQ_EVENT_EN_4, > + PMIF_IRQ_FLAG_4, > + PMIF_IRQ_CLR_4, > + PMIF_WDT_EVENT_EN_0, > + PMIF_WDT_FLAG_0, > + PMIF_WDT_EVENT_EN_1, > + PMIF_WDT_FLAG_1, > + PMIF_SWINF_0_STA, > + PMIF_SWINF_0_WDATA_31_0, > + PMIF_SWINF_0_RDATA_31_0, > + PMIF_SWINF_0_ACC, > + PMIF_SWINF_0_VLD_CLR, > + PMIF_SWINF_1_STA, > + PMIF_SWINF_1_WDATA_31_0, > + PMIF_SWINF_1_RDATA_31_0, > + PMIF_SWINF_1_ACC, > + PMIF_SWINF_1_VLD_CLR, > + PMIF_SWINF_2_STA, > + PMIF_SWINF_2_WDATA_31_0, > + PMIF_SWINF_2_RDATA_31_0, > + PMIF_SWINF_2_ACC, > + PMIF_SWINF_2_VLD_CLR, > + PMIF_SWINF_3_STA, > + PMIF_SWINF_3_WDATA_31_0, > + 
PMIF_SWINF_3_RDATA_31_0, > + PMIF_SWINF_3_ACC, > + PMIF_SWINF_3_VLD_CLR, > +}; > + > +static const u32 mt6873_regs[] = { > + [PMIF_INIT_DONE] = 0x, > + [PMIF_INF_EN] = 0x0024, > + [PMIF_ARB_EN] = 0x0150, > + [PMIF_CMDISSUE_EN] =0x03B4, > + [PMIF_TIMER_CTRL] = 0x03E0, > + [PMIF_SPI_MODE_CTRL] = 0x0400, > + [PMIF_IRQ_EVENT_EN_0] = 0x0418, > + [PMIF_IRQ_FLAG_0] = 0x0420, > + [PMIF_IRQ_CLR_0] = 0x0424, > + [PMIF_IRQ_EVENT_EN_1] = 0x0428, > + [PMIF_IRQ_FLAG_1] = 0x0430, > + [PMIF_IRQ_CLR_1] = 0x0434, > + [PMIF_IRQ_EVENT_EN_2] = 0x0438, > + [PMIF_IRQ_FLAG_2] = 0x0440, > + [PMIF_IRQ_CLR_2] = 0x0444, > + [PMIF_IRQ_EVENT_EN_3] = 0x0448, > + [PMIF_IRQ_FLAG_3] = 0x0
[PATCH RFC v2 0/4] virtio net: spurious interrupt related fixes
With the implementation of napi-tx in virtio driver, we clean tx descriptors from rx napi handler, for the purpose of reducing tx complete interrupts. But this introduces a race where tx complete interrupt has been raised, but the handler finds there is no work to do because we have done the work in the previous rx interrupt handler. A similar issue exists with polling from start_xmit, it is however less common because of the delayed cb optimization of the split ring - but will likely affect the packed ring once that is more common. In particular, this was reported to lead to the following warning msg: [ 3588.010778] irq 38: nobody cared (try booting with the "irqpoll" option) [ 3588.017938] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 5.3.0-19-generic #20~18.04.2-Ubuntu [ 3588.017940] Call Trace: [ 3588.017942] [ 3588.017951] dump_stack+0x63/0x85 [ 3588.017953] __report_bad_irq+0x35/0xc0 [ 3588.017955] note_interrupt+0x24b/0x2a0 [ 3588.017956] handle_irq_event_percpu+0x54/0x80 [ 3588.017957] handle_irq_event+0x3b/0x60 [ 3588.017958] handle_edge_irq+0x83/0x1a0 [ 3588.017961] handle_irq+0x20/0x30 [ 3588.017964] do_IRQ+0x50/0xe0 [ 3588.017966] common_interrupt+0xf/0xf [ 3588.017966] [ 3588.017989] handlers: [ 3588.020374] [<1b9f1da8>] vring_interrupt [ 3588.025099] Disabling IRQ #38 This patchset attempts to fix this by cleaning up a bunch of races related to the handling of sq callbacks (aka tx interrupts). Very lightly tested, sending out for help with testing, early feedback and flames. Thanks! Michael S. Tsirkin (4): virtio: fix up virtio_disable_cb virtio_net: disable cb aggressively virtio_net: move tx vq operation under tx queue lock virtio_net: move txq wakeups under tx q lock drivers/net/virtio_net.c | 35 +-- drivers/virtio/virtio_ring.c | 26 +- 2 files changed, 54 insertions(+), 7 deletions(-) -- MST
Re: [PATCH v1 0/2] scsi: libsas: few clean up patches
On Thu, 25 Mar 2021 20:29:54 +0800, Luo Jiaxing wrote: > Two types of errors are detected by the checkpatch. > 1. Alignment between switches and cases > 2. Improper use of some spaces > > Here are the clean up patches. > > Luo Jiaxing (2): > scsi: libsas: make switch and case at the same indent in > sas_to_ata_err() > scsi: libsas: clean up for white spaces > > [...] Applied to 5.13/scsi-queue, thanks! [1/2] scsi: libsas: make switch and case at the same indent in sas_to_ata_err() https://git.kernel.org/mkp/scsi/c/c03f2422b9f5 [2/2] scsi: libsas: clean up for white spaces https://git.kernel.org/mkp/scsi/c/857a80bbd732 -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v2] scsi: libsas: Reset num_scatter if libata mark qc as NODATA
On Thu, 18 Mar 2021 15:56:32 -0700, Jolly Shah wrote: > When the cache_type for the scsi device is changed, the scsi layer > issues a MODE_SELECT command. The caching mode details are communicated > via a request buffer associated with the scsi command with data > direction set as DMA_TO_DEVICE (scsi_mode_select). When this command > reaches the libata layer, as a part of generic initial setup, libata > layer sets up the scatterlist for the command using the scsi command > (ata_scsi_qc_new). This command is then translated by the libata layer > into ATA_CMD_SET_FEATURES (ata_scsi_mode_select_xlat). The libata layer > treats this as a non data command (ata_mselect_caching), since it only > needs an ata taskfile to pass the caching on/off information to the > device. It does not need the scatterlist that has been setup, so it does > not perform dma_map_sg on the scatterlist (ata_qc_issue). Unfortunately, > when this command reaches the libsas layer(sas_ata_qc_issue), libsas > layer sees it as a non data command with a scatterlist. It cannot > extract the correct dma length, since the scatterlist has not been > mapped with dma_map_sg for a DMA operation. When this partially > constructed SAS task reaches pm80xx LLDD, it results in below warning. > > [...] Applied to 5.12/scsi-fixes, thanks! [1/1] scsi: libsas: Reset num_scatter if libata mark qc as NODATA https://git.kernel.org/mkp/scsi/c/176ddd89171d -- Martin K. Petersen Oracle Linux Engineering
[PATCH RFC v2 4/4] virtio_net: move txq wakeups under tx q lock
We currently check num_free outside tx q lock which is unsafe: new packets can arrive meanwhile and there won't be space in the queue. Thus a spurious queue wakeup causing overhead and even packet drops. Move the check under the lock to fix that. Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 460ccdbb840e..febaf55ec1f6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1431,11 +1431,12 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) if (__netif_tx_trylock(txq)) { virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) + netif_tx_wake_queue(txq); + __netif_tx_unlock(txq); } - - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) - netif_tx_wake_queue(txq); } static int virtnet_poll(struct napi_struct *napi, int budget) @@ -1519,6 +1520,9 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) + netif_tx_wake_queue(txq); + opaque = virtqueue_enable_cb_prepare(sq->vq); done = napi_complete_done(napi, 0); @@ -1539,9 +1543,6 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) } } - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) - netif_tx_wake_queue(txq); - return 0; } -- MST
[PATCH RFC v2 3/4] virtio_net: move tx vq operation under tx queue lock
It's unsafe to operate a vq from multiple threads. Unfortunately this is exactly what we do when invoking clean tx poll from rx napi. As a fix move everything that deals with the vq to under tx lock. Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 16d5abed582c..460ccdbb840e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1505,6 +1505,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; + int opaque; + bool done; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ @@ -1514,10 +1516,28 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); + + opaque = virtqueue_enable_cb_prepare(sq->vq); + + done = napi_complete_done(napi, 0); + + if (!done) + virtqueue_disable_cb(sq->vq); + __netif_tx_unlock(txq); - virtqueue_napi_complete(napi, sq->vq, 0); + if (done) { + if (unlikely(virtqueue_poll(sq->vq, opaque))) { + if (napi_schedule_prep(napi)) { + __netif_tx_lock(txq, raw_smp_processor_id()); + virtqueue_disable_cb(sq->vq); + __netif_tx_unlock(txq); + __napi_schedule(napi); + } + } + } if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); -- MST
[PATCH RFC v2 2/4] virtio_net: disable cb aggressively
There are currently two cases where we poll TX vq not in response to a callback: start xmit and rx napi. We currently do this with callbacks enabled which can cause extra interrupts from the card. Used not to be a big issue as we run with interrupts disabled but that is no longer the case, and in some cases the rate of spurious interrupts is so high linux detects this and actually kills the interrupt. Fix up by disabling the callbacks before polling the tx vq. Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 82e520d2cb12..16d5abed582c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1429,6 +1429,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) return; if (__netif_tx_trylock(txq)) { + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); } @@ -1582,6 +1583,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) bool use_napi = sq->napi.weight; /* Free up any pending old buffers before queueing new ones. */ + virtqueue_disable_cb(sq->vq); free_old_xmit_skbs(sq, false); if (use_napi && kick) -- MST
[PATCH RFC v2 1/4] virtio: fix up virtio_disable_cb
virtio_disable_cb is currently a nop for split ring with event index. This is because it used to be always called from a callback when we know device won't trigger more events until we update the index. However, now that we run with interrupts enabled a lot we also poll without a callback so that is different: disabling callbacks will help reduce the number of spurious interrupts. Further, if using event index with a packed ring, and if being called from a callback, we actually do disable interrupts which is unnecessary. Fix both issues by tracking whenever we get a callback. If that is the case disabling interrupts with event index can be a nop. If not the case disable interrupts. Note: with a split ring there's no explicit "no interrupts" value. For now we write a fixed value so our chance of triggering an interrupt is 1/ring size. It's probably better to write something related to the last used index there to reduce the chance even further. For now I'm keeping it simple. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 26 +- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 71e16b53e9c1..88f0b16b11b8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -113,6 +113,9 @@ struct vring_virtqueue { /* Last used index we've seen. */ u16 last_used_idx; + /* Hint for event idx: already triggered no need to disable. */ + bool event_triggered; + union { /* Available for split ring */ struct { @@ -739,7 +742,10 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; - if (!vq->event) + if (vq->event) + /* TODO: this is a hack. Figure out a cleaner value to write. 
*/ + vring_used_event(&vq->split.vring) = 0x0; + else vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); @@ -1605,6 +1611,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; + vq->event_triggered = false; vq->num_added = 0; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); @@ -1919,6 +1926,12 @@ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + /* If device triggered an event already it won't trigger one again: +* no need to disable. +*/ + if (vq->event_triggered) + return; + if (vq->packed_ring) virtqueue_disable_cb_packed(_vq); else @@ -1942,6 +1955,9 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + if (vq->event_triggered) + vq->event_triggered = false; + return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : virtqueue_enable_cb_prepare_split(_vq); } @@ -2005,6 +2021,9 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + if (vq->event_triggered) + vq->event_triggered = false; + return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); } @@ -2044,6 +2063,10 @@ irqreturn_t vring_interrupt(int irq, void *_vq) if (unlikely(vq->broken)) return IRQ_HANDLED; + /* Just a hint for performance: so it's ok that this can be racy! */ + if (vq->event) + vq->event_triggered = true; + pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); if (vq->vq.callback) vq->vq.callback(&vq->vq); @@ -2083,6 +2106,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; + vq->event_triggered = false; vq->num_added = 0; vq->use_dma_api = vring_use_dma_api(vdev); #ifdef DEBUG -- MST
[syzbot] KASAN: use-after-free Read in skcipher_walk_next
Hello, syzbot found the following issue on: HEAD commit:4fa56ad0 Merge tag 'for-linus' of git://git.kernel.org/pub.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=17dbd09ad0 kernel config: https://syzkaller.appspot.com/x/.config?x=9320464bf47598bd dashboard link: https://syzkaller.appspot.com/bug?extid=4061a98a8ab454dde8ff Unfortunately, I don't have any reproducer for this issue yet. IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+4061a98a8ab454dde...@syzkaller.appspotmail.com == BUG: KASAN: use-after-free in memcpy include/linux/fortify-string.h:191 [inline] BUG: KASAN: use-after-free in skcipher_next_copy crypto/skcipher.c:292 [inline] BUG: KASAN: use-after-free in skcipher_walk_next+0xb69/0x1680 crypto/skcipher.c:379 Read of size 2785 at addr 8880781c by task kworker/u4:3/204 CPU: 0 PID: 204 Comm: kworker/u4:3 Not tainted 5.12.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: pencrypt_parallel padata_parallel_worker Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x141/0x1d7 lib/dump_stack.c:120 print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232 __kasan_report mm/kasan/report.c:399 [inline] kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:416 check_region_inline mm/kasan/generic.c:180 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:186 memcpy+0x20/0x60 mm/kasan/shadow.c:65 memcpy include/linux/fortify-string.h:191 [inline] skcipher_next_copy crypto/skcipher.c:292 [inline] skcipher_walk_next+0xb69/0x1680 crypto/skcipher.c:379 skcipher_walk_done+0x7a3/0xf00 crypto/skcipher.c:159 gcmaes_crypt_by_sg+0x377/0x8a0 arch/x86/crypto/aesni-intel_glue.c:694 The buggy address belongs to the page: page:ea0001e07000 refcount:0 mapcount:-128 mapping: index:0x1 pfn:0x781c0 flags: 0xfff000() raw: 00fff000 ea0001e06808 ea0001c67008 raw: 0001 0004 ff7f page dumped because: kasan: bad 
access detected Memory state around the buggy address: 8880781bff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 8880781bff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >8880781c: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ 8880781c0080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff 8880781c0100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff == --- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkal...@googlegroups.com. syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
linux-next: manual merge of the kvm-arm tree with the arm64 tree
Hi all, Today's linux-next merge of the kvm-arm tree got a conflict in: arch/arm64/include/asm/assembler.h between commits: 27248fe1abb2 ("arm64: assembler: remove conditional NEON yield macros") 13150149aa6d ("arm64: fpsimd: run kernel mode NEON with softirqs disabled") from the arm64 tree and commits: 8f4de66e247b ("arm64: asm: Provide set_sctlr_el2 macro") 755db23420a1 ("KVM: arm64: Generate final CTR_EL0 value when running in Protected mode") from the kvm-arm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc arch/arm64/include/asm/assembler.h index ab569b0b45fc,34ddd8a0f3dd.. --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@@ -15,7 -15,7 +15,8 @@@ #include #include +#include + #include #include #include #include @@@ -701,25 -705,95 +714,33 @@@ USER(\label, ic ivau, \tmp2) // inval isb .endm + .macro set_sctlr_el1, reg + set_sctlr sctlr_el1, \reg + .endm + + .macro set_sctlr_el2, reg + set_sctlr sctlr_el2, \reg + .endm + -/* - * Check whether to yield to another runnable task from kernel mode NEON code - * (which runs with preemption disabled). - * - * if_will_cond_yield_neon - *// pre-yield patchup code - * do_cond_yield_neon - *// post-yield patchup code - * endif_yield_neon - * - * where is optional, and marks the point where execution will resume - * after a yield has been performed. If omitted, execution resumes right after - * the endif_yield_neon invocation. Note that the entire sequence, including - * the provided patchup code, will be omitted from the image if - * CONFIG_PREEMPTION is not defined. 
- * - * As a convenience, in the case where no patchup code is required, the above - * sequence may be abbreviated to - * - * cond_yield_neon - * - * Note that the patchup code does not support assembler directives that change - * the output section, any use of such directives is undefined. - * - * The yield itself consists of the following: - * - Check whether the preempt count is exactly 1 and a reschedule is also - * needed. If so, calling of preempt_enable() in kernel_neon_end() will - * trigger a reschedule. If it is not the case, yielding is pointless. - * - Disable and re-enable kernel mode NEON, and branch to the yield fixup - * code. - * - * This macro sequence may clobber all CPU state that is not guaranteed by the - * AAPCS to be preserved across an ordinary function call. - */ - - .macro cond_yield_neon, lbl - if_will_cond_yield_neon - do_cond_yield_neon - endif_yield_neon\lbl - .endm - - .macro if_will_cond_yield_neon -#ifdef CONFIG_PREEMPTION - get_current_taskx0 - ldr x0, [x0, #TSK_TI_PREEMPT] - sub x0, x0, #PREEMPT_DISABLE_OFFSET - cbz x0, .Lyield_\@ - /* fall through to endif_yield_neon */ - .subsection 1 -.Lyield_\@ : -#else - .section".discard.cond_yield_neon", "ax" -#endif - .endm - - .macro do_cond_yield_neon - bl kernel_neon_end - bl kernel_neon_begin - .endm - - .macro endif_yield_neon, lbl - .ifnb \lbl - b \lbl - .else - b .Lyield_out_\@ - .endif - .previous -.Lyield_out_\@ : - .endm - /* - * Check whether preempt-disabled code should yield as soon as it - * is able. This is the case if re-enabling preemption a single - * time results in a preempt count of zero, and the TIF_NEED_RESCHED - * flag is set. (Note that the latter is stored negated in the - * top word of the thread_info::preempt_count field) + * Check whether preempt/bh-disabled asm code should yield as soon as + * it is able. 
This is the case if we are currently running in task + * context, and either a softirq is pending, or the TIF_NEED_RESCHED + * flag is set and re-enabling preemption a single time would result in + * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is + * stored negated in the top word of the thread_info::preempt_count + * field) */ - .macro cond_yield, lbl:req, tmp:req -#ifdef CONFIG_PREEMPTION + .macro cond_yield, lbl:req, tmp:req, tmp2:req get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] + /* + * If we are serving a softirq, t
Re: [RFC] mm: activate access-more-than-once page via NUMA balancing
Yu Zhao writes: > On Fri, Mar 26, 2021 at 12:21 AM Huang, Ying wrote: >> >> Mel Gorman writes: >> >> > On Thu, Mar 25, 2021 at 12:33:45PM +0800, Huang, Ying wrote: >> >> > I caution against this patch. >> >> > >> >> > It's non-deterministic for a number of reasons. As it requires NUMA >> >> > balancing to be enabled, the pageout behaviour of a system changes when >> >> > NUMA balancing is active. If this led to pages being artificially and >> >> > inappropriately preserved, NUMA balancing could be disabled for the >> >> > wrong reasons. It only applies to pages that have no target node so >> >> > memory policies affect which pages are activated differently. Similarly, >> >> > NUMA balancing does not scan all VMAs and some pages may never trap a >> >> > NUMA fault as a result. The timing of when an address space gets scanned >> >> > is driven by the locality of pages and so the timing of page activation >> >> > potentially becomes linked to whether pages are local or need to migrate >> >> > (although not right now for this patch as it only affects pages with a >> >> > target nid of NUMA_NO_NODE). In other words, changes in NUMA balancing >> >> > that affect migration potentially affect the aging rate. Similarly, >> >> > the activate rate of a process with a single thread and multiple threads >> >> > potentially have different activation rates. >> >> > >> >> > Finally, the NUMA balancing scan algorithm is sub-optimal. It >> >> > potentially >> >> > scans the entire address space even though only a small number of pages >> >> > are scanned. This is particularly problematic when a process has a lot >> >> > of threads because threads are redundantly scanning the same regions. If >> >> > NUMA balancing ever introduced range tracking of faulted pages to limit >> >> > how much scanning it has to do, it would inadvertently cause a change in >> >> > page activation rate. 
>> >> > >> >> > NUMA balancing is about page locality, it should not get conflated with >> >> > page aging. >> >> >> >> I understand your concerns about binding the NUMA balancing and page >> >> reclaiming. The requirement of the page locality and page aging is >> >> different, so the policies need to be different. This is the wrong part >> >> of the patch. >> >> >> >> From another point of view, it's still possible to share some underlying >> >> mechanisms (and code) between them. That is, scanning the page tables >> >> to make pages unaccessible and capture the page accesses via the page >> >> fault. >> > >> > Potentially yes but not necessarily recommended for page aging. NUMA >> > balancing has to be careful about the rate it scans pages to avoid >> > excessive overhead so it's driven by locality. The scanning happens >> > within a tasks context so during that time, the task is not executing >> > its normal work and it incurs the overhead for faults. Generally, this >> > is not too much overhead because pages get migrated locally, the scan >> > rate drops and so does the overhead. >> > >> > However, if you want to drive page aging, that is constant so the rate >> > could not be easily adapted in a way that would be deterministic. >> > >> >> Now these page accessing information is used for the page >> >> locality. Do you think it's a good idea to use these information for >> >> the page aging too (but with a different policy as you pointed out)? >> >> >> > >> > I'm not completely opposed to it but I think the overhead it would >> > introduce could be severe. Worse, if a workload fits in memory and there >> > is limited to no memory pressure, it's all overhead for no gain. Early >> > generations of NUMA balancing had to find a balance to sure the gains >> > from locality exceeded the cost of measuring locality and doing the same >> > for page aging in some ways is even more challenging. >> >> Yes. I will think more about it from the overhead vs. 
gain point of >> view. Thanks a lot for your sharing on that. >> >> >> From yet another point of view :-), in current NUMA balancing >> >> implementation, it's assumed that the node private pages can fit in the >> >> accessing node. But this may be not always true. Is it a valid >> >> optimization to migrate the hot private pages first? >> >> >> > >> > I'm not sure how the hotness of pages could be ranked. At the time of a >> > hinting fault, the page is by definition active now because it was been >> > accessed. Prioritising what pages to migrate based on the number of faults >> > that have been trapped would have to be stored somewhere. >> >> Yes. We need to store some information about that. In an old version >> of the patchset which uses NUMA balancing to promote hot pages from the >> PMEM to DRAM, we have designed a method to measure the hotness of the >> pages. The basic idea is as follows, >> >> - When the page table of a process is scanned, the latest N scanning >> address ranges and scan times are recorded in a ring buffer of >> mm_struct. >> >> - In hint page fault handler,
Re: [PATCH 4.19 00/66] 4.19.187-rc1 review
On Mon, 12 Apr 2021 at 14:13, Greg Kroah-Hartman wrote: > > This is the start of the stable review cycle for the 4.19.187 release. > There are 66 patches in this series, all will be posted as a response > to this one. If anyone has any issues with these being applied, please > let me know. > > Responses should be made by Wed, 14 Apr 2021 08:39:44 +. > Anything received after that time might be too late. > > The whole patch series can be found in one patch at: > > https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.187-rc1.gz > or in the git tree and branch at: > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > linux-4.19.y > and the diffstat can be found below. > > thanks, > > greg k-h Results from Linaro’s test farm. No regressions on arm64, arm, x86_64, and i386. Tested-by: Linux Kernel Functional Testing ## Build * kernel: 4.19.187-rc1 * git: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git * git branch: linux-4.19.y * git commit: 85bc28045cdbb9576907965c761445aaece4f5ad * git describe: v4.19.186-67-g85bc28045cdb * test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-4.19.y/build/v4.19.186-67-g85bc28045cdb ## No regressions (compared to v4.19.185-19-g6aba908ea95f) ## No fixes (compared to v4.19.185-19-g6aba908ea95f) ## Test result summary total: 65010, pass: 52744, fail: 1575, skip: 10433, xfail: 258, ## Build Summary * arm: 97 total, 96 passed, 1 failed * arm64: 25 total, 24 passed, 1 failed * dragonboard-410c: 1 total, 1 passed, 0 failed * hi6220-hikey: 1 total, 1 passed, 0 failed * i386: 15 total, 13 passed, 2 failed * juno-r2: 1 total, 1 passed, 0 failed * mips: 39 total, 39 passed, 0 failed * s390: 9 total, 9 passed, 0 failed * sparc: 9 total, 9 passed, 0 failed * x15: 2 total, 1 passed, 1 failed * x86: 1 total, 1 passed, 0 failed * x86_64: 15 total, 14 passed, 1 failed ## Test suites summary * fwts * igt-gpu-tools * install-android-platform-tools-r2600 * kselftest- * 
kselftest-android * kselftest-bpf * kselftest-capabilities * kselftest-cgroup * kselftest-clone3 * kselftest-core * kselftest-cpu-hotplug * kselftest-cpufreq * kselftest-efivarfs * kselftest-filesystems * kselftest-firmware * kselftest-fpu * kselftest-futex * kselftest-gpio * kselftest-intel_pstate * kselftest-ipc * kselftest-ir * kselftest-kcmp * kselftest-kexec * kselftest-kvm * kselftest-lib * kselftest-livepatch * kselftest-lkdtm * kselftest-membarrier * kselftest-memfd * kselftest-memory-hotplug * kselftest-mincore * kselftest-mount * kselftest-mqueue * kselftest-net * kselftest-netfilter * kselftest-nsfs * kselftest-openat2 * kselftest-pid_namespace * kselftest-pidfd * kselftest-proc * kselftest-pstore * kselftest-ptrace * kselftest-rseq * kselftest-rtc * kselftest-seccomp * kselftest-sigaltstack * kselftest-size * kselftest-splice * kselftest-static_keys * kselftest-sync * kselftest-sysctl * kselftest-tc-testing * kselftest-timens * kselftest-timers * kselftest-tmpfs * kselftest-tpm2 * kselftest-user * kselftest-vm * kselftest-vsyscall-mode-native- * kselftest-vsyscall-mode-none- * kselftest-x86 * kselftest-zram * kvm-unit-tests * libhugetlbfs * linux-log-parser * ltp-cap_bounds-tests * ltp-commands-tests * ltp-containers-tests * ltp-controllers-tests * ltp-cpuhotplug-tests * ltp-crypto-tests * ltp-cve-tests * ltp-dio-tests * ltp-fcntl-locktests-tests * ltp-filecaps-tests * ltp-fs-tests * ltp-fs_bind-tests * ltp-fs_perms_simple-tests * ltp-fsx-tests * ltp-hugetlb-tests * ltp-io-tests * ltp-ipc-tests * ltp-math-tests * ltp-mm-tests * ltp-nptl-tests * ltp-open-posix-tests * ltp-pty-tests * ltp-sched-tests * ltp-securebits-tests * ltp-syscalls-tests * ltp-tracing-tests * network-basic-tests * perf * rcutorture * ssuite * v4l2-compliance -- Linaro LKFT https://lkft.linaro.org
Re: [PATCH][next] KEYS: trusted: Fix missing null return from kzalloc call
On Mon, 12 Apr 2021 at 22:34, Colin Ian King wrote: > > On 12/04/2021 17:48, James Bottomley wrote: > > On Mon, 2021-04-12 at 17:01 +0100, Colin King wrote: > >> From: Colin Ian King > >> > >> The kzalloc call can return null with the GFP_KERNEL flag so > >> add a null check and exit via a new error exit label. Use the > >> same exit error label for another error path too. > >> > >> Addresses-Coverity: ("Dereference null return value") > >> Fixes: 830027e2cb55 ("KEYS: trusted: Add generic trusted keys > >> framework") > >> Signed-off-by: Colin Ian King > >> --- > >> security/keys/trusted-keys/trusted_core.c | 6 -- > >> 1 file changed, 4 insertions(+), 2 deletions(-) > >> > >> diff --git a/security/keys/trusted-keys/trusted_core.c > >> b/security/keys/trusted-keys/trusted_core.c > >> index ec3a066a4b42..90774793f0b1 100644 > >> --- a/security/keys/trusted-keys/trusted_core.c > >> +++ b/security/keys/trusted-keys/trusted_core.c > >> @@ -116,11 +116,13 @@ static struct trusted_key_payload > >> *trusted_payload_alloc(struct key *key) > >> > >> ret = key_payload_reserve(key, sizeof(*p)); > >> if (ret < 0) > >> -return p; > >> +goto err; > >> p = kzalloc(sizeof(*p), GFP_KERNEL); > >> +if (!p) > >> +goto err; > >> > >> p->migratable = migratable; > >> - > >> +err: > >> return p; > > > > This is clearly a code migration bug in > > > > commit 251c85bd106099e6f388a89e88e12d14de2c9cda > > Author: Sumit Garg > > Date: Mon Mar 1 18:41:24 2021 +0530 > > > > KEYS: trusted: Add generic trusted keys framework > > > > Which has for addition to trusted_core.c: > > > > +static struct trusted_key_payload *trusted_payload_alloc(struct key > > *key) > > +{ > > + struct trusted_key_payload *p = NULL; > > + int ret; > > + > > + ret = key_payload_reserve(key, sizeof(*p)); > > + if (ret < 0) > > + return p; > > + p = kzalloc(sizeof(*p), GFP_KERNEL); > > + > > + p->migratable = migratable; > > + > > + return p; > > +} > > > > And for trusted_tpm1.c: > > > > -static struct trusted_key_payload 
*trusted_payload_alloc(struct key > > *key) > > -{ > > - struct trusted_key_payload *p = NULL; > > - int ret; > > - > > - ret = key_payload_reserve(key, sizeof *p); > > - if (ret < 0) > > - return p; > > - p = kzalloc(sizeof *p, GFP_KERNEL); > > - if (p) > > - p->migratable = 1; /* migratable by default */ > > - return p; > > -} > > > > The trusted_tpm1.c code was correct and we got this bug introduced by > > what should have been a simple cut and paste ... how did that happen? It was a little more than just cut and paste where I did generalized "migratable" flag to be provided by the corresponding trust source's ops struct. > > And therefore, how safe is the rest of the extraction into > > trusted_core.c? > > > > fortunately it gets caught by static analysis, but it does make me also > concerned about what else has changed and how this gets through review. > I agree that extraction into trusted_core.c was a complex change but this patch has been up for review for almost 2 years [1]. And extensive testing can't catch this sort of bug as allocation wouldn't normally fail. [1] https://lwn.net/Articles/795416/ -Sumit > > James > > > > >
Re: [PATCH 0/1] Use of /sys/bus/pci/devices/…/index for non-SMBIOS platforms
On Mon, Apr 12, 2021 at 03:59:04PM +0200, Niklas Schnelle wrote: > Hi Narendra, Hi All, > > According to Documentation/ABI/testing/sysfs-bus-pci you are responsible > for the index device attribute that is used by systemd to create network > interface names. > > Now we would like to reuse this attribute for firmware provided PCI > device index numbers on the s390 architecture which doesn't have > SMBIOS/DMI nor ACPI. All code changes are within our architecture > specific code but I'd like to get some Acks for this reuse. I've sent an > RFC version of this patch on 15th of March with the subject: > >s390/pci: expose a PCI device's UID as its index > > but got no response. Would it be okay to re-use this attribute for > essentially the same purpose but with index numbers provided by > a different platform mechanism? I think this would be cleaner than > further proliferation of /sys/bus/pci/devices//xyz_index > attributes and allows re-use of the existing userspace infrastructure. I'm missing an explanation that this change is safe for systemd and they don't have some hard-coded assumption about the meaning of existing index on s390. Thanks
[PATCH v2][next] scsi: aacraid: Replace one-element array with flexible-array member
There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Refactor the code according to the use of a flexible-array member in struct aac_raw_io2 instead of one-element array, and use the struct_size() and flex_array_size() helpers. Also, this helps with the ongoing efforts to enable -Warray-bounds by fixing the following warnings: drivers/scsi/aacraid/aachba.c: In function ‘aac_build_sgraw2’: drivers/scsi/aacraid/aachba.c:3970:18: warning: array subscript 1 is above array bounds of ‘struct sge_ieee1212[1]’ [-Warray-bounds] 3970 | if (rio2->sge[j].length % (i*PAGE_SIZE)) { | ~^~~ drivers/scsi/aacraid/aachba.c:3974:27: warning: array subscript 1 is above array bounds of ‘struct sge_ieee1212[1]’ [-Warray-bounds] 3974 | nseg_new += (rio2->sge[j].length / (i*PAGE_SIZE)); | ~^~~ drivers/scsi/aacraid/aachba.c:4011:28: warning: array subscript 1 is above array bounds of ‘struct sge_ieee1212[1]’ [-Warray-bounds] 4011 | for (j = 0; j < rio2->sge[i].length / (pages * PAGE_SIZE); ++j) { | ~^~~ drivers/scsi/aacraid/aachba.c:4012:24: warning: array subscript 1 is above array bounds of ‘struct sge_ieee1212[1]’ [-Warray-bounds] 4012 |addr_low = rio2->sge[i].addrLow + j * pages * PAGE_SIZE; | ~^~~ drivers/scsi/aacraid/aachba.c:4014:33: warning: array subscript 1 is above array bounds of ‘struct sge_ieee1212[1]’ [-Warray-bounds] 4014 |sge[pos].addrHigh = rio2->sge[i].addrHigh; |~^~~ drivers/scsi/aacraid/aachba.c:4015:28: warning: array subscript 1 is above array bounds of ‘struct sge_ieee1212[1]’ [-Warray-bounds] 4015 |if (addr_low < rio2->sge[i].addrLow) | ~^~~ [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays Link: 
https://github.com/KSPP/linux/issues/79 Link: https://github.com/KSPP/linux/issues/109 Build-tested-by: kernel test robot Link: https://lore.kernel.org/lkml/60414244.ur4%2fki+fbf1ohkzs%25...@intel.com/ Signed-off-by: Gustavo A. R. Silva --- Changes in v2: - Add code comment for clarification. drivers/scsi/aacraid/aachba.c | 17 +++-- drivers/scsi/aacraid/aacraid.h | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 8e06604370c4..2816a15d5633 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1235,8 +1235,8 @@ static int aac_read_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u3 if (ret < 0) return ret; command = ContainerRawIo2; - fibsize = sizeof(struct aac_raw_io2) + - ((le32_to_cpu(readcmd2->sgeCnt)-1) * sizeof(struct sge_ieee1212)); + fibsize = struct_size(readcmd2, sge, +le32_to_cpu(readcmd2->sgeCnt)); } else { struct aac_raw_io *readcmd; readcmd = (struct aac_raw_io *) fib_data(fib); @@ -1366,8 +1366,8 @@ static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u if (ret < 0) return ret; command = ContainerRawIo2; - fibsize = sizeof(struct aac_raw_io2) + - ((le32_to_cpu(writecmd2->sgeCnt)-1) * sizeof(struct sge_ieee1212)); + fibsize = struct_size(writecmd2, sge, + le32_to_cpu(writecmd2->sgeCnt)); } else { struct aac_raw_io *writecmd; writecmd = (struct aac_raw_io *) fib_data(fib); @@ -4003,7 +4003,7 @@ static int aac_convert_sgraw2(struct aac_raw_io2 *rio2, int pages, int nseg, int if (aac_convert_sgl == 0) return 0; - sge = kmalloc_array(nseg_new, sizeof(struct sge_ieee1212), GFP_ATOMIC); + sge = kmalloc_array(nseg_new, sizeof(*sge), GFP_ATOMIC); if (sge == NULL) return -ENOMEM; @@ -4020,7 +4020,12 @@ static int aac_convert_sgraw2(struct aac_raw_io2 *rio2, int pages, int nseg, int } } sge[pos] = rio2->sge[nseg-1]; - memcpy(&rio2->sge[1], &sge[1], (nseg_new-1)*sizeof(struct sge_ieee1212)); + /* +* Notice that, 
in this case, flex_array_size() evaluates to +* (nseg_new - 1) number of sge objects of type struct sge_ieee1212. +*/ + memcpy(&rio2->sge[1], &sge[1], + flex_array_size(rio2, sge, nseg_new - 1)); kfree(sge); rio2->sgeCnt = cpu_to_le32(nseg_new); dif
Re: [RESEND,v5,1/2] bio: limit bio max size
And more importantly please test with a file system that uses the iomap direct I/O code (btrfs, gfs2, ext4, xfs, zonefs) as we should never just work around a legacy codebase that should go away in the block layer.
[PATCH v2 2/2] x86/tsc: skip tsc watchdog checking for qualified platforms
There are cases that tsc clocksources are wrongly judged as unstable by clocksource watchdogs like hpet, acpi_pm or 'refined-jiffies'. While there is hardly a general reliable way to check the validity of a watchdog, and to protect the innocent tsc, Thomas Gleixner proposed [1]: "I'm inclined to lift that requirement when the CPU has: 1) X86_FEATURE_CONSTANT_TSC 2) X86_FEATURE_NONSTOP_TSC 3) X86_FEATURE_NONSTOP_TSC_S3 4) X86_FEATURE_TSC_ADJUST 5) At max. 4 sockets After two decades of horrors we're finally at a point where TSC seems to be halfway reliable and less abused by BIOS tinkerers. TSC_ADJUST was really key as we can now detect even small modifications reliably and the important point is that we can cure them as well (not pretty but better than all other options)." As feature #3 X86_FEATURE_NONSTOP_TSC_S3 only exists on several generations of Atom processor, and is always coupled with X86_FEATURE_CONSTANT_TSC and X86_FEATURE_NONSTOP_TSC, skip checking it, and also be more defensive by using a maximum of 2 sockets. The check is done inside tsc_init() before registering 'tsc-early' and 'tsc' clocksources, as there were cases that both of them had been wrongly judged as unreliable. [1]. 
https://lore.kernel.org/lkml/87eekfk8bd@nanos.tec.linutronix.de/ Suggested-by: Thomas Gleixner Signed-off-by: Feng Tang --- Change log: v2: * Directly skip watchdog check without messing flag 'tsc_clocksource_reliable' (Thomas) arch/x86/kernel/tsc.c | 22 ++ 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index f70dffc..bfd013b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1177,6 +1177,12 @@ void mark_tsc_unstable(char *reason) EXPORT_SYMBOL_GPL(mark_tsc_unstable); +static void __init tsc_skip_watchdog_verify(void) +{ + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} + static void __init check_system_tsc_reliable(void) { #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) @@ -1193,6 +1199,17 @@ static void __init check_system_tsc_reliable(void) #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; + + /* +* Ideally the socket number should be checked, but this is called +* by tsc_init() which is in early boot phase and the socket numbers +* may not be available. 
Use 'nr_online_nodes' as a fallback solution +*/ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + boot_cpu_has(X86_FEATURE_TSC_ADJUST) && + nr_online_nodes <= 2) + tsc_skip_watchdog_verify(); } /* @@ -1384,9 +1401,6 @@ static int __init init_tsc_clocksource(void) if (tsc_unstable) goto unreg; - if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; @@ -1524,7 +1538,7 @@ void __init tsc_init(void) } if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + tsc_skip_watchdog_verify(); clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); -- 2.7.4
[PATCH v2 1/2] x86/tsc: add a timer to make sure tsc_adjust is always checked
Normally the tsc_sync will get checked every time system enters idle state, but Thomas Gleixner mentioned there is still a caveat that a system won't enter idle [1], either because it's too busy or configured purposely to not enter idle. Setup a periodic timer to make sure the check is always on. [1]. https://lore.kernel.org/lkml/875z286xtk@nanos.tec.linutronix.de/ Signed-off-by: Feng Tang --- Change log: v2: * skip timer setup when tsc_clocksource_reliable==1 (Thomas) * refine comment and code format (Thomas) arch/x86/kernel/tsc_sync.c | 39 +++ 1 file changed, 39 insertions(+) diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 3d3c761..39f18fa 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -30,6 +30,7 @@ struct tsc_adjust { }; static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +static struct timer_list tsc_sync_check_timer; /* * TSC's on different sockets may be reset asynchronously. @@ -77,6 +78,44 @@ void tsc_verify_tsc_adjust(bool resume) } } +/* + * Normally the tsc_sync will be checked every time system enters idle state, + * but there is still caveat that a system won't enter idle, either because + * it's too busy or configured purposely to not enter idle. + * + * So setup a periodic timer to make sure the check is always on. 
+ */ + +#define SYNC_CHECK_INTERVAL(HZ * 600) + +static void tsc_sync_check_timer_fn(struct timer_list *unused) +{ + int next_cpu; + + tsc_verify_tsc_adjust(false); + + /* Run the check for all onlined CPUs in turn */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + + tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL; + add_timer_on(&tsc_sync_check_timer, next_cpu); +} + +static int __init start_sync_check_timer(void) +{ + if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable) + return 0; + + timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0); + tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL; + add_timer(&tsc_sync_check_timer); + + return 0; +} +late_initcall(start_sync_check_timer); + static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, unsigned int cpu, bool bootcpu) { -- 2.7.4
Re: [PATCH][next] KEYS: trusted: Fix missing null return from kzalloc call
On Mon, 12 Apr 2021 at 21:31, Colin King wrote: > > From: Colin Ian King > > The kzalloc call can return null with the GFP_KERNEL flag so > add a null check and exit via a new error exit label. Use the > same exit error label for another error path too. > > Addresses-Coverity: ("Dereference null return value") > Fixes: 830027e2cb55 ("KEYS: trusted: Add generic trusted keys framework") > Signed-off-by: Colin Ian King > --- > security/keys/trusted-keys/trusted_core.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > Ah, it's my bad. Thanks for fixing this issue. Reviewed-by: Sumit Garg -Sumit > diff --git a/security/keys/trusted-keys/trusted_core.c > b/security/keys/trusted-keys/trusted_core.c > index ec3a066a4b42..90774793f0b1 100644 > --- a/security/keys/trusted-keys/trusted_core.c > +++ b/security/keys/trusted-keys/trusted_core.c > @@ -116,11 +116,13 @@ static struct trusted_key_payload > *trusted_payload_alloc(struct key *key) > > ret = key_payload_reserve(key, sizeof(*p)); > if (ret < 0) > - return p; > + goto err; > p = kzalloc(sizeof(*p), GFP_KERNEL); > + if (!p) > + goto err; > > p->migratable = migratable; > - > +err: > return p; > } > > -- > 2.30.2 >
Re: [PATCH] ibmvfc: Fix invalid state machine BUG_ON
Tyrel, > This fixes an issue hitting the BUG_ON in ibmvfc_do_work. When going > through a host action of IBMVFC_HOST_ACTION_RESET, we change the > action to IBMVFC_HOST_ACTION_TGT_DEL, then drop the host lock, and > reset the CRQ, which changes the host state to IBMVFC_NO_CRQ. [...] Applied to 5.13/scsi-staging, thanks! -- Martin K. Petersen Oracle Linux Engineering
[PATCH v2 6/9] userfaultfd/selftests: create alias mappings in the shmem test
Previously, we just allocated two shm areas: area_src and area_dst. With this commit, change this so we also allocate area_src_alias, and area_dst_alias. area_*_alias and area_* (respectively) point to the same underlying physical pages, but are different VMAs. In a future commit in this series, we'll leverage this setup to exercise minor fault handling support for shmem, just like we do in the hugetlb_shared test. Signed-off-by: Axel Rasmussen --- tools/testing/selftests/vm/userfaultfd.c | 22 +++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index fc40831f818f..1f65c4ab7994 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -278,13 +278,29 @@ static void shmem_release_pages(char *rel_area) static void shmem_allocate_area(void **alloc_area) { - unsigned long offset = - alloc_area == (void **)&area_src ? 0 : nr_pages * page_size; + void *area_alias = NULL; + bool is_src = alloc_area == (void **)&area_src; + unsigned long offset = is_src ? 
0 : nr_pages * page_size; *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); + + area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, shm_fd, offset); + if (area_alias == MAP_FAILED) + err("mmap of memfd alias failed"); + + if (is_src) + area_src_alias = area_alias; + else + area_dst_alias = area_alias; +} + +static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) +{ + *start = (unsigned long)area_dst_alias + offset; } struct uffd_test_ops { @@ -314,7 +330,7 @@ static struct uffd_test_ops shmem_uffd_test_ops = { .expected_ioctls = SHMEM_EXPECTED_IOCTLS, .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, - .alias_mapping = noop_alias_mapping, + .alias_mapping = shmem_alias_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { -- 2.31.1.295.g9ea45b61b8-goog