[PATCH] regulator: max8907: Add MODULE_ALIAS
This driver can be built as a module, add MODULE_ALIAS for it. Signed-off-by: Axel Lin --- drivers/regulator/max8907-regulator.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/max8907-regulator.c b/drivers/regulator/max8907-regulator.c index 3a5104f..19c765a 100644 --- a/drivers/regulator/max8907-regulator.c +++ b/drivers/regulator/max8907-regulator.c @@ -402,3 +402,4 @@ module_exit(max8907_reg_exit); MODULE_DESCRIPTION("MAX8907 regulator driver"); MODULE_AUTHOR("Gyungoh Yoo "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:max8907-regulator"); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v9 3/5] virtio_balloon: introduce migration primitives to balloon pages
Memory fragmentation introduced by ballooning might reduce significantly the number of 2MB contiguous memory blocks that can be used within a guest, thus imposing performance penalties associated with the reduced number of transparent huge pages that could be used by the guest workload. Besides making balloon pages movable at allocation time and introducing the necessary primitives to perform balloon page migration/compaction, the patch changes the balloon bookkeeping pages counter into an atomic counter, as well as it introduces the following locking scheme, in order to enhance the synchronization methods for accessing elements of struct virtio_balloon, thus providing protection against the concurrent accesses introduced by parallel memory compaction threads. - balloon_lock (mutex) : synchronizes the access demand to elements of struct virtio_balloon and its queue operations; - pages_lock (spinlock): special protection to balloon's pages bookmarking elements (list and atomic counters) against the potential memory compaction concurrency; Signed-off-by: Rafael Aquini --- drivers/virtio/virtio_balloon.c | 286 +--- 1 file changed, 265 insertions(+), 21 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0908e60..9b0bc46 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include /* * Balloon device works in 4K page units. So each page is pointed to by @@ -34,6 +36,7 @@ * page units. */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) +#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 struct virtio_balloon { @@ -46,11 +49,24 @@ struct virtio_balloon /* The thread servicing the balloon. 
*/ struct task_struct *thread; + /* balloon special page->mapping */ + struct address_space *mapping; + + /* Synchronize access/update to this struct virtio_balloon elements */ + struct mutex balloon_lock; + /* Waiting for host to ack the pages we released. */ wait_queue_head_t acked; + /* Number of balloon pages isolated from 'pages' list for compaction */ + atomic_t num_isolated_pages; + /* Number of balloon pages we've told the Host we're not using. */ - unsigned int num_pages; + atomic_t num_pages; + + /* Protect pages list, and pages bookeeping counters */ + spinlock_t pages_lock; + /* * The pages we've told the Host we're not using. * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE @@ -60,7 +76,7 @@ struct virtio_balloon /* The array of pfns we tell the Host about. */ unsigned int num_pfns; - u32 pfns[256]; + u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; /* Memory statistics */ int need_stats_update; @@ -122,13 +138,17 @@ static void set_page_pfns(u32 pfns[], struct page *page) static void fill_balloon(struct virtio_balloon *vb, size_t num) { + /* Get the proper GFP alloc mask from vb->mapping flags */ + gfp_t vb_gfp_mask = mapping_gfp_mask(vb->mapping); + /* We can only do one array worth at a time. 
*/ num = min(num, ARRAY_SIZE(vb->pfns)); + mutex_lock(&vb->balloon_lock); for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { - struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | - __GFP_NOMEMALLOC | __GFP_NOWARN); + struct page *page = alloc_page(vb_gfp_mask | __GFP_NORETRY | + __GFP_NOWARN | __GFP_NOMEMALLOC); if (!page) { if (printk_ratelimit()) dev_printk(KERN_INFO, &vb->vdev->dev, @@ -139,9 +159,15 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) break; } set_page_pfns(vb->pfns + vb->num_pfns, page); - vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; totalram_pages--; + + BUG_ON(!trylock_page(page)); + spin_lock(&vb->pages_lock); list_add(&page->lru, &vb->pages); + assign_balloon_mapping(page, vb->mapping); + atomic_add(VIRTIO_BALLOON_PAGES_PER_PAGE, &vb->num_pages); + spin_unlock(&vb->pages_lock); + unlock_page(page); } /* Didn't get any? Oh well. */ @@ -149,6 +175,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) return; tell_host(vb, vb->inflate_vq); + mutex_unlock(&vb->balloon_lock); } static void release_pages_by_pfn(const u32 pfns[], unsigned int num) @@ -162,19 +189,97 @@ static
[PATCH v9 5/5] mm: add vm event counters for balloon pages compaction
This patch introduces a new set of vm event counters to keep track of ballooned pages compaction activity. Signed-off-by: Rafael Aquini --- drivers/virtio/virtio_balloon.c | 1 + include/linux/vm_event_item.h | 8 +++- mm/balloon_compaction.c | 2 ++ mm/migrate.c| 1 + mm/vmstat.c | 10 +- 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9b0bc46..e1e8e30 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -528,6 +528,7 @@ int virtballoon_migratepage(struct address_space *mapping, mutex_unlock(&vb->balloon_lock); wake_up(&vb->config_change); + count_balloon_event(COMPACTBALLOONMIGRATED); return BALLOON_MIGRATION_RETURN; } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 57f7b10..13573fe 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,7 +41,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_COMPACTION COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, -#endif +#ifdef CONFIG_BALLOON_COMPACTION + COMPACTBALLOONISOLATED, /* isolated from balloon pagelist */ + COMPACTBALLOONMIGRATED, /* balloon page sucessfully migrated */ + COMPACTBALLOONRELEASED, /* old-page released after migration */ + COMPACTBALLOONRETURNED, /* putback to pagelist, not-migrated */ +#endif /* CONFIG_BALLOON_COMPACTION */ +#endif /* CONFIG_COMPACTION */ #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, #endif diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 86a3692..00e7ea9 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -110,6 +110,7 @@ bool isolate_balloon_page(struct page *page) if (__is_movable_balloon_page(page) && (page_count(page) == 2)) { __isolate_balloon_page(page); + count_balloon_event(COMPACTBALLOONISOLATED); unlock_page(page); return true; } else if 
(unlikely(!__is_movable_balloon_page(page))) { @@ -139,6 +140,7 @@ void putback_balloon_page(struct page *page) if (__is_movable_balloon_page(page)) { __putback_balloon_page(page); put_page(page); + count_balloon_event(COMPACTBALLOONRETURNED); } else { dump_page(page); __WARN(); diff --git a/mm/migrate.c b/mm/migrate.c index e47daf5..124b16b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -896,6 +896,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, list_del(&page->lru); put_page(page); __free_page(page); + count_balloon_event(COMPACTBALLOONRELEASED); return 0; } out: diff --git a/mm/vmstat.c b/mm/vmstat.c index df7a674..5824ad2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -768,7 +768,15 @@ const char * const vmstat_text[] = { "compact_stall", "compact_fail", "compact_success", -#endif + +#ifdef CONFIG_BALLOON_COMPACTION + "compact_balloon_isolated", + "compact_balloon_migrated", + "compact_balloon_released", + "compact_balloon_returned", +#endif /* CONFIG_BALLOON_COMPACTION */ + +#endif /* CONFIG_COMPACTION */ #ifdef CONFIG_HUGETLB_PAGE "htlb_buddy_alloc_success", -- 1.7.11.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v9 4/5] mm: introduce putback_movable_pages()
The PATCH "mm: introduce compaction and migration for virtio ballooned pages" hacks around putback_lru_pages() in order to allow ballooned pages to be re-inserted on balloon page list as if a ballooned page was like a LRU page. As ballooned pages are not legitimate LRU pages, this patch introduces putback_movable_pages() to properly cope with cases where the isolated pageset contains ballooned pages and LRU pages, thus fixing the mentioned inelegant hack around putback_lru_pages(). Signed-off-by: Rafael Aquini --- include/linux/migrate.h | 2 ++ mm/compaction.c | 4 ++-- mm/migrate.c| 20 mm/page_alloc.c | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ce7e667..ff103a1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -10,6 +10,7 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); #ifdef CONFIG_MIGRATION extern void putback_lru_pages(struct list_head *l); +extern void putback_movable_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, @@ -33,6 +34,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, #else static inline void putback_lru_pages(struct list_head *l) {} +static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, enum migrate_mode mode) { return -ENOSYS; } diff --git a/mm/compaction.c b/mm/compaction.c index e50836b..409b2f5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -817,9 +817,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) trace_mm_compaction_migratepages(nr_migrate - nr_remaining, nr_remaining); - /* Release LRU pages not migrated */ + /* Release isolated pages not migrated */ if (err) { - 
putback_lru_pages(&cc->migratepages); + putback_movable_pages(&cc->migratepages); cc->nr_migratepages = 0; if (err == -ENOMEM) { ret = COMPACT_PARTIAL; diff --git a/mm/migrate.c b/mm/migrate.c index ec439f8..e47daf5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -80,6 +80,26 @@ void putback_lru_pages(struct list_head *l) list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); + putback_lru_page(page); + } +} + +/* + * Put previously isolated pages back onto the appropriate lists + * from where they were once taken off for compaction/migration. + * + * This function shall be used instead of putback_lru_pages(), + * whenever the isolated pageset has been built by isolate_migratepages_range() + */ +void putback_movable_pages(struct list_head *l) +{ + struct page *page; + struct page *page2; + + list_for_each_entry_safe(page, page2, l, lru) { + list_del(&page->lru); + dec_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); if (unlikely(movable_balloon_page(page))) putback_balloon_page(page); else diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c66fb87..a0c2cc5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5675,7 +5675,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) 0, false, MIGRATE_SYNC); } - putback_lru_pages(&cc.migratepages); + putback_movable_pages(&cc.migratepages); return ret > 0 ? 0 : ret; } -- 1.7.11.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v9 1/5] mm: introduce a common interface for balloon pages mobility
Memory fragmentation introduced by ballooning might reduce significantly the number of 2MB contiguous memory blocks that can be used within a guest, thus imposing performance penalties associated with the reduced number of transparent huge pages that could be used by the guest workload. This patch introduces a common interface to help a balloon driver on making its page set movable to compaction, and thus allowing the system to better leverage the compaction efforts on memory defragmentation. Signed-off-by: Rafael Aquini --- include/linux/balloon_compaction.h | 137 + include/linux/pagemap.h| 18 mm/Kconfig | 15 mm/Makefile| 2 +- mm/balloon_compaction.c| 172 + 5 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 include/linux/balloon_compaction.h create mode 100644 mm/balloon_compaction.c diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h new file mode 100644 index 000..7afb0ae --- /dev/null +++ b/include/linux/balloon_compaction.h @@ -0,0 +1,137 @@ +/* + * include/linux/balloon_compaction.h + * + * Common interface definitions for making balloon pages movable to compaction. + * + * Copyright (C) 2012, Red Hat, Inc. Rafael Aquini + */ +#ifndef _LINUX_BALLOON_COMPACTION_H +#define _LINUX_BALLOON_COMPACTION_H +#ifdef __KERNEL__ + +#include +#include +#include + +#ifdef CONFIG_BALLOON_COMPACTION +#define count_balloon_event(e) count_vm_event(e) +extern bool isolate_balloon_page(struct page *); +extern void putback_balloon_page(struct page *); +extern int migrate_balloon_page(struct page *newpage, + struct page *page, enum migrate_mode mode); + +static inline gfp_t balloon_mapping_gfp_mask(void) +{ + return GFP_HIGHUSER_MOVABLE; +} + +/* + * movable_balloon_page - test page->mapping->flags to identify balloon pages + * that can be moved by compaction/migration. 
+ * + * This function is used at core compaction's page isolation scheme and so it's + * exposed to several system pages which may, or may not, be part of a memory + * balloon, and thus we cannot afford to hold a page locked to perform tests. + * + * Therefore, as we might return false positives in the case a balloon page + * is just released under us, the page->mapping->flags need to be retested + * with the proper page lock held, on the functions that will cope with the + * balloon page later. + */ +static inline bool movable_balloon_page(struct page *page) +{ + /* +* Before dereferencing and testing mapping->flags, lets make sure +* this is not a page that uses ->mapping in a different way +*/ + if (!PageSlab(page) && !PageSwapCache(page) && + !PageAnon(page) && !page_mapped(page)) { + /* +* While doing compaction core work, we cannot afford to hold +* page lock as it might cause very undesirable side effects. +*/ + struct address_space *mapping; + mapping = rcu_dereference_raw(page->mapping); + if (mapping) + return mapping_balloon(mapping); + } + return false; +} + +/* + * __page_balloon_device - return the balloon device owing the page. + * + * This shall only be used at driver callbacks under proper page lock, + * to get access to the balloon device structure that owns @page. + */ +static inline void *__page_balloon_device(struct page *page) +{ + struct address_space *mapping; + mapping = rcu_dereference_protected(page->mapping, PageLocked(page)); + if (mapping) + mapping = mapping->assoc_mapping; + return (void *)mapping; +} + +/* + * DEFINE_BALLOON_MAPPING_AOPS - declare and instantiate a callback descriptor + * to be used as balloon page->mapping->a_ops. 
+ * + * @label : declaration identifier (var name) + * @isolatepg : callback symbol name for performing the page isolation step + * @migratepg : callback symbol name for performing the page migration step + * @putbackpg : callback symbol name for performing the page putback step + * + * address_space_operations utilized methods for ballooned pages: + * .migratepage- used to perform balloon's page migration (as is) + * .invalidatepage - used to isolate a page from balloon's page list + * .freepage - used to reinsert an isolated page to balloon's page list + */ +#define DEFINE_BALLOON_MAPPING_AOPS(label, isolatepg, migratepg, putbackpg) \ + const struct address_space_operations (label) = { \ + .migratepage= (migratepg), \ + .invalidatepage = (isolatepg), \ + .freepage = (putbackpg), \ + } + +
[PATCH v9 0/5] make balloon pages movable by compaction
Memory fragmentation introduced by ballooning might reduce significantly the number of 2MB contiguous memory blocks that can be used within a guest, thus imposing performance penalties associated with the reduced number of transparent huge pages that could be used by the guest workload. This patch-set follows the main idea discussed at 2012 LSFMMS session: "Ballooning for transparent huge pages" -- http://lwn.net/Articles/490114/ to introduce the required changes to the virtio_balloon driver, as well as the changes to the core compaction & migration bits, in order to make those subsystems aware of ballooned pages and allow memory balloon pages become movable within a guest, thus avoiding the aforementioned fragmentation issue Rafael Aquini (5): mm: introduce a common interface for balloon pages mobility mm: introduce compaction and migration for ballooned pages virtio_balloon: introduce migration primitives to balloon pages mm: introduce putback_movable_pages() mm: add vm event counters for balloon pages compaction drivers/virtio/virtio_balloon.c| 287 ++--- include/linux/balloon_compaction.h | 137 ++ include/linux/migrate.h| 2 + include/linux/pagemap.h| 18 +++ include/linux/vm_event_item.h | 8 +- mm/Kconfig | 15 ++ mm/Makefile| 2 +- mm/balloon_compaction.c| 174 ++ mm/compaction.c| 51 --- mm/migrate.c | 57 +++- mm/page_alloc.c| 2 +- mm/vmstat.c| 10 +- 12 files changed, 715 insertions(+), 48 deletions(-) create mode 100644 include/linux/balloon_compaction.h create mode 100644 mm/balloon_compaction.c Change log: v9: * Adjust rcu_dereference usage to leverage page lock protection (Paul, Peter); * Enhance doc on compaction interface introduced to balloon driver (Michael); * Fix issue with isolated pages breaking leak_balloon() logics (Michael); v8: * introduce a common MM interface for balloon driver page compaction (Michael); * remove the global state preventing multiple balloon device support (Michael); * introduce RCU protection/syncrhonization to balloon 
page->mapping (Michael); v7: * fix a potential page leak case at 'putback_balloon_page' (Mel); * adjust vm-events-counter patch and remove its drop-on-merge message(Rik); * add 'putback_movable_pages' to avoid hacks on 'putback_lru_pages' (Minchan); v6: * rename 'is_balloon_page()' to 'movable_balloon_page()' (Rik); v5: * address Andrew Morton's review comments on the patch series; * address a couple extra nitpick suggestions on PATCH 01 (Minchan); v4: * address Rusty Russel's review comments on PATCH 02; * re-base virtio_balloon patch on 9c378abc5c0c6fc8e3acf5968924d274503819b3; V3: * address reviewers nitpick suggestions on PATCH 01 (Mel, Minchan); V2: * address Mel Gorman's review comments on PATCH 01; Preliminary test results: (2 VCPU 2048mB RAM KVM guest running 3.6.0_rc3+ -- after a reboot) * 64mB balloon: [root@localhost ~]# awk '/compact/ {print}' /proc/vmstat compact_blocks_moved 0 compact_pages_moved 0 compact_pagemigrate_failed 0 compact_stall 0 compact_fail 0 compact_success 0 compact_balloon_isolated 0 compact_balloon_migrated 0 compact_balloon_released 0 compact_balloon_returned 0 [root@localhost ~]# [root@localhost ~]# for i in $(seq 1 6); do echo 1 > /proc/sys/vm/compact_memory & done &>/dev/null [1] Doneecho 1 > /proc/sys/vm/compact_memory [2] Doneecho 1 > /proc/sys/vm/compact_memory [3] Doneecho 1 > /proc/sys/vm/compact_memory [4] Doneecho 1 > /proc/sys/vm/compact_memory [5]- Doneecho 1 > /proc/sys/vm/compact_memory [6]+ Doneecho 1 > /proc/sys/vm/compact_memory [root@localhost ~]# [root@localhost ~]# awk '/compact/ {print}' /proc/vmstat compact_blocks_moved 3108 compact_pages_moved 43169 compact_pagemigrate_failed 95 compact_stall 0 compact_fail 0 compact_success 0 compact_balloon_isolated 16384 compact_balloon_migrated 16384 compact_balloon_released 16384 compact_balloon_returned 0 * 128 mB balloon: [root@localhost ~]# awk '/compact/ {print}' /proc/vmstat compact_blocks_moved 0 compact_pages_moved 0 compact_pagemigrate_failed 0 compact_stall 0 
compact_fail 0 compact_success 0 compact_balloon_isolated 0 compact_balloon_migrated 0 compact_balloon_released 0 compact_balloon_returned 0 [root@localhost ~]# [root@localhost ~]# for i in $(seq 1 6); do echo 1 > /proc/sys/vm/compact_memory & done &>/dev/null [1] Doneecho 1 > /proc/sys/vm/compact_memory [2] Doneecho 1 > /proc/sys/vm/compact_memory [3] Doneecho 1 > /proc/sys/vm/compact_memory [4] Done
[PATCH v9 2/5] mm: introduce compaction and migration for ballooned pages
Memory fragmentation introduced by ballooning might reduce significantly the number of 2MB contiguous memory blocks that can be used within a guest, thus imposing performance penalties associated with the reduced number of transparent huge pages that could be used by the guest workload. This patch introduces the helper functions as well as the necessary changes to teach compaction and migration bits how to cope with pages which are part of a guest memory balloon, in order to make them movable by memory compaction procedures. Signed-off-by: Rafael Aquini --- mm/compaction.c | 47 --- mm/migrate.c| 36 ++-- 2 files changed, 62 insertions(+), 21 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 7fcd3a5..e50836b 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" #if defined CONFIG_COMPACTION || defined CONFIG_CMA @@ -358,32 +359,40 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (!PageLRU(page)) - continue; - /* -* PageLRU is set, and lru_lock excludes isolation, -* splitting and collapsing (collapsing has already -* happened if PageLRU is set). +* It is possible to migrate LRU pages and balloon pages. +* Skip any other type of page. */ - if (PageTransHuge(page)) { - low_pfn += (1 << compound_order(page)) - 1; - continue; - } + if (PageLRU(page)) { + /* +* PageLRU is set, and lru_lock excludes isolation, +* splitting and collapsing (collapsing has already +* happened if PageLRU is set). 
+*/ + if (PageTransHuge(page)) { + low_pfn += (1 << compound_order(page)) - 1; + continue; + } - if (!cc->sync) - mode |= ISOLATE_ASYNC_MIGRATE; + if (!cc->sync) + mode |= ISOLATE_ASYNC_MIGRATE; - lruvec = mem_cgroup_page_lruvec(page, zone); + lruvec = mem_cgroup_page_lruvec(page, zone); - /* Try isolate the page */ - if (__isolate_lru_page(page, mode) != 0) - continue; + /* Try isolate the page */ + if (__isolate_lru_page(page, mode) != 0) + continue; - VM_BUG_ON(PageTransCompound(page)); + VM_BUG_ON(PageTransCompound(page)); + + /* Successfully isolated */ + del_page_from_lru_list(page, lruvec, page_lru(page)); + } else if (unlikely(movable_balloon_page(page))) { + if (!isolate_balloon_page(page)) + continue; + } else + continue; - /* Successfully isolated */ - del_page_from_lru_list(page, lruvec, page_lru(page)); list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; diff --git a/mm/migrate.c b/mm/migrate.c index 77ed2d7..ec439f8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -79,7 +80,10 @@ void putback_lru_pages(struct list_head *l) list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - putback_lru_page(page); + if (unlikely(movable_balloon_page(page))) + putback_balloon_page(page); + else + putback_lru_page(page); } } @@ -799,6 +803,18 @@ static int __unmap_and_move(struct page *page, struct page *newpage, goto skip_unmap; } + if (unlikely(movable_balloon_page(page))) { + /* +* A ballooned page does not need any special attention from +* physical to virtual reverse mapping procedures. +* Skip any attempt to unmap PTEs or to remap swap cache, +* in order to avoid burning cycles at rmap level, and perform +* the page migration right away (proteced by page lock). +*/ + rc = migrate_balloon_page(newpage, page, mode); + goto uncharge; + } + /* Establish migration ptes or remove ptes */ try_to_unmap(page, TTU_MIGRATION|
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On Fri, Aug 24, 2012 at 9:24 PM, Jacob Shin wrote: > On Fri, Aug 24, 2012 at 06:07:01PM -0700, Yinghai Lu wrote: >> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: >> > Currently direct mappings are created for [ 0 to max_low_pfn<> > and [ 4GB to max_pfn<> > backed by actual DRAM. This is fine for holes under 4GB which are covered >> > by fixed and variable range MTRRs to be UC. However, we run into trouble >> > on higher memory addresses which cannot be covered by MTRRs. >> > >> > Our system with 1TB of RAM has an e820 that looks like this: >> > >> > BIOS-e820: [mem 0x-0x000983ff] usable >> > BIOS-e820: [mem 0x00098400-0x0009] reserved >> > BIOS-e820: [mem 0x000d-0x000f] reserved >> > BIOS-e820: [mem 0x0010-0xc7eb] usable >> > BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data >> > BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS >> > BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved >> > BIOS-e820: [mem 0xfec0-0xfec0] reserved >> > BIOS-e820: [mem 0xfee0-0xfee00fff] reserved >> > BIOS-e820: [mem 0xfff0-0x] reserved >> > BIOS-e820: [mem 0x0001-0x00e037ff] usable >> > BIOS-e820: [mem 0x00e03800-0x00fc] reserved >> > BIOS-e820: [mem 0x0100-0x011ffeff] usable >> > >> > and so direct mappings are created for huge memory hole between >> > 0x00e03800 to 0x0100. Even though the kernel never >> > generates memory accesses in that region, since the page tables mark >> > them incorrectly as being WB, our (AMD) processor ends up causing a MCE >> > while doing some memory bookkeeping/optimizations around that area. >> > >> > This patch iterates through e820 and only direct maps ranges that are >> > marked as E820_RAM, and keeps track of those pfn ranges. Depending on >> > the alignment of E820 ranges, this may possibly result in using smaller >> > size (i.e. 4K instead of 2M or 1G) page tables. 
>> > >> > Signed-off-by: Jacob Shin >> > --- >> > arch/x86/include/asm/page_types.h |9 +++ >> > arch/x86/kernel/setup.c | 125 >> > + >> > arch/x86/mm/init.c|2 + >> > arch/x86/mm/init_64.c |6 +- >> > 4 files changed, 112 insertions(+), 30 deletions(-) >> > >> > diff --git a/arch/x86/include/asm/page_types.h >> > b/arch/x86/include/asm/page_types.h >> > index e21fdd1..409047a 100644 >> > --- a/arch/x86/include/asm/page_types.h >> > +++ b/arch/x86/include/asm/page_types.h >> > @@ -3,6 +3,7 @@ >> > >> > #include >> > #include >> > +#include >> > >> > /* PAGE_SHIFT determines the page size */ >> > #define PAGE_SHIFT 12 >> > @@ -40,12 +41,20 @@ >> > #endif /* CONFIG_X86_64 */ >> > >> > #ifndef __ASSEMBLY__ >> > +#include >> > >> > extern int devmem_is_allowed(unsigned long pagenr); >> > >> > extern unsigned long max_low_pfn_mapped; >> > extern unsigned long max_pfn_mapped; >> > >> > +extern struct range pfn_mapped[E820_X_MAX]; >> > +extern int nr_pfn_mapped; >> > + >> > +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long >> > end_pfn); >> > +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long >> > end_pfn); >> > +extern bool pfn_is_mapped(unsigned long pfn); >> > + >> > static inline phys_addr_t get_max_mapped(void) >> > { >> > return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; >> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >> > index 751e020..4217fb4 100644 >> > --- a/arch/x86/kernel/setup.c >> > +++ b/arch/x86/kernel/setup.c >> > @@ -115,13 +115,46 @@ >> > #include >> > >> > /* >> > - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 >> > entries. >> > - * The direct mapping extends to max_pfn_mapped, so that we can directly >> > access >> > - * apertures, ACPI and other tables without having to play with fixmaps. 
>> > + * max_low_pfn_mapped: highest direct mapped pfn under 4GB >> > + * max_pfn_mapped: highest direct mapped pfn over 4GB >> > + * >> > + * The direct mapping only covers E820_RAM regions, so the ranges and >> > gaps are >> > + * represented by pfn_mapped >> > */ >> > unsigned long max_low_pfn_mapped; >> > unsigned long max_pfn_mapped; >> > >> > +struct range pfn_mapped[E820_X_MAX]; >> > +int nr_pfn_mapped; >> > + >> > +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) >> > +{ >> > + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, >> > +nr_pfn_mapped, start_pfn, >> > end_pfn); >> > + >> > + max_pfn_mapped = max(max_pfn_mapped, end_pfn); >> > + >> > + if (end_pfn <= (1UL << (32 - PAGE_SHIFT))) >> > + max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn); >> > +} >> > + >> > +bool pfn_range_is_map
Re: [PATCH 5/5] x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix
On Fri, Aug 24, 2012 at 06:23:48PM -0700, Yinghai Lu wrote: > On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > > There could be cases where user supplied memmap=exactmap memory > > mappings do not mark the region where the kernel .text .data and > > .bss reside as E820_RAM as reported here: > > > > https://lkml.org/lkml/2012/8/14/86 > > > > Handle it by complaining, and adding the range back into the e820. > > > > Signed-off-by: Jacob Shin > > --- > > arch/x86/kernel/setup.c | 15 +++ > > 1 file changed, 15 insertions(+) > > > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > > index 4217fb4..b84aceb5 100644 > > --- a/arch/x86/kernel/setup.c > > +++ b/arch/x86/kernel/setup.c > > @@ -926,6 +926,21 @@ void __init setup_arch(char **cmdline_p) > > insert_resource(&iomem_resource, &data_resource); > > insert_resource(&iomem_resource, &bss_resource); > > > > + /* > > +* Complain if .text .data and .bss are not marked as E820_RAM and > > +* attempt to fix it by adding the range. We may have a confused > > BIOS, > > +* or the user may have incorrectly supplied it via > > memmap=exactmap. If > > +* we really are running on top non-RAM, we will crash later > > anyways. > > +*/ > > + if (!e820_all_mapped(code_resource.start, bss_resource.end, > > E820_RAM)) { > > + pr_warn(".text .data .bss are not marked as E820_RAM!\n"); > > + > > + e820_add_region(code_resource.start, > > + bss_resource.end - code_resource.start + 1, > > + E820_RAM); > > + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), > > &e820.nr_map); > >this sanitze_e820_map could be spared. trim_bios_range will > that always. Ah. okay > > > + } > > + > > trim_bios_range(); > > #ifdef CONFIG_X86_32 > > if (ppro_with_ram_bug()) { > > also should use brk_limit instead of bss_resource.end. aka need to > keep the map for brk area. Okay.. 
will fix on Monday > > Thanks > > Yinghai > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable
* Tejun Heo (t...@kernel.org) wrote: > Hello, > > On Sat, Aug 25, 2012 at 12:59:25AM +0200, Sasha Levin wrote: > > Thats the thing, the amount of things of things you can do with a given > > bucket > > is very limited. You can't add entries to any point besides the head > > (without > > walking the entire list). > > Kinda my point. We already have all the hlist*() interface to deal > with such cases. Having something which is evidently the trivial > hlist hashtable and advertises as such in the interface can be > helpful. I think we need that more than we need anything fancy. > > Heh, this is a debate about which one is less insignificant. I can > see your point. I'd really like to hear what others think on this. > > Guys, do we want something which is evidently trivial hlist hashtable > which can use hlist_*() API directly or do we want something better > encapsulated? My 2 cents, FWIW: I think this specific effort should target a trivially understandable API and implementation, for use-cases where one would be tempted to reimplement his own trivial hash table anyway. So here exposing hlist internals, with which kernel developers are already familiar, seems like a good approach in my opinion, because hiding stuff behind new abstraction might make the target users go away. Then, as we see the need, we can eventually merge a more elaborate hash table with ponies and whatnot, but I would expect that the trivial hash table implementation would still be useful. There are of course very compelling reasons to use a more featureful hash table: automatic resize, RT-aware updates, scalable updates, etc... but I see a purpose for a trivial implementation. Its primary strong points being: - it's trivially understandable, so anyone who wants to be really sure they won't end up debugging the hash table instead of their work-in-progress code can have a full understanding of it, - it has few dependencies, which makes it easier to understand and easier to use in some contexts (e.g. 
early boot). So I'm in favor of not overdoing the abstraction for this trivial hash table, and honestly I would rather prefer that this trivial hash table stays trivial. A more elaborate hash table should probably come as a separate API. Thanks, Mathieu -- Mathieu Desnoyers Operating System Efficiency R&D Consultant EfficiOS Inc. http://www.efficios.com -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On Fri, Aug 24, 2012 at 06:07:01PM -0700, Yinghai Lu wrote: > On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > > Currently direct mappings are created for [ 0 to max_low_pfn< > and [ 4GB to max_pfn< > backed by actual DRAM. This is fine for holes under 4GB which are covered > > by fixed and variable range MTRRs to be UC. However, we run into trouble > > on higher memory addresses which cannot be covered by MTRRs. > > > > Our system with 1TB of RAM has an e820 that looks like this: > > > > BIOS-e820: [mem 0x-0x000983ff] usable > > BIOS-e820: [mem 0x00098400-0x0009] reserved > > BIOS-e820: [mem 0x000d-0x000f] reserved > > BIOS-e820: [mem 0x0010-0xc7eb] usable > > BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data > > BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS > > BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved > > BIOS-e820: [mem 0xfec0-0xfec0] reserved > > BIOS-e820: [mem 0xfee0-0xfee00fff] reserved > > BIOS-e820: [mem 0xfff0-0x] reserved > > BIOS-e820: [mem 0x0001-0x00e037ff] usable > > BIOS-e820: [mem 0x00e03800-0x00fc] reserved > > BIOS-e820: [mem 0x0100-0x011ffeff] usable > > > > and so direct mappings are created for huge memory hole between > > 0x00e03800 to 0x0100. Even though the kernel never > > generates memory accesses in that region, since the page tables mark > > them incorrectly as being WB, our (AMD) processor ends up causing a MCE > > while doing some memory bookkeeping/optimizations around that area. > > > > This patch iterates through e820 and only direct maps ranges that are > > marked as E820_RAM, and keeps track of those pfn ranges. Depending on > > the alignment of E820 ranges, this may possibly result in using smaller > > size (i.e. 4K instead of 2M or 1G) page tables. 
> > > > Signed-off-by: Jacob Shin > > --- > > arch/x86/include/asm/page_types.h |9 +++ > > arch/x86/kernel/setup.c | 125 > > + > > arch/x86/mm/init.c|2 + > > arch/x86/mm/init_64.c |6 +- > > 4 files changed, 112 insertions(+), 30 deletions(-) > > > > diff --git a/arch/x86/include/asm/page_types.h > > b/arch/x86/include/asm/page_types.h > > index e21fdd1..409047a 100644 > > --- a/arch/x86/include/asm/page_types.h > > +++ b/arch/x86/include/asm/page_types.h > > @@ -3,6 +3,7 @@ > > > > #include > > #include > > +#include > > > > /* PAGE_SHIFT determines the page size */ > > #define PAGE_SHIFT 12 > > @@ -40,12 +41,20 @@ > > #endif /* CONFIG_X86_64 */ > > > > #ifndef __ASSEMBLY__ > > +#include > > > > extern int devmem_is_allowed(unsigned long pagenr); > > > > extern unsigned long max_low_pfn_mapped; > > extern unsigned long max_pfn_mapped; > > > > +extern struct range pfn_mapped[E820_X_MAX]; > > +extern int nr_pfn_mapped; > > + > > +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long > > end_pfn); > > +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long > > end_pfn); > > +extern bool pfn_is_mapped(unsigned long pfn); > > + > > static inline phys_addr_t get_max_mapped(void) > > { > > return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > > index 751e020..4217fb4 100644 > > --- a/arch/x86/kernel/setup.c > > +++ b/arch/x86/kernel/setup.c > > @@ -115,13 +115,46 @@ > > #include > > > > /* > > - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 > > entries. > > - * The direct mapping extends to max_pfn_mapped, so that we can directly > > access > > - * apertures, ACPI and other tables without having to play with fixmaps. 
> > + * max_low_pfn_mapped: highest direct mapped pfn under 4GB > > + * max_pfn_mapped: highest direct mapped pfn over 4GB > > + * > > + * The direct mapping only covers E820_RAM regions, so the ranges and gaps > > are > > + * represented by pfn_mapped > > */ > > unsigned long max_low_pfn_mapped; > > unsigned long max_pfn_mapped; > > > > +struct range pfn_mapped[E820_X_MAX]; > > +int nr_pfn_mapped; > > + > > +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) > > +{ > > + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, > > +nr_pfn_mapped, start_pfn, > > end_pfn); > > + > > + max_pfn_mapped = max(max_pfn_mapped, end_pfn); > > + > > + if (end_pfn <= (1UL << (32 - PAGE_SHIFT))) > > + max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn); > > +} > > + > > +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) > > +{ > > + int i; > > + > > + for (i = 0; i < nr_pfn_mapped; i++) > > + if ((start_pfn >= pf
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On 08/24/2012 09:20 PM, Jacob Shin wrote: What is the benefit? So that in the case where we have E820_RAM right above 1MB, we don't call init_memory_mapping twice, first on 0 ~ 1MB and then 1MB ~ something we only call it once. 0 ~ something. So what is the benefit? -hpa -- H. Peter Anvin, Intel Open Source Technology Center I work for Intel. I don't speak on their behalf. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On Fri, Aug 24, 2012 at 06:13:02PM -0700, H. Peter Anvin wrote: > On 08/24/2012 05:49 PM, Jacob Shin wrote: > > > > Right, I think what I was attempting to do was to merge the 1MB > > with E820_RAM right above 1MB: > > > > So instead of: > > > > init_memory_mapping(0, 1MB) > > init_memory_mapping(1MB, 2GB) > > > > It would be: > > > > init_memory_mapping(0, 2GB) > > > > While taking care of the odd case where there is a gap right after > > 1MB. > > > > But if its not worth it, I can move it out of the loop. > > > > What is the benefit? So that in the case where we have E820_RAM right above 1MB, we don't call init_memory_mapping twice, first on 0 ~ 1MB and then on 1MB ~ something; we only call it once, on 0 ~ something. I'll get it out of the loop if you don't think it's a good idea. > > -hpa > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping
On Fri, Aug 24, 2012 at 07:06:42PM -0700, Yinghai Lu wrote: > On Fri, Aug 24, 2012 at 6:49 PM, Yinghai Lu wrote: > > On Fri, Aug 24, 2012 at 6:25 PM, Yinghai Lu wrote: > >> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > >>> Depending on the platform, init_memory_mapping() may be called multiple > >>> times. Move it out to setup_arch() to avoid writing to cr4 on every call. > >>> > >>> Signed-off-by: Jacob Shin > >>> --- > >>> arch/x86/kernel/setup.c | 10 ++ > >>> arch/x86/mm/init.c | 10 -- > >>> 2 files changed, 10 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > >>> index f4b9b80..751e020 100644 > >>> --- a/arch/x86/kernel/setup.c > >>> +++ b/arch/x86/kernel/setup.c > >>> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p) > >>> > >>> init_gbpages(); > >>> > >>> + /* Enable PSE if available */ > >>> + if (cpu_has_pse) > >>> + set_in_cr4(X86_CR4_PSE); > >>> + > >>> + /* Enable PGE if available */ > >>> + if (cpu_has_pge) { > >>> + set_in_cr4(X86_CR4_PGE); > >>> + __supported_pte_mask |= _PAGE_GLOBAL; > >>> + } > >>> + > >> > >> please don't put it directly in setup_arch(). > >> > >> and another function. > >> > > > > Jacob, hpa > > > > can you use attached one to replace the first patch? > > Please use attached two instead. Hmm .. okay I'll test with these two patches applied on Monday .. > > Thanks > > Yinghai -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping
On Fri, Aug 24, 2012 at 06:25:38PM -0700, Yinghai Lu wrote: > On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > > Depending on the platform, init_memory_mapping() may be called multiple > > times. Move it out to setup_arch() to avoid writing to cr4 on every call. > > > > Signed-off-by: Jacob Shin > > --- > > arch/x86/kernel/setup.c | 10 ++ > > arch/x86/mm/init.c | 10 -- > > 2 files changed, 10 insertions(+), 10 deletions(-) > > > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > > index f4b9b80..751e020 100644 > > --- a/arch/x86/kernel/setup.c > > +++ b/arch/x86/kernel/setup.c > > @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p) > > > > init_gbpages(); > > > > + /* Enable PSE if available */ > > + if (cpu_has_pse) > > + set_in_cr4(X86_CR4_PSE); > > + > > + /* Enable PGE if available */ > > + if (cpu_has_pge) { > > + set_in_cr4(X86_CR4_PGE); > > + __supported_pte_mask |= _PAGE_GLOBAL; > > + } > > + > > please don't put it directly in setup_arch(). > > and another function. 
It actually gets moved out to another function in patch 3/5 > > Thanks > > Yinghai > > > /* max_pfn_mapped is updated here */ > > max_low_pfn_mapped = init_memory_mapping(0, > > max_low_pfn< > max_pfn_mapped = max_low_pfn_mapped; > > diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c > > index e0e6990..2f07e09 100644 > > --- a/arch/x86/mm/init.c > > +++ b/arch/x86/mm/init.c > > @@ -149,16 +149,6 @@ unsigned long __init_refok > > init_memory_mapping(unsigned long start, > > use_gbpages = direct_gbpages; > > #endif > > > > - /* Enable PSE if available */ > > - if (cpu_has_pse) > > - set_in_cr4(X86_CR4_PSE); > > - > > - /* Enable PGE if available */ > > - if (cpu_has_pge) { > > - set_in_cr4(X86_CR4_PGE); > > - __supported_pte_mask |= _PAGE_GLOBAL; > > - } > > - > > if (use_gbpages) > > page_size_mask |= 1 << PG_LEVEL_1G; > > if (use_pse) > > -- > > 1.7.9.5 > > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RT 2/2] Linux 3.0.41-rt62-rc1
--- localversion-rt |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localversion-rt b/localversion-rt index 9b7de93..fef6b3c 100644 --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt61 +-rt62-rc1 -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2
On Sat, Aug 25, 2012 at 02:19:14AM +0100, Ben Hutchings wrote: > On Fri, 2012-08-24 at 14:26 -0700, Paul E. McKenney wrote: > > On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote: > > > Hi, > > > > > > Changes since v1: > > > > > > - Fixed preempt handling in alpha idle loop > > > - added ack from Geert > > > - fixed stable email address, sorry :-/ > > > > > > This time I built tested everywhere but: h8300 (compiler internal error), > > > and mn10300, parisc, score (cross compilers not available in > > > ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/) > > > > > > For testing, you can pull from: > > > > > > git://github.com/fweisbec/linux-dynticks.git > > > rcu/idle-fix-v2 > > > > > > Thanks. > > > > I have queued these on -rcu branch rcu/idle: > > > > git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git > > > > This problem has been in place since 3.3, so it is hard to argue that > > it is a regression for this merge window. I have therefore queued it > > for 3.7. > > I don't follow that; I would expect any serious bug fix (serious enough > for a stable update) to be acceptable for 3.6 at this point. OK, if any of the arch maintainers wishes to submit the patch to 3.6, they are free to do so -- just let me know and I will drop the patch from my tree. That said, all this does is cause spurious warnings to be printed, so not sure it really qualifies as serious. But I am happy to leave that decision with the individual arch maintainers -- it is their arch, after all, so their decision. Thanx, Paul > If the regression occurred in 3.3, then the cc lines should be something > like: > > Cc: # 3.3+ > > and not the current: > > Cc: 3.2.x.. > > (Note, version annotations should be on the right of the address, not in > the 'real name' position on the left.) 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RT 1/2] fix printk flush of messages
Updates console-make-rt-friendly.patch #ifdef CONFIG_PREEMPT_RT_FULL, printk() output is never flushed by printk() because: # some liberties taken in this pseudo-code to make it easier to follow printk() vprintk() raw_spin_lock(&logbuf_lock) # increment preempt_count(): preempt_disable() result = console_trylock_for_printk() retval = 0 # lock will always be false, because preempt_count() will be >= 1 lock = ... && !preempt_count() if (lock) retval = 1 return retval # result will always be false since lock will always be false if (result) console_unlock() # this is where the printk() output would be flushed On system boot some printk() output is flushed because register_console() and tty_open() call console_unlock(). This change also fixes the problem that was previously fixed by preempt-rt-allow-immediate-magic-sysrq-output-for-preempt_rt_full.patch Signed-off-by: Frank Rowand Cc: Frank Link: http://lkml.kernel.org/r/4fb44fd0.4090...@am.sony.com Signed-off-by: Thomas Gleixner --- kernel/printk.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk.c b/kernel/printk.c index 60f4290..f2c459f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -835,7 +835,7 @@ static int console_trylock_for_printk(unsigned int cpu, unsigned long flags) { #ifdef CONFIG_PREEMPT_RT_FULL int lock = (!early_boot_irqs_disabled && !irqs_disabled_flags(flags) && - !preempt_count()) || sysrq_in_progress; + (preempt_count() <= 1)) || sysrq_in_progress; #else int lock = 1; #endif -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RT 0/2] [ANNOUNCE] 3.0.41-rt62-rc1 stable review
Dear RT Folks, This is the RT stable review cycle of patch 3.0.41-rt62-rc1. Please scream at me if I messed something up. Please test the patches too. The -rc release will be uploaded to kernel.org and will be deleted when the final release is out. This is just a review release (or release candidate). The pre-releases will not be pushed to the git repository, only the final release is. If all goes well, this patch will be converted to the next main release on 8/29/2012 (again on the late Michael Jackson's B-day, and my buddy Derek's) Enjoy, -- Steve To build 3.0.41-rt62-rc1 directly, the following patches should be applied: http://www.kernel.org/pub/linux/kernel/v3.0/linux-3.0.tar.xz http://www.kernel.org/pub/linux/kernel/v3.0/patch-3.0.41.xz http://www.kernel.org/pub/linux/kernel/projects/rt/3.0/patch-3.0.41-rt62-rc1.patch.xz You can also build from 3.0.41-rt61 by applying the incremental patch: http://www.kernel.org/pub/linux/kernel/projects/rt/3.0/incr/patch-3.0.41-rt61-rt62-rc1.patch.xz Changes from 3.0.41-rt61: --- Frank Rowand (1): fix printk flush of messages Steven Rostedt (1): Linux 3.0.41-rt62-rc1 kernel/printk.c |2 +- localversion-rt |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/1 v1] leds: Add new LED driver for lm355x chips
On Fri, Aug 24, 2012 at 12:06 PM, G.Shark Jeong wrote: > From: "G.Shark Jeong" > > LM3554 and LM3556 have similar functions but very different register map. > This driver is a general version for LM355x,lm3554 and lm3556,led chips of TI. > lm3556 driver can be replaced by this driver. > > LM3554 : > The LM3554 is a 2 MHz fixed-frequency synchronous boost > converter with 1.2A dual high side led drivers. > Datasheet: www.ti.com/lit/ds/symlink/lm3554.pdf > > LM3556 : > The LM3556 is a 4 MHz fixed-frequency synchronous boost > converter plus 1.5A constant current driver for a high-current white LED. > Datasheet: www.national.com/ds/LM/LM3556.pdf > > Signed-off-by: G.Shark Jeong > --- > drivers/leds/Kconfig |8 +- > drivers/leds/Makefile |2 +- > drivers/leds/leds-lm3556.c| 512 > drivers/leds/leds-lm355x.c| 529 > + > include/linux/platform_data/leds-lm3556.h | 50 --- > include/linux/platform_data/leds-lm355x.h | 66 > 6 files changed, 600 insertions(+), 567 deletions(-) > delete mode 100644 drivers/leds/leds-lm3556.c > create mode 100644 drivers/leds/leds-lm355x.c > delete mode 100644 include/linux/platform_data/leds-lm3556.h > create mode 100644 include/linux/platform_data/leds-lm355x.h > > diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig > index c96bbaa..4f6ced2 100644 > --- a/drivers/leds/Kconfig > +++ b/drivers/leds/Kconfig > @@ -422,13 +422,13 @@ config LEDS_MAX8997 > This option enables support for on-chip LED drivers on > MAXIM MAX8997 PMIC. > > -config LEDS_LM3556 > - tristate "LED support for LM3556 Chip" > +config LEDS_LM355x > + tristate "LED support for LM355x Chips, LM3554 and LM3556" > depends on LEDS_CLASS && I2C > select REGMAP_I2C > help > - This option enables support for LEDs connected to LM3556. > - LM3556 includes Torch, Flash and Indicator functions. > + This option enables support for LEDs connected to LM355x. > + LM355x includes Torch, Flash and Indicator functions. 
> > config LEDS_OT200 > tristate "LED support for the Bachmann OT200" > diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile > index a4429a9..b57a021 100644 > --- a/drivers/leds/Makefile > +++ b/drivers/leds/Makefile > @@ -48,7 +48,7 @@ obj-$(CONFIG_LEDS_NETXBIG)+= leds-netxbig.o > obj-$(CONFIG_LEDS_ASIC3) += leds-asic3.o > obj-$(CONFIG_LEDS_RENESAS_TPU) += leds-renesas-tpu.o > obj-$(CONFIG_LEDS_MAX8997) += leds-max8997.o > -obj-$(CONFIG_LEDS_LM3556) += leds-lm3556.o > +obj-$(CONFIG_LEDS_LM355x) += leds-lm355x.o > obj-$(CONFIG_LEDS_BLINKM) += leds-blinkm.o > > # LED SPI Drivers > diff --git a/drivers/leds/leds-lm3556.c b/drivers/leds/leds-lm3556.c > deleted file mode 100644 > index 3062abd..000 > --- a/drivers/leds/leds-lm3556.c > +++ /dev/null > @@ -1,512 +0,0 @@ > -/* > - * Simple driver for Texas Instruments LM3556 LED Flash driver chip (Rev0x03) > - * Copyright (C) 2012 Texas Instruments > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of the GNU General Public License version 2 as > - * published by the Free Software Foundation. > - * > - * Please refer Documentation/leds/leds-lm3556.txt file. 
> - */ > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > -#include > - > -#define REG_FILT_TIME (0x0) > -#define REG_IVFM_MODE (0x1) > -#define REG_NTC(0x2) > -#define REG_INDIC_TIME (0x3) > -#define REG_INDIC_BLINK(0x4) > -#define REG_INDIC_PERIOD (0x5) > -#define REG_TORCH_TIME (0x6) > -#define REG_CONF (0x7) > -#define REG_FLASH (0x8) > -#define REG_I_CTRL (0x9) > -#define REG_ENABLE (0xA) > -#define REG_FLAG (0xB) > -#define REG_MAX(0xB) > - > -#define IVFM_FILTER_TIME_SHIFT (3) > -#define UVLO_EN_SHIFT (7) > -#define HYSTERSIS_SHIFT(5) > -#define IVM_D_TH_SHIFT (2) > -#define IVFM_ADJ_MODE_SHIFT(0) > -#define NTC_EVENT_LVL_SHIFT(5) > -#define NTC_TRIP_TH_SHIFT (2) > -#define NTC_BIAS_I_LVL_SHIFT (0) > -#define INDIC_RAMP_UP_TIME_SHIFT (3) > -#define INDIC_RAMP_DN_TIME_SHIFT (0) > -#define INDIC_N_BLANK_SHIFT(4) > -#define INDIC_PULSE_TIME_SHIFT (0) > -#define INDIC_N_PERIOD_SHIFT (0) > -#define TORCH_RAMP_UP_TIME_SHIFT (3) > -#define TORCH_RAMP_DN_TIME_SHIFT (0) > -#define STROBE_USUAGE_SHIFT(7) > -#define STROBE
Re: [PATCH v2 1/2] mfd: dt: tps6586x: Add power off control
On 08/24/2012 06:36 PM, Bill Huang wrote: >>> On Sun, Aug 19, 2012 at 06:07:55PM -0700, Bill Huang wrote: Add DT property "ti,system-power-controller" telling whether or not this pmic is in charge of controlling the system power, so the power off routine can be hooked up to system call "pm_power_off". ... >>> I've seen the following while trying this patch applied on top of >>> next-20120817: >>> >>> [ 40.581151] Power down. >>> [ 41.583160] [ cut here ] >>> [ 41.587784] WARNING: at >>> /home/thierry.reding/src/kernel/linux-ipmp.git/drivers/i2c/busses/i2c- >>> tegra.c:525 tegra_i2c_xfer+0x21c/0x29c() ... >> Thanks Thierry, I can repro this on Tegra20 inconsistently and found, if >> current cpu is not cpu0 when >> doing "machine_shutdown" (it will call "smp_send_stop"), i2c controller will >> failed to do any >> transaction (looks like gic interrupt will be disabled), I'll debug further >> to find out the root cause. >> >> By the way, Tegra30 is good since it will always be cpu0 when doing >> "machine_shutdown", I still don't >> know why it makes the difference against Tegra20 since I'm not familiar with >> those cpu stuffs and what >> make it behave differently, I'll study a bit, thanks. > > I've sent the shutdown issue for discussion in ARM list: Shutdown problem in > SMP system happened on Tegra20. > The cause of the i2c timeout is pretty clear now and it is not directly > related to this patch, so is this > patch series acceptable? Any thoughts or comment? Thanks. I tend to agree; power off never worked without this patch, and sometimes does with the patch, due to nothing wrong with this patch. Bill, please do follow up on getting the underlying Tegra issue solved somehow though. IIRC, Joseph Lo or Prashant has a patch which enabled the config option that Russell mentioned, so the fix may just be to wait for that patch to get finalized, but please double-check that solves it. Thanks! 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCHv4 0/9] *** ARM: Update arch-vt8500 to Devicetree ***
On 08/23/2012 01:35 AM, Tony Prisk wrote: > This patchset updates arch-vt8500 to devicetree and removes all the old-style > code. Support for WM8650 has also been added. > > Example dts/dtsi files are given for the three currently supported models. > > Major changes: > > GPIO code has been converted to a platform_device and rewritten as WM8505 > support was broken. Add support for WM8650 gpio controller. > > UHCI support was missing. Added this as a generic non-pci uhci controller as > it doesn't require anything special. Should be usable by any system that > doesn't > have special requirements to get the UHCI controller working. > > Framebuffer code patched to support WM8650. The bindings for this are of > concern > but there doesn't seem to be a formalized binding yet. This patch is based off > Sascha Hauer's current patch on the dri-devel mailing list and should be > easily > patched out when its finalized. > > Patchset based on Arnd's arm-soc/for-next branch. I believe all the issues I pointed out are fixed in this series. I'm not sure I reviewed it in enough detail to ack it, but I'm fine with what I saw. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next v1 3/3] forcedeth: prevent TX timeouts after reboot
This complements patch "net-forcedeth: fix TX timeout caused by TX pause on down link" which ensures that a lock-up sequence is not sent to the NIC. The present patch ensures that if a NIC is already locked-up, the driver will recover from it when initializing the device. It does the equivalent of the following recovery sequence: - write NVREG_TX_PAUSEFRAME_ENABLE_V1 to eth1's register NvRegTxPauseFrame - write NVREG_XMITCTL_START to eth1's register NvRegTransmitterControl - write 0 to eth1's register NvRegTransmitterControl (this is at the heart of the "unbricking" sequence mentioned in patch "net-forcedeth: fix TX timeout caused by TX pause on down link") Tested: - hardware is MCP55 device id 10de:0373 (rev a3), dual-port - reboot a kernel without any of patches mentioned - freeze the NIC (details on description for commit "net-forcedeth: fix TX timeout caused by TX pause on down link") - wait 5mn until ping hangs & TX timeout in dmesg - reboot on kernel with present patch - host is immediately operational, no TX timeout Signed-off-by: David Decotigny --- drivers/net/ethernet/nvidia/forcedeth.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 8b82457..edd6221 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -5905,11 +5905,18 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } + netif_carrier_off(dev); + + /* Some NICs freeze when TX pause is enabled while NIC is +* down, and this stays across warm reboots. The sequence +* below should be enough to recover from that state.
*/ + nv_update_pause(dev, 0); + nv_start_tx(dev); + nv_stop_tx(dev); + if (id->driver_data & DEV_HAS_VLAN) nv_vlan_mode(dev, dev->features); - netif_carrier_off(dev); - dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", dev->name, np->phy_oui, np->phyaddr, dev->dev_addr); -- 1.7.10.2.5.g20d7bc9 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next v1 2/3] forcedeth: fix TX timeout caused by TX pause on down link
On some dual-port forcedeth devices such as MCP55 10de:0373 (rev a3), when autoneg & TX pause are enabled while port is connected but interface is down, the NIC will eventually freeze (TX timeouts, network unreachable). This patch ensures that TX pause is not configured in hardware when interface is down. The TX pause request will be honored when interface is later configured. Tested: - hardware is MCP55 device id 10de:0373 (rev a3), dual-port - eth0 connected and UP, eth1 connected but DOWN - without this patch, following sequence would brick NIC: ifconfig eth0 down ifconfig eth1 up ifconfig eth1 down ethtool -A eth1 autoneg off rx on tx off ifconfig eth1 up ifconfig eth1 down ethtool -A eth1 autoneg on rx on tx on ifconfig eth1 up ifconfig eth1 down ifup eth0 sleep 120 # or longer ethtool eth1 Just in case, sequence to un-brick: ifconfig eth0 down ethtool -A eth1 autoneg off rx on tx off ifconfig eth1 up ifconfig eth1 down ifup eth0 - with this patch: no TX timeout after "bricking" sequence above Details: - The following register accesses have been identified as the ones causing the NIC to freeze in "bricking" sequence above: - write NVREG_TX_PAUSEFRAME_ENABLE_V1 to eth1's register NvRegTxPauseFrame - write NVREG_MISC1_PAUSE_TX | NVREG_MISC1_FORCE to eth1's register NvRegMisc1 - write 0 to eth1's register NvRegTransmitterControl This is what this patch avoids. 
Signed-off-by: David Decotigny --- drivers/net/ethernet/nvidia/forcedeth.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 51d19d8..8b82457 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -3409,7 +3409,7 @@ set_speed: pause_flags = 0; /* setup pause frame */ - if (np->duplex != 0) { + if (netif_running(dev) && (np->duplex != 0)) { if (np->autoneg && np->pause_flags & NV_PAUSEFRAME_AUTONEG) { adv_pause = adv & (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); lpa_pause = lpa & (LPA_PAUSE_CAP | LPA_PAUSE_ASYM); @@ -5455,6 +5455,7 @@ static int nv_close(struct net_device *dev) netif_stop_queue(dev); spin_lock_irq(&np->lock); + nv_update_pause(dev, 0); /* otherwise stop_tx bricks NIC */ nv_stop_rxtx(dev); nv_txrx_reset(dev); -- 1.7.10.2.5.g20d7bc9 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next v1 1/3] forcedeth: fix buffer overflow
Found by manual code inspection. Tested: compile, reboot, ethtool -d ethX Signed-off-by: David Decotigny --- drivers/net/ethernet/nvidia/forcedeth.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index f45def0..51d19d8 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -4435,7 +4435,7 @@ static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void regs->version = FORCEDETH_REGS_VER; spin_lock_irq(&np->lock); - for (i = 0; i <= np->register_size/sizeof(u32); i++) + for (i = 0; i < np->register_size/sizeof(u32); i++) rbuf[i] = readl(base + i*sizeof(u32)); spin_unlock_irq(&np->lock); } -- 1.7.10.2.5.g20d7bc9 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next v1 0/3] forcedeth: fix device lock-up for dual-port NICs
On a dual port MCP55 10de:0373 (rev a3) NIC with both ports connected, we identified a configuration that does freeze the whole NIC: having autoneg & TX pause turned on while one port is physically connected but interface is down (e.g. eth1) eventually causes the whole NIC to freeze (eth1 and... eth0). This triggers TX timeouts on the UP interface and, more generally, an unreachable network. In order to avoid the bug, all we have to do is make sure not to configure TX pause on the hardware while NIC is down. This is what the 2nd patch of the series does (details included). And, in case the NIC is in a bad state at reboot (should not happen anymore thanks to patch above), third patch basically always makes sure to fix the NIC when module is loaded. I could only test this with a MCP55 10de:0373 (rev a3) PCI device on a x86_64 host. Any feedback on these patches is welcome! In particular, please let me know if this should not apply to other hardware. # Patch Set Summary: David Decotigny (3): forcedeth: fix buffer overflow forcedeth: fix TX timeout caused by TX pause on down link forcedeth: prevent TX timeouts after reboot drivers/net/ethernet/nvidia/forcedeth.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) -- 1.7.10.2.5.g20d7bc9 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RT 2/2] Linux 3.2.27-rt41-rc1
--- localversion-rt |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localversion-rt b/localversion-rt index 629e0b4..31c892a 100644 --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt41 +-rt42-rc1 -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RT 1/2] fix printk flush of messages
Updates console-make-rt-friendly.patch #ifdef CONFIG_PREEMPT_RT_FULL, printk() output is never flushed by printk() because: # some liberties taken in this pseudo-code to make it easier to follow printk() vprintk() raw_spin_lock(&logbuf_lock) # increment preempt_count(): preempt_disable() result = console_trylock_for_printk() retval = 0 # lock will always be false, because preempt_count() will be >= 1 lock = ... && !preempt_count() if (lock) retval = 1 return retval # result will always be false since lock will always be false if (result) console_unlock() # this is where the printk() output would be flushed On system boot some printk() output is flushed because register_console() and tty_open() call console_unlock(). This change also fixes the problem that was previously fixed by preempt-rt-allow-immediate-magic-sysrq-output-for-preempt_rt_full.patch Signed-off-by: Frank Rowand Cc: Frank Link: http://lkml.kernel.org/r/4fb44fd0.4090...@am.sony.com Signed-off-by: Thomas Gleixner --- kernel/printk.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk.c b/kernel/printk.c index 9ea..66e83e5 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -836,7 +836,7 @@ static int console_trylock_for_printk(unsigned int cpu, unsigned long flags) int retval = 0, wake = 0; #ifdef CONFIG_PREEMPT_RT_FULL int lock = (!early_boot_irqs_disabled && !irqs_disabled_flags(flags) && - !preempt_count()) || sysrq_in_progress; + (preempt_count() <= 1)) || sysrq_in_progress; #else int lock = 1; #endif -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RT 0/2] [ANNOUNCE] 3.2.28-rt42-rc1 stable review
Dear RT Folks, This is the RT stable review cycle of patch 3.2.28-rt42-rc1. Please scream at me if I messed something up. Please test the patches too. The -rc release will be uploaded to kernel.org and will be deleted when the final release is out. This is just a review release (or release candidate). The pre-releases will not be pushed to the git repository, only the final release is. If all goes well, this patch will be converted to the next main release on 8/29/2012 (The late Michael Jackson's B-Day, and also one of my friends from high school, who hated Michael Jackson, and also hated to be reminded that he shared the same B-Day). Enjoy, -- Steve To build 3.2.28-rt42-rc1 directly, the following patches should be applied: http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.28.xz http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.28-rt42-rc1.patch.xz You can also build from 3.2.28-rt41 by applying the incremental patch: http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/incr/patch-3.2.28-rt41-rt42-rc1.patch.xz Changes from 3.2.28-rt41: --- Frank Rowand (1): fix printk flush of messages Steven Rostedt (1): Linux 3.2.27-rt41-rc1 kernel/printk.c |2 +- localversion-rt |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[ANNOUNCE] 3.2.28-rt41 (this is for real)
Dear RT Folks, I'm pleased to announce the 3.2.28-rt41 stable release. This release is just an update to the new stable 3.2.28 version and no RT specific changes have been made. You can get this release via the git tree at: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git Head SHA1: 31f85ffc3a341b9377ac66702947bbc0e5ca008d Or to build 3.2.28-rt41 directly, the following patches should be applied: http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.28.xz http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.28-rt41.patch.xz Enjoy, -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [ANNOUNCE] 3.2.27-rt40
On Fri, 2012-08-24 at 22:37 -0400, Steven Rostedt wrote: > Dear RT Folks, > > I'm pleased to announce the 3.2.27-rt40 stable release. Bah, Evolution is crashing on my /tmp directory (where my scripts place the files). There's a bug in the gtk4 file manager (I'm using xfce), where if the directory changes it crashes the entire app :-p. Thus I moved the output file to another directory to post. But I copied an old version (we're at 3.2.28-rt41 now). Ignore this announcement, I'll send out another one :-( -- Steve > > > This release is just an update to the new stable 3.2.27 version > and no RT specific changes have been made. > > > You can get this release via the git tree at: > > git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git > > Head SHA1: 31f85ffc3a341b9377ac66702947bbc0e5ca008d > > > Or to build 3.2.27-rt40 directly, the following patches should be applied: > > http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz > > http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.27.xz > > > http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.27-rt40.patch.xz > > > > Enjoy, > > -- Steve > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[ANNOUNCE] 3.2.27-rt40
Dear RT Folks, I'm pleased to announce the 3.2.27-rt40 stable release. This release is just an update to the new stable 3.2.27 version and no RT specific changes have been made. You can get this release via the git tree at: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git Head SHA1: 31f85ffc3a341b9377ac66702947bbc0e5ca008d Or to build 3.2.27-rt40 directly, the following patches should be applied: http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.27.xz http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.27-rt40.patch.xz Enjoy, -- Steve -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2] mtd: cmdlinepart: fix the wrong partitions number when truncating occurs
Assume we have a 1GB(8Gb) nand chip, and we set the partitions in the command line like this: #gpmi-nand:100m(boot),100m(kernel),1g(rootfs) In this case, the partition truncating occurs. The current code will get the following result: -- root@freescale ~$ cat /proc/mtd dev:size erasesize name mtd0: 0640 0004 "boot" mtd1: 0640 0004 "kernel" -- It is obvious that we lost the truncated partition `rootfs` which should be 824M in this case. Why? The old code sets the wrong partitions number when the truncating occurs. This patch fixes it. Also add a `break` to shortcut the code in this case. After applying this patch, the result becomes: -- root@freescale ~$ cat /proc/mtd dev:size erasesize name mtd0: 0640 0004 "boot" mtd1: 0640 0004 "kernel" mtd2: 3380 0004 "rootfs" -- We get the right result. Signed-off-by: Huang Shijie --- v1 --> v2: [1] add more commit info. --- drivers/mtd/cmdlinepart.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c index 4558e0f..fc960a3 100644 --- a/drivers/mtd/cmdlinepart.c +++ b/drivers/mtd/cmdlinepart.c @@ -344,7 +344,8 @@ static int parse_cmdline_partitions(struct mtd_info *master, "%s: partitioning exceeds flash size, truncating\n", part->mtd_id); part->parts[i].size = master->size - offset; - part->num_parts = i; + part->num_parts = i + 1; + break; } offset += part->parts[i].size; } -- 1.7.4.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] kdump: remove unused including
From: Wei Yongjun Remove an included header that is not needed. Signed-off-by: Wei Yongjun --- kernel/kexec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/kexec.c b/kernel/kexec.c index 0668d58..5e4bd78 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2
On 25/08/12 13:19, Ben Hutchings wrote: > On Fri, 2012-08-24 at 14:26 -0700, Paul E. McKenney wrote: >> On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote: >>> Hi, >>> >>> Changes since v1: >>> >>> - Fixed preempt handling in alpha idle loop >>> - added ack from Geert >>> - fixed stable email address, sorry :-/ >>> >>> This time I built tested everywhere but: h8300 (compiler internal error), >>> and mn10300, parisc, score (cross compilers not available in >>> ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/) >>> >>> For testing, you can pull from: >>> >>> git://github.com/fweisbec/linux-dynticks.git >>> rcu/idle-fix-v2 >>> >>> Thanks. >> >> I have queued these on -rcu branch rcu/idle: >> >> git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git >> >> This problem has been in place since 3.3, so it is hard to argue that >> it is a regression for this merge window. I have therefore queued it >> for 3.7. > > I don't follow that; I would expect any serious bug fix (serious enough > for a stable update) to be acceptable for 3.6 at this point. > > If the regression occurred in 3.3, then the cc lines should be something > like: > > Cc: # 3.3+ > > and not the current: > > Cc: 3.2.x.. The Alpha patches fix an even earlier regression resulting in RCU CPU stalls on an SMP kernel built for generic Alpha (which includes the current Debian 3.2-alpha-smp kernel) and renders the kernel pretty much unuseable. I've only tested the two alpha patches together but maybe just the first patch (1/11 alpha: Fix preemption handling in idle loop) might be needed to fix the problem in 3.2. I'll test and let you know. Cheers Michael. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping
On Fri, Aug 24, 2012 at 6:49 PM, Yinghai Lu wrote: > On Fri, Aug 24, 2012 at 6:25 PM, Yinghai Lu wrote: >> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: >>> Depending on the platform, init_memory_mapping() may be called multiple >>> times. Move it out to setup_arch() to avoid writing to cr4 on every call. >>> >>> Signed-off-by: Jacob Shin >>> --- >>> arch/x86/kernel/setup.c | 10 ++ >>> arch/x86/mm/init.c | 10 -- >>> 2 files changed, 10 insertions(+), 10 deletions(-) >>> >>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >>> index f4b9b80..751e020 100644 >>> --- a/arch/x86/kernel/setup.c >>> +++ b/arch/x86/kernel/setup.c >>> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p) >>> >>> init_gbpages(); >>> >>> + /* Enable PSE if available */ >>> + if (cpu_has_pse) >>> + set_in_cr4(X86_CR4_PSE); >>> + >>> + /* Enable PGE if available */ >>> + if (cpu_has_pge) { >>> + set_in_cr4(X86_CR4_PGE); >>> + __supported_pte_mask |= _PAGE_GLOBAL; >>> + } >>> + >> >> please don't put it directly in setup_arch(). >> >> and another function. >> > > Jacob, hpa > > can you use attached one to replace the first patch? Please use attached two instead. Thanks Yinghai get_page_size_mask_v3.patch Description: Binary data mr_cal.patch Description: Binary data
Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping
On Fri, Aug 24, 2012 at 6:25 PM, Yinghai Lu wrote: > On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: >> Depending on the platform, init_memory_mapping() may be called multiple >> times. Move it out to setup_arch() to avoid writing to cr4 on every call. >> >> Signed-off-by: Jacob Shin >> --- >> arch/x86/kernel/setup.c | 10 ++ >> arch/x86/mm/init.c | 10 -- >> 2 files changed, 10 insertions(+), 10 deletions(-) >> >> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >> index f4b9b80..751e020 100644 >> --- a/arch/x86/kernel/setup.c >> +++ b/arch/x86/kernel/setup.c >> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p) >> >> init_gbpages(); >> >> + /* Enable PSE if available */ >> + if (cpu_has_pse) >> + set_in_cr4(X86_CR4_PSE); >> + >> + /* Enable PGE if available */ >> + if (cpu_has_pge) { >> + set_in_cr4(X86_CR4_PGE); >> + __supported_pte_mask |= _PAGE_GLOBAL; >> + } >> + > > please don't put it directly in setup_arch(). > > and another function. > Jacob, hpa can you use attached one to replace the first patch? Thanks Yinghai get_page_size_mask.patch Description: Binary data
[PATCH] staging: csr: use is_zero_ether_addr() instead of memcmp()
From: Wei Yongjun Use is_zero_ether_addr() instead of directly using memcmp() to determine if the ethernet address is all zeros. spatch with a semantic match was used to find this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun --- drivers/staging/csr/sme_wext.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/csr/sme_wext.c b/drivers/staging/csr/sme_wext.c index 7e85907..f09a738 100644 --- a/drivers/staging/csr/sme_wext.c +++ b/drivers/staging/csr/sme_wext.c @@ -1191,8 +1191,6 @@ unifi_siwap(struct net_device *dev, struct iw_request_info *info, netInterface_priv_t *interfacePriv = (netInterface_priv_t *)netdev_priv(dev); unifi_priv_t *priv = interfacePriv->privPtr; int err = 0; -const unsigned char zero_bssid[ETH_ALEN] = {0x00, 0x00, 0x00, -0x00, 0x00, 0x00}; func_enter(); @@ -1213,7 +1211,7 @@ unifi_siwap(struct net_device *dev, struct iw_request_info *info, unifi_trace(priv, UDBG1, "unifi_siwap: asked for %pM\n", wrqu->ap_addr.sa_data); -if (!memcmp(wrqu->ap_addr.sa_data, zero_bssid, ETH_ALEN)) { +if (is_zero_ether_addr(wrqu->ap_addr.sa_data)) { priv->ignore_bssid_join = FALSE; err = sme_mgt_disconnect(priv); if (err) { -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping
On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > Depending on the platform, init_memory_mapping() may be called multiple > times. Move it out to setup_arch() to avoid writing to cr4 on every call. > > Signed-off-by: Jacob Shin > --- > arch/x86/kernel/setup.c | 10 ++ > arch/x86/mm/init.c | 10 -- > 2 files changed, 10 insertions(+), 10 deletions(-) > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > index f4b9b80..751e020 100644 > --- a/arch/x86/kernel/setup.c > +++ b/arch/x86/kernel/setup.c > @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p) > > init_gbpages(); > > + /* Enable PSE if available */ > + if (cpu_has_pse) > + set_in_cr4(X86_CR4_PSE); > + > + /* Enable PGE if available */ > + if (cpu_has_pge) { > + set_in_cr4(X86_CR4_PGE); > + __supported_pte_mask |= _PAGE_GLOBAL; > + } > + please don't put it directly in setup_arch(). and another function. Thanks Yinghai > /* max_pfn_mapped is updated here */ > max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< max_pfn_mapped = max_low_pfn_mapped; > diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c > index e0e6990..2f07e09 100644 > --- a/arch/x86/mm/init.c > +++ b/arch/x86/mm/init.c > @@ -149,16 +149,6 @@ unsigned long __init_refok init_memory_mapping(unsigned > long start, > use_gbpages = direct_gbpages; > #endif > > - /* Enable PSE if available */ > - if (cpu_has_pse) > - set_in_cr4(X86_CR4_PSE); > - > - /* Enable PGE if available */ > - if (cpu_has_pge) { > - set_in_cr4(X86_CR4_PGE); > - __supported_pte_mask |= _PAGE_GLOBAL; > - } > - > if (use_gbpages) > page_size_mask |= 1 << PG_LEVEL_1G; > if (use_pse) > -- > 1.7.9.5 > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 5/5] x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix
On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > There could be cases where user supplied memmap=exactmap memory > mappings do not mark the region where the kernel .text .data and > .bss reside as E820_RAM as reported here: > > https://lkml.org/lkml/2012/8/14/86 > > Handle it by complaining, and adding the range back into the e820. > > Signed-off-by: Jacob Shin > --- > arch/x86/kernel/setup.c | 15 +++ > 1 file changed, 15 insertions(+) > > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > index 4217fb4..b84aceb5 100644 > --- a/arch/x86/kernel/setup.c > +++ b/arch/x86/kernel/setup.c > @@ -926,6 +926,21 @@ void __init setup_arch(char **cmdline_p) > insert_resource(&iomem_resource, &data_resource); > insert_resource(&iomem_resource, &bss_resource); > > + /* > +* Complain if .text .data and .bss are not marked as E820_RAM and > +* attempt to fix it by adding the range. We may have a confused BIOS, > +* or the user may have incorrectly supplied it via memmap=exactmap. > If > +* we really are running on top non-RAM, we will crash later anyways. > +*/ > + if (!e820_all_mapped(code_resource.start, bss_resource.end, > E820_RAM)) { > + pr_warn(".text .data .bss are not marked as E820_RAM!\n"); > + > + e820_add_region(code_resource.start, > + bss_resource.end - code_resource.start + 1, > + E820_RAM); > + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), > &e820.nr_map); this sanitize_e820_map could be spared. trim_bios_range will do that always. > + } > + > trim_bios_range(); > #ifdef CONFIG_X86_32 > if (ppro_with_ram_bug()) { also should use brk_limit instead of bss_resource.end. aka need to keep the map for brk area. Thanks Yinghai -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2
On Fri, 2012-08-24 at 14:26 -0700, Paul E. McKenney wrote: > On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote: > > Hi, > > > > Changes since v1: > > > > - Fixed preempt handling in alpha idle loop > > - added ack from Geert > > - fixed stable email address, sorry :-/ > > > > This time I built tested everywhere but: h8300 (compiler internal error), > > and mn10300, parisc, score (cross compilers not available in > > ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/) > > > > For testing, you can pull from: > > > > git://github.com/fweisbec/linux-dynticks.git > > rcu/idle-fix-v2 > > > > Thanks. > > I have queued these on -rcu branch rcu/idle: > > git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git > > This problem has been in place since 3.3, so it is hard to argue that > it is a regression for this merge window. I have therefore queued it > for 3.7. I don't follow that; I would expect any serious bug fix (serious enough for a stable update) to be acceptable for 3.6 at this point. If the regression occurred in 3.3, then the cc lines should be something like: Cc: # 3.3+ and not the current: Cc: 3.2.x.. (Note, version annotations should be on the right of the address, not in the 'real name' position on the left.) Ben. -- Ben Hutchings Experience is what causes a person to make new mistakes instead of old ones. signature.asc Description: This is a digitally signed message part
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On 08/24/2012 05:49 PM, Jacob Shin wrote: > > Right, I think what I was attempting to do was to merge the 1MB > with E820_RAM right above 1MB: > > So instead of: > > init_memory_mapping(0, 1MB) > init_memory_mapping(1MB, 2GB) > > It would be: > > init_memory_mapping(0, 2GB) > > While taking care of the odd case where there is a gap right after > 1MB. > > But if its not worth it, I can move it out of the loop. > What is the benefit? -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin wrote: > Currently direct mappings are created for [ 0 to max_low_pfn< and [ 4GB to max_pfn< backed by actual DRAM. This is fine for holes under 4GB which are covered > by fixed and variable range MTRRs to be UC. However, we run into trouble > on higher memory addresses which cannot be covered by MTRRs. > > Our system with 1TB of RAM has an e820 that looks like this: > > BIOS-e820: [mem 0x-0x000983ff] usable > BIOS-e820: [mem 0x00098400-0x0009] reserved > BIOS-e820: [mem 0x000d-0x000f] reserved > BIOS-e820: [mem 0x0010-0xc7eb] usable > BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data > BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS > BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved > BIOS-e820: [mem 0xfec0-0xfec0] reserved > BIOS-e820: [mem 0xfee0-0xfee00fff] reserved > BIOS-e820: [mem 0xfff0-0x] reserved > BIOS-e820: [mem 0x0001-0x00e037ff] usable > BIOS-e820: [mem 0x00e03800-0x00fc] reserved > BIOS-e820: [mem 0x0100-0x011ffeff] usable > > and so direct mappings are created for huge memory hole between > 0x00e03800 to 0x0100. Even though the kernel never > generates memory accesses in that region, since the page tables mark > them incorrectly as being WB, our (AMD) processor ends up causing a MCE > while doing some memory bookkeeping/optimizations around that area. > > This patch iterates through e820 and only direct maps ranges that are > marked as E820_RAM, and keeps track of those pfn ranges. Depending on > the alignment of E820 ranges, this may possibly result in using smaller > size (i.e. 4K instead of 2M or 1G) page tables. 
> > Signed-off-by: Jacob Shin > --- > arch/x86/include/asm/page_types.h |9 +++ > arch/x86/kernel/setup.c | 125 > + > arch/x86/mm/init.c|2 + > arch/x86/mm/init_64.c |6 +- > 4 files changed, 112 insertions(+), 30 deletions(-) > > diff --git a/arch/x86/include/asm/page_types.h > b/arch/x86/include/asm/page_types.h > index e21fdd1..409047a 100644 > --- a/arch/x86/include/asm/page_types.h > +++ b/arch/x86/include/asm/page_types.h > @@ -3,6 +3,7 @@ > > #include > #include > +#include > > /* PAGE_SHIFT determines the page size */ > #define PAGE_SHIFT 12 > @@ -40,12 +41,20 @@ > #endif /* CONFIG_X86_64 */ > > #ifndef __ASSEMBLY__ > +#include > > extern int devmem_is_allowed(unsigned long pagenr); > > extern unsigned long max_low_pfn_mapped; > extern unsigned long max_pfn_mapped; > > +extern struct range pfn_mapped[E820_X_MAX]; > +extern int nr_pfn_mapped; > + > +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long > end_pfn); > +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long > end_pfn); > +extern bool pfn_is_mapped(unsigned long pfn); > + > static inline phys_addr_t get_max_mapped(void) > { > return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > index 751e020..4217fb4 100644 > --- a/arch/x86/kernel/setup.c > +++ b/arch/x86/kernel/setup.c > @@ -115,13 +115,46 @@ > #include > > /* > - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. > - * The direct mapping extends to max_pfn_mapped, so that we can directly > access > - * apertures, ACPI and other tables without having to play with fixmaps. 
> + * max_low_pfn_mapped: highest direct mapped pfn under 4GB > + * max_pfn_mapped: highest direct mapped pfn over 4GB > + * > + * The direct mapping only covers E820_RAM regions, so the ranges and gaps > are > + * represented by pfn_mapped > */ > unsigned long max_low_pfn_mapped; > unsigned long max_pfn_mapped; > > +struct range pfn_mapped[E820_X_MAX]; > +int nr_pfn_mapped; > + > +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) > +{ > + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, > +nr_pfn_mapped, start_pfn, > end_pfn); > + > + max_pfn_mapped = max(max_pfn_mapped, end_pfn); > + > + if (end_pfn <= (1UL << (32 - PAGE_SHIFT))) > + max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn); > +} > + > +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) > +{ > + int i; > + > + for (i = 0; i < nr_pfn_mapped; i++) > + if ((start_pfn >= pfn_mapped[i].start) && > + (end_pfn <= pfn_mapped[i].end)) > + return true; > + > + return false; > +} > + > +bool pfn_is_mapped(unsigned long pfn) > +{ > + return pfn_range_is_mapped(pfn, pfn + 1); > +} > + looks like you could avoid add pfn_mapped[] array
Re: BUG: Kprobe smoke test: 2 out of 6 tests failed
On Fri, 2012-08-24 at 09:41 -0400, Steven Rostedt wrote: > On Fri, 2012-08-24 at 15:15 +0800, Fengguang Wu wrote: > > Hi Steven, > > > > The following test fails are mostly due to this commit, or one of the > > last 4 commits in > > > > tree: > > git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git > > tip/perf/core > > head: d57c5d51a30152f3175d2344cb6395f08bf8ee0c > > commit: d57c5d51a30152f3175d2344cb6395f08bf8ee0c [100/100] ftrace/x86: Add > > support for -mfentry to x86_64 > > > > [9.084881] Kprobe smoke test failed: register_jprobe returned -22 > > [9.086786] Kprobe smoke test failed: register_jprobes returned -22 Masami, Seems that when we use fentry, we break jprobes. I thought the patches that we added would just move the call to the next op, not fail totally? Are jprobes deprecated? -- Steve > > [9.121281] BUG: Kprobe smoke test: 2 out of 6 tests failed > > [9.171132] Testing tracer function: PASSED > > [9.408938] Testing dynamic ftrace: PASSED > > > > Thanks, > > Fengguang > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Drop support for x86-32
On 25/08/12 02:36, Alan Cox wrote: >> almost all x86-32 boxes will be trash in 2017, remaining boxes will >> use long term tree > People will still be manufacturing 32bit x86 processors in 2017 I'm quite > sure. You appear entirely out of touch. There are already serious > discussions going on about things like the kernel modifications needed to > make 32bit systems run past 2038. > > Besides which what Linux supports is defined by what peope chose to > contribute code for. We support 32bit 680x0 machines that have been > obsolete for nigh on 20 years because someone chooses to support them. > > For that matter if someone comes along with DEC-10 port and it works as > was clean without messing up the core I'm sure we'd add that too! Is that a hint? :P > Alan > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] kernel.h: Introduce IDIV_ROUND_CLOSEST
DIV_ROUND_CLOSEST returns a bad result for negative dividends: DIV_ROUND_CLOSEST(-2, 2) = 0 Most of the time this does not matter. However, in the hardware monitoring subsystem, it is often used on integers which can be negative (such as temperatures). Introduce new macro IDIV_ROUND_CLOSEST which also supports negative dividends. Signed-off-by: Guenter Roeck --- I can take this patch through my hwmon tree, but would like to get an Ack first. Alternative would be to put it into include/linux/hwmon.h, but I would prefer to avoid that. Also, if someone has an idea for a simpler implementation, I would really like to know about it. include/linux/kernel.h |9 + 1 file changed, 9 insertions(+) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6043821..a89483c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -89,6 +89,15 @@ } \ ) +#define IDIV_ROUND_CLOSEST(x, divisor)(\ +{ \ + typeof(x) __x = x; \ + typeof(divisor) __d1 = divisor; \ + typeof(divisor) __d2 = (__x) < 0 ? -(__d1) : (__d1);\ + (((__x) + ((__d2) / 2)) / (__d1)); \ +} \ +) + /* * Multiplies an integer by a fraction, while avoiding unnecessary * overflow or loss of precision. -- 1.7.9.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Drop support for x86-32
On 25/08/12 03:05, wbrana wrote: > On 8/24/12, Martin Nybo Andersen wrote: >> What I'd hate even more is rendering my old working hardware useless by >> removing x86-32 support from the kernel. To reason the removal by saying >> "Microsoft plans to do it" just makes me go bonkers... > Your old hardware will work fine with long term kernel. People won't want to be forced to stick with an old version of the kernel which, as you said, will not have any backported features. People deserve the choice to use whatever they have, however they want. That's the way it works. The way it has been, currently is, and always will be. ...Unless someone at Microsoft* holds Linus hostage** in order to take over Linux kernel development. Not that it's likely to ever happen *Not being a troll or hurling personal insults at Microsoft - It's just that they currently have the majority share on the desktop (and made the original announcement for W9) **If this ever happens, even if it's by a terrorist group and not a company, please don't sue me for conspiracy to kidnapping. It was just an example :) > >> These legacy apps will most likely be compiled for x86-32 and not x32 (an >> argument for not removing x86-32 support on a running x86-64 kernel). > Which legacy apps do you mean? > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On Fri, Aug 24, 2012 at 05:30:21PM -0700, H. Peter Anvin wrote: > On 08/24/2012 04:55 PM, Jacob Shin wrote: > >+ > >+for (i = 0; i < e820.nr_map; i++) { > >+struct e820entry *ei = &e820.map[i]; > >+u64 start = ei->addr; > >+u64 end = ei->addr + ei->size; > >+ > >+/* we only map E820_RAM */ > >+if (ei->type != E820_RAM) > >+continue; > >+ > >+if (end <= ISA_END_ADDRESS) > >+continue; > >+ > >+if (start <= ISA_END_ADDRESS) > >+start = 0; > >+#ifdef CONFIG_X86_32 > >+/* on 32 bit, we only map up to max_low_pfn */ > >+if ((start >> PAGE_SHIFT) >= max_low_pfn) > >+continue; > >+ > >+if ((end >> PAGE_SHIFT) > max_low_pfn) > >+end = max_low_pfn << PAGE_SHIFT; > >+#endif > >+/* the ISA range is always mapped regardless of holes */ > >+if (!pfn_range_is_mapped(0, ISA_END_ADDRESS << PAGE_SHIFT) && > >+start != 0) > >+init_memory_mapping(0, ISA_END_ADDRESS); > >+ > >+init_memory_mapping(start, end); > >+} > >+ > > The ISA range mapping doesn't really make sense *inside* the loop, > no? It seems you could do that before you enter the loop and then > simply have: > > + if (end <= ISA_END_ADDRESS) > + continue; > + > + if (start <= ISA_END_ADDRESS) > + start = ISA_END_ADDRESS; > > ... no? Right, I think what I was attempting to do was to merge the 1MB with E820_RAM right above 1MB: So instead of: init_memory_mapping(0, 1MB) init_memory_mapping(1MB, 2GB) It would be: init_memory_mapping(0, 2GB) While taking care of the odd case where there is a gap right after 1MB. But if it's not worth it, I can move it out of the loop. > > -hpa > > -- > H. Peter Anvin, Intel Open Source Technology Center > I work for Intel. I don't speak on their behalf. > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [PATCH v2 1/2] mfd: dt: tps6586x: Add power off control
nvpublic > > On Sun, Aug 19, 2012 at 06:07:55PM -0700, Bill Huang wrote: > > > Add DT property "ti,system-power-controller" telling whether or not > > > this pmic is in charge of controlling the system power, so the power > > > off routine can be hooked up to system call "pm_power_off". > > > > > > Based on the work by: > > > Dan Willemsen > > > > > > Signed-off-by: Bill Huang > > > Tested-by: Stephen Warren > > > --- > > > .../devicetree/bindings/regulator/tps6586x.txt |6 ++ > > > drivers/mfd/tps6586x.c | 19 > > > +++ > > > include/linux/mfd/tps6586x.h |1 + > > > 3 files changed, 26 insertions(+), 0 deletions(-) > > > > Hi, > > > > I've seen the following while trying this patch applied on top of > > next-20120817: > > > > [ 40.581151] Power down. > > [ 41.583160] [ cut here ] > > [ 41.587784] WARNING: at > > /home/thierry.reding/src/kernel/linux-ipmp.git/drivers/i2c/busses/i2c- > > tegra.c:525 tegra_i2c_xfer+0x21c/0x29c() > > [ 41.599850] Modules linked in: > > [ 41.602927] [] (unwind_backtrace+0x0/0xf8) from [] > > (warn_slowpath_common+0x4c/0x64) > > [ 41.612304] [] (warn_slowpath_common+0x4c/0x64) from > > [] > > (warn_slowpath_null+0x1c/0x24) > > [ 41.621947] [] (warn_slowpath_null+0x1c/0x24) from [] > > (tegra_i2c_xfer+0x21c/0x29c) > > [ 41.631244] [] (tegra_i2c_xfer+0x21c/0x29c) from [] > > (__i2c_transfer+0x44/0x80) > > [ 41.640192] [] (__i2c_transfer+0x44/0x80) from [] > > (i2c_transfer+0x7c/0xb8) > > [ 41.648796] [] (i2c_transfer+0x7c/0xb8) from [] > > (regmap_i2c_read+0x48/0x64) > > [ 41.657485] [] (regmap_i2c_read+0x48/0x64) from [] > (_regmap_raw_read+0x90/0x98) > > [ 41.666518] [] (_regmap_raw_read+0x90/0x98) from [] > > (_regmap_read+0x50/0xa8) > > [ 41.675290] [] (_regmap_read+0x50/0xa8) from [] > (_regmap_update_bits+0x24/0x64) > > [ 41.684322] [] (_regmap_update_bits+0x24/0x64) from > > [] > > (regmap_update_bits+0x3c/0x58) > > [ 41.693885] [] (regmap_update_bits+0x3c/0x58) from [] > > (tps6586x_power_off+0x18/0x38) > > [ 41.703362] [] 
(tps6586x_power_off+0x18/0x38) from [] > > (machine_power_off+0x1c/0x24) > > [ 41.712749] [] (machine_power_off+0x1c/0x24) from [] > > (sys_reboot+0x138/0x1b0) > > [ 41.721612] [] (sys_reboot+0x138/0x1b0) from [] > > (ret_fast_syscall+0x0/0x30) > > [ 41.730293] ---[ end trace 9af366974fefa459 ]--- > > [ 41.734906] tegra-i2c tegra-i2c.3: i2c transfer timed out > > [ 41.740689] Kernel panic - not syncing: Attempted to kill init! > > exitcode=0x > > [ 41.740689] > > [ 41.749823] [] (unwind_backtrace+0x0/0xf8) from [] > > (panic+0x8c/0x1d8) > > [ 41.757993] [] (panic+0x8c/0x1d8) from [] > > (do_exit+0x694/0x750) > > [ 41.765636] [] (do_exit+0x694/0x750) from [] > > (do_group_exit+0x3c/0xb0) > > [ 41.773884] [] (do_group_exit+0x3c/0xb0) from [] > > (__wake_up_parent+0x0/0x18) > > Thanks Thierry, I can repro this on Tegra20 inconsistently and found, if > current cpu is not cpu0 when > doing "machine_shutdown" (it will call "smp_send_stop"), i2c controller will > failed to do any > transaction (looks like gic interrupt will be disabled), I'll debug further > to find out the root cause. > > By the way, Tegra30 is good since it will always be cpu0 when doing > "machine_shutdown", I still don't > know why it makes the difference against Tegra20 since I'm not familiar with > those cpu stuffs and what > make it behave differently, I'll study a bit, thanks. > I've sent the shutdown issue for discussion in ARM list: Shutdown problem in SMP system happened on Tegra20. The cause of the i2c timeout is pretty clear now and it is not directly related to this patch, so is this patch series acceptable? Any thoughts or comment? Thanks. > > > > Thierry > > > > * Unknown Key > > * 0x7F3EB3A1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 02/14] aoe: kernel thread handles I/O completions for simple locking
Andrew Morton writes: > On Fri, 17 Aug 2012 21:24:08 -0400 > Ed Cashin wrote: ... >> +sigfillset(&blocked); >> +sigprocmask(SIG_BLOCK, &blocked, NULL); >> +flush_signals(current); > > This is a kernel thread - it shouldn't need to fiddle with signals. ... Thanks for the feedback. I'll try out your suggestions and return with changes and explanations. -- Ed -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On 08/24/2012 04:55 PM, Jacob Shin wrote: + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 start = ei->addr; + u64 end = ei->addr + ei->size; + + /* we only map E820_RAM */ + if (ei->type != E820_RAM) + continue; + + if (end <= ISA_END_ADDRESS) + continue; + + if (start <= ISA_END_ADDRESS) + start = 0; +#ifdef CONFIG_X86_32 + /* on 32 bit, we only map up to max_low_pfn */ + if ((start >> PAGE_SHIFT) >= max_low_pfn) + continue; + + if ((end >> PAGE_SHIFT) > max_low_pfn) + end = max_low_pfn << PAGE_SHIFT; +#endif + /* the ISA range is always mapped regardless of holes */ + if (!pfn_range_is_mapped(0, ISA_END_ADDRESS << PAGE_SHIFT) && + start != 0) + init_memory_mapping(0, ISA_END_ADDRESS); + + init_memory_mapping(start, end); + } + The ISA range mapping doesn't really make sense *inside* the loop, no? It seems you could do that before you enter the loop and then simply have: + if (end <= ISA_END_ADDRESS) + continue; + + if (start <= ISA_END_ADDRESS) + start = ISA_END_ADDRESS; ... no? -hpa -- H. Peter Anvin, Intel Open Source Technology Center I work for Intel. I don't speak on their behalf. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V5 0/5] clk: mmp: add clock framework for mmp
Quoting Chao Xie (2012-08-19 19:55:10) > From: Chao Xie > arch/arm/mach-mmp/Kconfig|3 + > drivers/clk/Makefile |3 + > drivers/clk/mmp/Makefile |9 + > drivers/clk/mmp/clk-apbc.c | 152 ++ > drivers/clk/mmp/clk-apmu.c | 97 + > drivers/clk/mmp/clk-frac.c | 153 ++ > drivers/clk/mmp/clk-mmp2.c | 449 > ++ > drivers/clk/mmp/clk-pxa168.c | 346 > drivers/clk/mmp/clk-pxa910.c | 320 ++ > drivers/clk/mmp/clk.h| 35 Looks like you are not removing your arch/arm/mach-mmp/clock.c. Is that intentional? When I apply your series against v3.6-rc3 I find that compilation breaks with mmp2_defconfig due to conflicting definitions for the clk api (clk_enable, clk_set_rate, etc). This is not surprising since your legacy clock code is neither deleted nor removed from compilation conditionally by checking for CONFIG_COMMON_CLK. Did I somehow manage to misapply your patches or should your patches have removed the arch-specific clock framework as well? Regards, Mike > 10 files changed, 1567 insertions(+), 0 deletions(-) > create mode 100644 drivers/clk/mmp/Makefile > create mode 100644 drivers/clk/mmp/clk-apbc.c > create mode 100644 drivers/clk/mmp/clk-apmu.c > create mode 100644 drivers/clk/mmp/clk-frac.c > create mode 100644 drivers/clk/mmp/clk-mmp2.c > create mode 100644 drivers/clk/mmp/clk-pxa168.c > create mode 100644 drivers/clk/mmp/clk-pxa910.c > create mode 100644 drivers/clk/mmp/clk.h -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
On Fri, Aug 24, 2012 at 06:55:14PM -0500, Jacob Shin wrote: > Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT ) > and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not > backed by actual DRAM. This is fine for holes under 4GB which are covered > by fixed and variable range MTRRs to be UC. However, we run into trouble > on higher memory addresses which cannot be covered by MTRRs. > > Our system with 1TB of RAM has an e820 that looks like this: > > BIOS-e820: [mem 0x-0x000983ff] usable > BIOS-e820: [mem 0x00098400-0x0009] reserved > BIOS-e820: [mem 0x000d-0x000f] reserved > BIOS-e820: [mem 0x0010-0xc7eb] usable > BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data > BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS > BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved > BIOS-e820: [mem 0xfec0-0xfec0] reserved > BIOS-e820: [mem 0xfee0-0xfee00fff] reserved > BIOS-e820: [mem 0xfff0-0x] reserved > BIOS-e820: [mem 0x0001-0x00e037ff] usable > BIOS-e820: [mem 0x00e03800-0x00fc] reserved > BIOS-e820: [mem 0x0100-0x011ffeff] usable > > and so direct mappings are created for huge memory hole between > 0x00e03800 to 0x0100. Even though the kernel never > generates memory accesses in that region, since the page tables mark > them incorrectly as being WB, our (AMD) processor ends up causing a MCE > while doing some memory bookkeeping/optimizations around that area. > > This patch iterates through e820 and only direct maps ranges that are > marked as E820_RAM, and keeps track of those pfn ranges. Depending on > the alignment of E820 ranges, this may possibly result in using smaller > size (i.e. 4K instead of 2M or 1G) page tables. 
> > Signed-off-by: Jacob Shin > --- > arch/x86/include/asm/page_types.h |9 +++ > arch/x86/kernel/setup.c | 125 > + > arch/x86/mm/init.c|2 + > arch/x86/mm/init_64.c |6 +- > 4 files changed, 112 insertions(+), 30 deletions(-) > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > index 751e020..4217fb4 100644 > --- a/arch/x86/kernel/setup.c > +++ b/arch/x86/kernel/setup.c > @@ -115,13 +115,46 @@ > #include > > /* > - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. > - * The direct mapping extends to max_pfn_mapped, so that we can directly > access > - * apertures, ACPI and other tables without having to play with fixmaps. > + * max_low_pfn_mapped: highest direct mapped pfn under 4GB > + * max_pfn_mapped: highest direct mapped pfn over 4GB > + * > + * The direct mapping only covers E820_RAM regions, so the ranges and gaps > are > + * represented by pfn_mapped > */ > unsigned long max_low_pfn_mapped; > unsigned long max_pfn_mapped; > > +struct range pfn_mapped[E820_X_MAX]; > +int nr_pfn_mapped; > + > +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) > +{ > + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, > + nr_pfn_mapped, start_pfn, end_pfn); > + > + max_pfn_mapped = max(max_pfn_mapped, end_pfn); > + > + if (end_pfn <= (1UL << (32 - PAGE_SHIFT))) > + max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn); > +} > + > +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) > +{ > + int i; > + > + for (i = 0; i < nr_pfn_mapped; i++) > + if ((start_pfn >= pfn_mapped[i].start) && > + (end_pfn <= pfn_mapped[i].end)) > + return true; > + > + return false; > +} > + > +bool pfn_is_mapped(unsigned long pfn) > +{ > + return pfn_range_is_mapped(pfn, pfn + 1); > +} > + > #ifdef CONFIG_DMI > RESERVE_BRK(dmi_alloc, 65536); > #endif > @@ -296,6 +329,68 @@ static void __init cleanup_highmap(void) > } > #endif > > +/* > + * Iterate through E820 memory map and create direct mappings for only 
> E820_RAM > + * regions. We cannot simply create direct mappings for all pfns from > + * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory holes > in > + * high addresses that cannot be marked as UC by fixed/variable range MTRRs. > + * Depending on the alignment of E820 ranges, this may possibly result in > using > + * smaller size (i.e. 4K instead of 2M or 1G) page tables. > + */ > +static void __init init_memory(void) > +{ > + int i; > + > + init_gbpages(); > + > + /* Enable PSE if available */ > + if (cpu_has_pse) > + set_in_cr4(X86_CR4_PSE); > + > + /* Enable PGE if available */ > + if (cpu_has_pge) { > + set_in_cr4(X86_CR4_PGE); > + __supported_pte_mask |= _PAGE_GLOBAL; > + } > + > + for (i = 0; i < e820.nr_map; i++) { > +
RE: Shutdown problem in SMP system happened on Tegra20
nvpublic > On Fri, Aug 24, 2012 at 04:23:39PM +0800, Bill Huang wrote: > > When doing shutdown on Tegra20/Tegra30, we need to read/write PMIC > > registers through I2C to perform the power off sequence. > > Unfortunately, sometimes we'll fail to shutdown due to I2C timeout on > > Tegra20. And the cause of the timeout is due to the CPU which I2C > > controller IRQ affined to will have chance to be offlined without > > migrating all irqs affined to it, so the following I2C transactions > > will fail (no any CPU will handle that interrupt since then). > > > Some snippet of the shutdown codes: > > > > void kernel_power_off(void) > > { > > kernel_shutdown_prepare(SYSTEM_POWER_OFF); > > : > > disable_nonboot_cpus(); > > : > > machine_power_off(); > > } > > > > void machine_power_off(void) > > { > > machine_shutdown(); > > if (pm_power_off) > > pm_power_off(); /* this is where we send I2C write to shutdown > > */ } > > > > void machine_shutdown(void) > > { > > #ifdef CONFIG_SMP > > smp_send_stop(); > > #endif > > } > > > > In "smp_send_stop()", it will send "IPI_CPU_STOPS" to offline other > > cpus except current cpu (smp_processor_id()), however, current cpu > > will not always be cpu0 at least at Tegra20, that said for example > > cpu1 might be the current cpu and cpu0 will be offlined and this is the > > case why the I2C transaction > will timeout. > > > > For normal case, "disable_nonboot_cpus()" call will disable all other > > Cpus except cpu0, that means we won't hit the problem mentioned here > > since cpu0 will always be the current cpu in the call "smp_send_stop", but > > the call to > "disable_nonboot_cpus" > > will happen only when "CONFIG_PM_SLEEP_SMP" is enabled which is not > > the case for Tegra20/Tegra30, we don't support suspend yet so this can't be > > enabled. > > So what you're asking for is a feature to do what CONFIG_PM_SLEEP_SMP does, > but without > CONFIG_PM_SLEEP_SMP enabled? 
Yeah, pretty much. I'm actually asking whether we should take care of this, since maybe not all platforms will have this config enabled. > > Why not just ensure that CONFIG_PM_SLEEP_SMP is enabled if your platform > requires that the lowest CPU > number be the CPU dealing with reboot? Someday we will have it enabled, but before that we'll hit the issue, so you don't think this should be taken care of? Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/14] aoe driver v49 performance and usability improvements
[second send after HTML part made vger reject my first email] On 32 Aug 2012, Ed Cashin writes: > These patches go a long way to updating the in-kernel aoe driver with > the changes that have been in the coraid.com-distributed version, > bringing it from (aoe internal) version 47 to version 49. They apply > to commit 23dcfa61bac244e1 of the mainline git tree. Just a heads up: A colleague found a list_del corruption warning using 3.5.2 with these patches, but the warning also occurs without these patches, so I plan to do a git bisect to find out when the regression occurred. The warning appears to be for the events of the bdi->completions when blk_cleanup_queue is called during "rmmod aoe". -- Ed -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM
Currently direct mappings are created for [ 0 to max_low_pfn< --- arch/x86/include/asm/page_types.h |9 +++ arch/x86/kernel/setup.c | 125 + arch/x86/mm/init.c|2 + arch/x86/mm/init_64.c |6 +- 4 files changed, 112 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index e21fdd1..409047a 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -3,6 +3,7 @@ #include #include +#include /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 @@ -40,12 +41,20 @@ #endif /* CONFIG_X86_64 */ #ifndef __ASSEMBLY__ +#include extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern struct range pfn_mapped[E820_X_MAX]; +extern int nr_pfn_mapped; + +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn); +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); +extern bool pfn_is_mapped(unsigned long pfn); + static inline phys_addr_t get_max_mapped(void) { return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 751e020..4217fb4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -115,13 +115,46 @@ #include /* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. 
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB + * max_pfn_mapped: highest direct mapped pfn over 4GB + * + * The direct mapping only covers E820_RAM regions, so the ranges and gaps are + * represented by pfn_mapped */ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; +struct range pfn_mapped[E820_X_MAX]; +int nr_pfn_mapped; + +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) +{ + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, +nr_pfn_mapped, start_pfn, end_pfn); + + max_pfn_mapped = max(max_pfn_mapped, end_pfn); + + if (end_pfn <= (1UL << (32 - PAGE_SHIFT))) + max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn); +} + +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) +{ + int i; + + for (i = 0; i < nr_pfn_mapped; i++) + if ((start_pfn >= pfn_mapped[i].start) && + (end_pfn <= pfn_mapped[i].end)) + return true; + + return false; +} + +bool pfn_is_mapped(unsigned long pfn) +{ + return pfn_range_is_mapped(pfn, pfn + 1); +} + #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); #endif @@ -296,6 +329,68 @@ static void __init cleanup_highmap(void) } #endif +/* + * Iterate through E820 memory map and create direct mappings for only E820_RAM + * regions. We cannot simply create direct mappings for all pfns from + * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory holes in + * high addresses that cannot be marked as UC by fixed/variable range MTRRs. + * Depending on the alignment of E820 ranges, this may possibly result in using + * smaller size (i.e. 4K instead of 2M or 1G) page tables. 
+ */ +static void __init init_memory(void) +{ + int i; + + init_gbpages(); + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 start = ei->addr; + u64 end = ei->addr + ei->size; + + /* we only map E820_RAM */ + if (ei->type != E820_RAM) + continue; + + if (end <= ISA_END_ADDRESS) + continue; + + if (start <= ISA_END_ADDRESS) + start = 0; +#ifdef CONFIG_X86_32 + /* on 32 bit, we only map up to max_low_pfn */ + if ((start >> PAGE_SHIFT) >= max_low_pfn) + continue; + + if ((end >> PAGE_SHIFT) > max_low_pfn) + end = max_low_pfn << PAGE_SHIFT; +#endif + /* the ISA range is always mapped regardless of holes */ + if (!pfn_range_is_mapped(0, ISA_END_ADDRESS << PAGE_SHIFT) && + start != 0) + init_memory_mapping(0, ISA_END_ADDRESS); + + init_memory_mapping(start, end); + } + +#ifdef CONFIG_X86_64 + if (max_pfn > max_low_pfn) { + /*
[PATCH 2/5] x86: find_early_table_space based on memory ranges that are being mapped
Current logic finds enough space for direct mapping page tables from 0 to end. Instead, we only need to find enough space to cover mr[0].start to mr[nr_range].end -- the range that is actually being mapped by init_memory_mapping() This patch also reportedly fixes suspend/resume issue reported in: https://lkml.org/lkml/2012/8/11/83 Signed-off-by: Jacob Shin --- arch/x86/mm/init.c | 62 +--- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2f07e09..e2b21e0 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -35,40 +35,48 @@ struct map_range { unsigned page_size_mask; }; -static void __init find_early_table_space(struct map_range *mr, unsigned long end, - int use_pse, int use_gbpages) +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - if (use_pse) { - unsigned long extra; + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> 
PMD_SHIFT; + } - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - /* The first 2/4M doesn't use large pages. */ - if (mr->start < PMD_SIZE) - extra += mr->end - mr->start; - - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* The first 2/4M doesn't use large pages. */ + if (mr[i].start < PMD_SIZE) + extra += range; + + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -86,7 +94,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - end - 1, pgt_buf_start << PAGE_SHIFT, + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, (pgt_buf_top << PAGE_SHIFT) - 1); } @@ -257,7 +265,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(&mr[0], end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V4 0/5] x86: Create direct mappings for E820_RAM only
Currently kernel direct mappings are created for all pfns between [ 0 to max_low_pfn ) and [ 4GB to max_pfn ). When we introduce memory holes, we end up mapping memory ranges that are not backed by physical DRAM. This is fine for lower memory addresses which can be marked as UC by fixed/variable range MTRRs, however we run into trouble with high addresses. The following patchset creates direct mappings only for E820_RAM regions between 0 ~ max_low_pfn and 4GB ~ max_pfn, and leaves non-E820_RAM and memory holes unmapped. This fourth revision of the patchset attempts to resolve comments and concerns from the following threads: * https://lkml.org/lkml/2012/8/22/680 * https://lkml.org/lkml/2012/8/13/512 * https://lkml.org/lkml/2012/8/9/536 * https://lkml.org/lkml/2011/10/20/323 Jacob Shin (5): x86: Move enabling of PSE and PGE out of init_memory_mapping x86: find_early_table_space based on memory ranges that are being mapped x86: Only direct map addresses that are marked as E820_RAM x86: Fixup code testing if a pfn is direct mapped x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix arch/x86/include/asm/page_types.h |9 +++ arch/x86/kernel/cpu/amd.c |6 +- arch/x86/kernel/setup.c | 130 - arch/x86/mm/init.c| 74 ++--- arch/x86/mm/init_64.c |6 +- arch/x86/platform/efi/efi.c |8 +-- 6 files changed, 167 insertions(+), 66 deletions(-) -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 5/5] x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix
There could be cases where user supplied memmap=exactmap memory mappings do not mark the region where the kernel .text .data and .bss reside as E820_RAM as reported here: https://lkml.org/lkml/2012/8/14/86 Handle it by complaining, and adding the range back into the e820. Signed-off-by: Jacob Shin --- arch/x86/kernel/setup.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4217fb4..b84aceb5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -926,6 +926,21 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); + /* +* Complain if .text .data and .bss are not marked as E820_RAM and +* attempt to fix it by adding the range. We may have a confused BIOS, +* or the user may have incorrectly supplied it via memmap=exactmap. If +* we really are running on top non-RAM, we will crash later anyways. +*/ + if (!e820_all_mapped(code_resource.start, bss_resource.end, E820_RAM)) { + pr_warn(".text .data .bss are not marked as E820_RAM!\n"); + + e820_add_region(code_resource.start, + bss_resource.end - code_resource.start + 1, + E820_RAM); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + } + trim_bios_range(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/5] x86: Fixup code testing if a pfn is direct mapped
Update code that previously assumed pfns [ 0 - max_low_pfn_mapped ) and [ 4GB - max_pfn_mapped ) were always direct mapped, to now look up pfn_mapped ranges instead. Signed-off-by: Jacob Shin --- arch/x86/kernel/cpu/amd.c |6 +- arch/x86/platform/efi/efi.c |8 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9d92e19..554ccfc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -677,11 +677,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT + if (pfn_is_mapped(tseg)) set_memory_4k((unsigned long)__va(tseg), 1); } } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660eda..f1facde 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -776,7 +776,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md, *prev_md = NULL; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages, end_pfn; + u64 end, systab, addr, npages, start_pfn, end_pfn; void *p, *va, *new_memmap = NULL; int count = 0; @@ -827,10 +827,10 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; + start_pfn = PFN_DOWN(md->phys_addr); end_pfn = PFN_UP(end); - if (end_pfn <= max_low_pfn_mapped - || (end_pfn > (1UL << (32 - PAGE_SHIFT)) - && end_pfn <= max_pfn_mapped)) + + if (pfn_range_is_mapped(start_pfn, end_pfn)) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size, md->type); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html 
Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping
Depending on the platform, init_memory_mapping() may be called multiple times. Move it out to setup_arch() to avoid writing to cr4 on every call. Signed-off-by: Jacob Shin --- arch/x86/kernel/setup.c | 10 ++ arch/x86/mm/init.c | 10 -- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80..751e020 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p) init_gbpages(); + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __supported_pte_mask |= _PAGE_GLOBAL; + } + /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn
Re: [PATCH] regulator: disable supply regulator if it is enabled for boot-on
On Fri, Aug 24, 2012 at 11:22:05PM +0530, Laxman Dewangan wrote: > I tried to reproduce the issue but could not able to do this. > Can you please send me your board/dt files where you are porviding > platform data for regulator? > This will help me to reproduce the issue. Here's a dts patch: diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi index dba53fd..386eafa 100644 --- a/arch/arm/boot/dts/vexpress-v2m.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m.dtsi @@ -207,5 +207,20 @@ regulator-max-microvolt = <330>; regulator-always-on; }; + + vbat: fixedregulator@1 { + compatible = "regulator-fixed"; + regulator-name = "vbat"; + regulator-min-microvolt = <330>; + regulator-max-microvolt = <330>; + }; + + fixedregulator@2 { + compatible = "regulator-fixed"; + regulator-name = "vtest1"; + regulator-min-microvolt = <330>; + regulator-max-microvolt = <330>; + vin-supply = <&vbat>; + regulator-boot-on; + }; }; }; If you want to test it with fixed regulators, you'll need the hack below to bypass the ops->disable check in regulator_init_complete(). diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index 185468c..05f3028 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -129,9 +129,16 @@ static int fixed_voltage_list_voltage(struct regulator_dev *dev, return data->microvolts; } +static int fixed_enable(struct regulator_dev *dev) +{ + return 0; +} + static struct regulator_ops fixed_voltage_ops = { .get_voltage = fixed_voltage_get_voltage, .list_voltage = fixed_voltage_list_voltage, + .disable = fixed_enable, + .enable = fixed_enable, }; static int __devinit reg_fixed_voltage_probe(struct platform_device *pdev) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ioat: Adding Ivy Bridge IOATDMA PCI device IDs
Signed-off-by: Dave Jiang --- drivers/dma/ioat/pci.c | 22 ++ 1 files changed, 22 insertions(+), 0 deletions(-) diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 5e3a40f..c057306 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -40,6 +40,17 @@ MODULE_VERSION(IOAT_DMA_VERSION); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); +#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27 +#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e +#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f + static struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v1 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, @@ -83,6 +94,17 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at 
http://www.tux.org/lkml/
RE: 3.5.1 kernel: Oops + stracktrace + ext4 kernel errors!
-Original Message- From: Theodore Ts'o [mailto:ty...@mit.edu] Sent: Friday, August 24, 2012 6:39 PM To: Justin Piszcz Cc: linux-kernel@vger.kernel.org; linux-e...@vger.kernel.org; al piszcz Subject: Re: 3.5.1 kernel: Oops + stracktrace + ext4 kernel errors! On Fri, Aug 24, 2012 at 11:31:44AM -0400, Justin Piszcz wrote: > Hello, > > Thoughts? > > Saw this when trying to copy files to array with Samba and doing file > operations: > > [28939.505792] [ cut here ] > [29367.345433] BUG: unable to handle kernel NULL pointer dereference > at 0028 > [29367.345455] IP: [] ext4_ext_remove_space+0x89c/0xc90 Fixed by commit 89a4e48f84 in upstream. It is scheduled for inclusion in the stable kernel series; I believe it should be in 3.5.3. Regards, - Ted -- Thanks.. if/when I come across another box I can test with I will ensure that patch (89a4e48f84) gets applied. For PROD hosts I need stability > 16T. Justin. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable
Hello, On Sat, Aug 25, 2012 at 12:59:25AM +0200, Sasha Levin wrote: > That's the thing, the amount of things you can do with a given bucket > is very limited. You can't add entries to any point besides the head (without > walking the entire list). Kinda my point. We already have all the hlist*() interface to deal with such cases. Having something which is evidently the trivial hlist hashtable and advertises as such in the interface can be helpful. I think we need that more than we need anything fancy. Heh, this is a debate about which one is less insignificant. I can see your point. I'd really like to hear what others think on this. Guys, do we want something which is evidently a trivial hlist hashtable which can use hlist_*() API directly or do we want something better encapsulated? Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [ 08/32] drm/i915: correctly order the ring init sequence
On Sun, Aug 19, 2012 at 08:57:04PM -0700, Greg Kroah-Hartman wrote: > From: Greg KH > > 3.4-stable review patch. If anyone has any objections, please let me know. > > -- > > From: Daniel Vetter > > commit 0d8957c8a90bbb5d34fab9a304459448a5131e06 upstream. > > We may only start to set up the new register values after having > confirmed that the ring is truely off. Otherwise the hw might lose the > newly written register values. This is caught later on in the init > sequence, when we check whether the register writes have stuck. > > Reviewed-by: Jani Nikula > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50522 > Tested-by: Yang Guang > Signed-off-by: Daniel Vetter > Signed-off-by: Greg Kroah-Hartman I think with this commit also the following commits should be picked for 3.4 right? (as suggested for 3.0): f01db988ef6f6c70a6cc36ee71e4a98a68901229 b7884eb45ec98c0d34c7f49005ae9d4b4b4e38f6 Just reporting that I tested this 3.4.10 proposed update with the two commits above cherry-picked/backported applied, and worked ok. The first cherry-picked cleanly, while b7884eb45ec98c0d34c7f49005ae9d4b4b4e38f6 needed backporting for 3.4, like happened with 3.0, this is a proposed backport which I applied/tested, is similar to 3.0 and 3.2 versions: >From a2712ae26afde5be2bc62080755d1324164f53d3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 4 Jun 2012 11:18:15 +0200 Subject: [PATCH] drm/i915: hold forcewake around ring hw init Empirical evidence suggests that we need to: On at least one ivb machine when running the hangman i-g-t test, the rings don't properly initialize properly - the RING_START registers seems to be stuck at all zeros. Holding forcewake around this register init sequences makes chip reset reliable again. 
Note that this is not the first such issue: commit f01db988ef6f6c70a6cc36ee71e4a98a68901229 Author: Sean Paul Date: Fri Mar 16 12:43:22 2012 -0400 drm/i915: Add wait_for in init_ring_common added delay loops to make RING_START and RING_CTL initialization reliable on the blt ring at boot-up. So I guess it won't hurt if we do this unconditionally for all force_wake needing gpus. To avoid copy&pasting of the HAS_FORCE_WAKE check I've added a new intel_info bit for that. v2: Fixup missing commas in static struct and properly handling the error case in init_ring_common, both noticed by Jani Nikula. Cc: sta...@vger.kernel.org Reported-and-tested-by: Yang Guang Reviewed-by: Eugeni Dodonov Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50522 Signed-Off-by: Daniel Vetter [herton: backport to 3.4: - adjust for different struct intel_device_info layouts - drop changes to Haswell/Valleyview, not present in 3.4 - NEEDS_FORCE_WAKE is on i915_drv.h, and doesn't have IS_VALLEYVIEW ] Signed-off-by: Herton Ronaldo Krzesinski --- drivers/gpu/drm/i915/i915_drv.c |4 drivers/gpu/drm/i915/i915_drv.h |7 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 +--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ae8a64f..c654557 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -224,6 +224,7 @@ static const struct intel_device_info intel_sandybridge_d_info = { .has_bsd_ring = 1, .has_blt_ring = 1, .has_llc = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_sandybridge_m_info = { @@ -233,6 +234,7 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_bsd_ring = 1, .has_blt_ring = 1, .has_llc = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_ivybridge_d_info = { @@ -241,6 +243,7 @@ static const struct intel_device_info intel_ivybridge_d_info = { .has_bsd_ring = 1, .has_blt_ring = 1, .has_llc = 1, + 
.has_force_wake = 1, }; static const struct intel_device_info intel_ivybridge_m_info = { @@ -250,6 +253,7 @@ static const struct intel_device_info intel_ivybridge_m_info = { .has_bsd_ring = 1, .has_blt_ring = 1, .has_llc = 1, + .has_force_wake = 1, }; static const struct pci_device_id pciidlist[] = { /* aka */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5fabc6c..a2117b2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -255,6 +255,7 @@ struct intel_device_info { u8 is_broadwater:1; u8 is_crestline:1; u8 is_ivybridge:1; + u8 has_force_wake:1; u8 has_fbc:1; u8 has_pipe_cxsr:1; u8 has_hotplug:1; @@ -1051,6 +1052,8 @@ struct drm_i915_file_private { #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT) #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX) +#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) + #include "i915_trace.
Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable
>> Why do we need hash_head/hash_for_each_head()? I haven't stumbled on a place >> yet >> that needed direct access to the bucket itself. > > Because whole hash table walking is much less common and we can avoid > another full set of iterators. I don't agree. Out of 32 places which now use a hashtable iterator of some kind, 12 of them (38%) walk the entire table. The thing is that usually data structures are indexable by more than one key, so usually hashtables are fully walked in cold paths to look for different keys. Take kernel/workqueue.c for example: There are 4 places which do a key lookup (find_worker_executing_work()) and 3 places which fully walk the entire table (for_each_busy_worker()). >> This basically means 11 macros/functions that would let us have full >> encapsulation and will make it very easy for future implementations to work >> with >> this API instead of making up a new one. It's also not significantly (+~2-3) >> more than the ones you listed. > > I'm not sure whether full encapsulation is a good idea for trivial > hashtable. For higher level stuff, sure but at this level I think > benefits coming from known obvious implementation can be larger. > e.g. suppose the caller knows certain entries to be way colder than > others and wants to put them at the end of the chain. That's the thing, the amount of things you can do with a given bucket is very limited. You can't add entries to any point besides the head (without walking the entire list). Basically you can do only two things with a bucket: - Add something to it at a very specific place. - Walk it So I don't understand what's the point in exposing the internal structure of the hashtable if there's nothing significant that can be gained from it by the user. > > So, I think implementing the minimal set of helpers which reflect the > underlying trivial implementation explicitly could actually be better > even when discounting the reduced number of wrappers. > > Thanks. 
> -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v7 0/4] cgroup: add xattr support
Hello, On Thu, Aug 23, 2012 at 04:53:27PM -0400, a...@redhat.com wrote: > This series is a refreshed version of a patchset submitted by Li Zefan back > in March: > https://lkml.org/lkml/2012/3/1/13 Applied to cgroup/for-3.7 w/ "Original-Patch-by: Li Zefan" added for the first three patches. * Can you please update your MTA settings so that the From: header contains your full name? Importing the series to git ended up with "a...@redhat.com ". * Can you please add some comments and documentation regarding this? Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 3.5.1 kernel: Oops + stracktrace + ext4 kernel errors!
On Fri, Aug 24, 2012 at 11:31:44AM -0400, Justin Piszcz wrote: > Hello, > > Thoughts? > > Saw this when trying to copy files to array with Samba and doing file > operations: > > [28939.505792] [ cut here ] > [29367.345433] BUG: unable to handle kernel NULL pointer dereference > at 0028 > [29367.345455] IP: [] ext4_ext_remove_space+0x89c/0xc90 Fixed by commit 89a4e48f84 in upstream. It is scheduled for inclusion in the stable kernel series; I believe it should be in 3.5.3. Regards, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] ACPI: power: Use KERN_DEBUG when no power resources are found
On Thu, 2012-08-23 at 15:26 +0200, Borislav Petkov wrote: > On Fri, Aug 10, 2012 at 10:05:53AM +0800, Aaron Lu wrote: > > commit a606dac368eed5696fb38e16b1394f1d049c09e9 adds support to link > > devices which have _PRx, if a device does not have _PRx, a warning > > message will be printed. > > > > This commit is for ZPODD on Intel's platform, on AMD's platform, there > > is no _PRx to support ZPODD, we use _PSx. > > > > So instead of printing a useless warning message on AMD's platform, > > changing the print level to DEBUG to suppress this message. [] > > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c [] > > @@ -460,7 +460,7 @@ int acpi_power_resource_register_device(struct device > > *dev, acpi_handle handle) > > return ret; > > > > no_power_resource: > > - printk(KERN_WARNING PREFIX "Invalid Power Resource to register!"); > > + printk(KERN_DEBUG PREFIX "Invalid Power Resource to register!"); Perhaps add something like: if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) printk(etc...) instead? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/1] backlight: Add Backlight driver for lm3630 chip
On Fri, 24 Aug 2012 14:03:23 +0900 GShark Jeong wrote: > I've reviewed and tested your patch (lm3630 and lm3639) on my real board > and these are working well. > Thank you. Great, thanks. > (Do I need to send back this patch to you again? or will the current > status be applied for next branch?) No, that's OK - when the time comes to send the patch upstream I shall first fold the fixup patches into the base patch and update the changelog. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Logitech USB headset not working in 3.6-rc3
On Fri, Aug 24, 2012 at 11:30:12PM +0200, Daniel Mack wrote: > On Fri, Aug 24, 2012 at 9:08 PM, Josh Boyer wrote: > > Hi All, > > > > We've had a report[1] that the Logitech USB headset 0003:046D:0A0C isn't > > working with 3.6-rc3. It seems the last working kernel was based on > > commit 10c63c9, and it first stopped working with a kernel based on > > commit 23dcfa6. There are only a few ALSA commits between those > > revisions, so hopefully this is something that is fairly easy to > > identify. The only commit to USB audio in that set is: > > > > commit e9ba389c5ffc4dd29dfe17e00e4887730235 > > Author: Takashi Iwai > > Date: Wed Aug 15 12:32:00 2012 +0200 > > > > ALSA: usb-audio: Fix scheduling-while-atomic bug in PCM capture stream > > > > > > I've CC'd the reporter and attached the alsa-info is below. > > Does it work again once you revert that commit? Haven't built a kernel with that done yet. Had a few other things pop up this afternoon. If Bruno doesn't build one himself, I'll try to get one built later this evening for testing. josh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Logitech USB headset not working in 3.6-rc3
Hi, On 24.08.2012 21:08, Josh Boyer wrote: > We've had a report[1] that the Logitech USB headset 0003:046D:0A0C isn't > working with 3.6-rc3. It seems the last working kernel was based on > commit 10c63c9, and it first stopped working with a kernel based on > commit 23dcfa6. There are only a few ALSA commits between those > revisions, so hopefully this is something that is fairly easy to > identify. The only commit to USB audio in that set is: [...] > !!ALSA/HDA dmesg > !!-- > > [ 38.190306] SELinux: initialized (dev configfs, type configfs), uses > genfs_contexts > [ 38.229616] snd_hda_intel :00:1b.0: irq 66 for MSI/MSI-X > [ 38.270699] ALSA sound/usb/mixer.c:866 6:0: cannot get min/max values for > control 2 (id 6) > [ 38.274097] ALSA sound/usb/mixer.c:866 1:0: cannot get min/max values for > control 2 (id 1) > [ 38.276753] ALSA sound/usb/mixer.c:866 2:0: cannot get min/max values for > control 2 (id 2) > [ 38.279322] ALSA sound/pci/hda/hda_auto_parser.c:322 autoconfig: > line_outs=1 (0xe/0x0/0x0/0x0/0x0) type:line > [ 38.279326] ALSA sound/pci/hda/hda_auto_parser.c:326speaker_outs=1 > (0x11/0x0/0x0/0x0/0x0) > [ 38.279329] ALSA sound/pci/hda/hda_auto_parser.c:330hp_outs=1 > (0xd/0x0/0x0/0x0/0x0) > [ 38.279331] ALSA sound/pci/hda/hda_auto_parser.c:331mono: mono_out=0x0 > [ 38.279333] ALSA sound/pci/hda/hda_auto_parser.c:335inputs: > [ 38.279336] ALSA sound/pci/hda/hda_auto_parser.c:339 Mic=0x10 > [ 38.279342] ALSA sound/pci/hda/hda_auto_parser.c:339 Line=0xf > [ 38.285983] usbcore: registered new interface driver snd-usb-audio Also, according to this dmesg, the device is probed just fine (which the commit you mentioned wouldn't change anything about though). So what does "isn't working anymore" refer to precisely? Are there any more dmesg entries generated once the stream is started? 
Daniel -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/fpu] x86, fpu: use non-lazy fpu restore for processors supporting xsave
Commit-ID: 127f5403bfbc5f52cf0fbbadfa5e624a32a137ff Gitweb: http://git.kernel.org/tip/127f5403bfbc5f52cf0fbbadfa5e624a32a137ff Author: Suresh Siddha AuthorDate: Fri, 24 Aug 2012 14:13:02 -0700 Committer: H. Peter Anvin CommitDate: Fri, 24 Aug 2012 14:26:54 -0700 x86, fpu: use non-lazy fpu restore for processors supporting xsave The fundamental model of the current Linux kernel is to lazily init and restore FPU instead of restoring the task state during context switch. This changes that fundamental lazy model to the non-lazy model for the processors supporting xsave feature. Reasons driving this model change are: i. Newer processors support optimized state save/restore using xsaveopt and xrstor by tracking the INIT state and MODIFIED state during context-switch. This is faster than modifying the cr0.TS bit which has serializing semantics. ii. Newer glibc versions use SSE for some of the optimized copy/clear routines. With certain workloads (like boot, kernel-compilation etc), the application completes its work within the first 5 task switches, thus taking up to 5 #DNA traps with the kernel not getting a chance to apply the above mentioned pre-load heuristic. iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit and thus will not work correctly in the presence of lazy restore. Non-lazy state restore is needed for enabling such features. Some data on a two socket SNB system: * Saved 20K DNA exceptions during boot on a two socket SNB system. * Saved 50K DNA exceptions during kernel-compilation workload. * Improved throughput of the AVX based checksumming function inside the kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts pair. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.sid...@intel.com Cc: Jim Kukunas Cc: NeilBrown Cc: Avi Kivity Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 96 +++ arch/x86/include/asm/i387.h |1 + arch/x86/include/asm/xsave.h|1 + arch/x86/kernel/i387.c | 20 ++- arch/x86/kernel/process.c | 12 +++-- arch/x86/kernel/process_32.c|4 -- arch/x86/kernel/process_64.c|4 -- arch/x86/kernel/traps.c |5 ++- arch/x86/kernel/xsave.c | 57 + 9 files changed, 140 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index fac39e9..e31cc6e 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_xsave()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_xsave()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" +"2:\n" +_ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* +* Forget coprocessor state.. +*/ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_xsave()) + drop_fpu(tsk); + else + xrstor_state(init_xstate_buf, -1); +} + /* * FPU state switching for scheduling. * @@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* +* If the task has used the math, pre-load the FPU on xsave processors +* or if the past 5 consecutive context-switches used math. 
+*/ + fpu.preload = tsk_used_math(new) && (use_xsave() || +new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_xsave()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0
[tip:x86/fpu] lguest, x86: handle guest TS bit for lazy/ non-lazy fpu host models
Commit-ID: 1ce83ffda9aea53e6e4b6b6a82c028a019526010 Gitweb: http://git.kernel.org/tip/1ce83ffda9aea53e6e4b6b6a82c028a019526010 Author: Suresh Siddha AuthorDate: Fri, 24 Aug 2012 14:13:01 -0700 Committer: H. Peter Anvin CommitDate: Fri, 24 Aug 2012 14:26:52 -0700 lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models Instead of using unlazy_fpu() check if user_has_fpu() and set/clear the host TS bits so that the lguest works fine with both the lazy/non-lazy FPU host models with minimal changes. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-6-git-send-email-suresh.b.sid...@intel.com Cc: Rusty Russell Signed-off-by: H. Peter Anvin --- drivers/lguest/x86/core.c | 10 +++--- 1 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 39809035..4af12e1 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * we set it now, so we can trap and pass that trap to the Guest if it * uses the FPU. */ - if (cpu->ts) - unlazy_fpu(current); + if (cpu->ts && user_has_fpu()) + stts(); /* * SYSENTER is an optimized way of doing system calls. We can't allow @@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) if (boot_cpu_has(X86_FEATURE_SEP)) wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + /* Clear the host TS bit if it was set above. */ + if (cpu->ts && user_has_fpu()) + clts(); + /* * If the Guest page faulted, then the cr2 register will tell us the * bad virtual address. We have to grab this now, because once we @@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * a different CPU. So all the critical stuff should be done * before this. 
*/ - else if (cpu->regs->trapnum == 7) + else if (cpu->regs->trapnum == 7 && !user_has_fpu()) math_state_restore(); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH drm-next 3/3] drm/i915/contexts: Fixup merge with commit b6c7488df68a
This is a fixup patch for the merge of drm-next into linux-next caused by commit b6c7488df68a ("drm/i915/contexts: fix list corruption"). Reported-By: Stephen Rothwell Signed-off-by: Sedat Dilek --- drivers/gpu/drm/i915/i915_gem.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4f6841d..e8a5cb2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2273,11 +2273,11 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { - ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); + ret = i915_ring_idle(ring); if (ret) return ret; - ret = i915_ring_idle(ring); + ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); if (ret) return ret; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH drm-next 2/3] drm/i915: Remove reference to drm_display_info raw_edid field
Reported-By: Stephen Rothwell Acked-by: Jani Nikula Acked-by: Dave Airlie Signed-off-by: Sedat Dilek --- drivers/gpu/drm/i915/intel_modes.c |1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index 29b7259..4bc1c0f 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -45,7 +45,6 @@ int intel_connector_update_modes(struct drm_connector *connector, drm_mode_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); drm_edid_to_eld(connector, edid); - connector->display_info.raw_edid = NULL; kfree(edid); return ret; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH drm-next 1/3] drm/udl: usb: Fix recursive Kconfig dependency
In drivers/usb/Kconfig "config USB_ARCH_HAS_HCD" is within "if USB_SUPPORT" statement. In drivers/gpu/drm/Kconfig "config DRM_USB" depends on USB_ARCH_HAS_HCD but selects USB_SUPPORT which leads to the error for udl Kconfig: $ yes "" | make oldconfig scripts/kconfig/conf --oldconfig Kconfig drivers/gpu/drm/udl/Kconfig:1:error: recursive dependency detected! drivers/gpu/drm/udl/Kconfig:1: symbol DRM_UDL depends on USB_ARCH_HAS_HCD drivers/usb/Kconfig:76: symbol USB_ARCH_HAS_HCD depends on USB_SUPPORT drivers/usb/Kconfig:58: symbol USB_SUPPORT is selected by DRM_USB drivers/gpu/drm/Kconfig:22: symbol DRM_USB is selected by DRM_UDL Fix this by changing from select to depends on USB_SUPPORT in "config DRM_USB". This is a follow-up fix to df0b344300724e00db9fff7eb6406eb91f450b91 in Dave's drm-next GIT branch. [ v2: Restore old status, but change from select to depends on USB_SUPPORT ] [ v3: Use common prefix "drm/udl" in label ] Signed-off-by: Sedat Dilek --- drivers/gpu/drm/Kconfig |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 3a8c683..0cbdc45 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -22,9 +22,8 @@ menuconfig DRM config DRM_USB tristate depends on DRM - depends on USB_ARCH_HAS_HCD + depends on USB_SUPPORT && USB_ARCH_HAS_HCD select USB - select USB_SUPPORT config DRM_KMS_HELPER tristate -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/fpu] x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage
Commit-ID: 964735018df03c94dd12665385d59e3b2c7c08b8 Gitweb: http://git.kernel.org/tip/964735018df03c94dd12665385d59e3b2c7c08b8 Author: Suresh Siddha AuthorDate: Fri, 24 Aug 2012 14:13:00 -0700 Committer: H. Peter Anvin CommitDate: Fri, 24 Aug 2012 14:26:50 -0700 x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage Use kernel_fpu_begin/end() instead of unconditionally accessing cr0 and saving/restoring just the few used xmm/ymm registers. This has some advantages like: * If the task's FPU state is already active, then kernel_fpu_begin() will just save the user-state and avoid the read/write of cr0. In general, cr0 accesses are much slower. * Manual save/restore of xmm/ymm registers will affect the 'modified' and the 'init' optimizations brought in by the xsaveopt/xrstor infrastructure. * Forward compatibility with future vector register extensions will be a problem if the xmm/ymm registers are manually saved and restored (corrupting the extended state of those vector registers). With this patch, there was no significant difference in the xor throughput using AVX, measured during boot. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-5-git-send-email-suresh.b.sid...@intel.com Cc: Jim Kukunas Cc: NeilBrown Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/xor_32.h | 56 +--- arch/x86/include/asm/xor_64.h | 61 ++-- arch/x86/include/asm/xor_avx.h | 54 --- 3 files changed, 29 insertions(+), 142 deletions(-) diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 4545708..aabd585 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile( \ - "movups %%xmm0,(%0) ;\n\t" \ - "movups %%xmm1,0x10(%0) ;\n\t" \ - "movups %%xmm2,0x20(%0) ;\n\t" \ - "movups %%xmm3,0x30(%0) ;\n\t" \ - : \ - : "r" (xmm_save)\ - : "memory");\ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%0),%%xmm0 ;\n\t" \ - "movups 0x10(%0),%%xmm1 ;\n\t" \ - "movups 0x20(%0),%%xmm2 ;\n\t" \ - "movups 0x30(%0),%%xmm3 ;\n\t" \ - : \ - : "r" (xmm_save)\ - : "memory");\ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0) - -#define ALIGN16 __attribute__((aligned(16))) - #define OFFS(x)"16*("#x")" #define PF_OFFS(x) "256+16*("#x")" #definePF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" @@ -587,10 +555,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long 
*p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -770,10 +732
[tip:x86/fpu] x86, kvm: use kernel_fpu_begin/end() in kvm_load/ put_guest_fpu()
Commit-ID: 98700fa647b3572f7fa55485570ab9fc53b91d23 Gitweb: http://git.kernel.org/tip/98700fa647b3572f7fa55485570ab9fc53b91d23 Author: Suresh Siddha AuthorDate: Fri, 24 Aug 2012 14:12:59 -0700 Committer: H. Peter Anvin CommitDate: Fri, 24 Aug 2012 14:26:49 -0700 x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu() kvm's guest fpu save/restore should be wrapped around kernel_fpu_begin/end(). This will avoid for example taking a DNA in kvm_load_guest_fpu() when it tries to load the fpu immediately after doing unlazy_fpu() on the host side. More importantly this will prevent the host process fpu from being corrupted. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-4-git-send-email-suresh.b.sid...@intel.com Cc: Avi Kivity Signed-off-by: H. Peter Anvin --- arch/x86/kvm/x86.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be6d549..b92cc39 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5954,7 +5954,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - unlazy_fpu(current); + kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5968,6 +5968,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); + kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/fpu] x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()
Commit-ID: cc50fae05beb2db9f4587bbb1a0d6aba2af5b407 Gitweb: http://git.kernel.org/tip/cc50fae05beb2db9f4587bbb1a0d6aba2af5b407 Author: Suresh Siddha AuthorDate: Fri, 24 Aug 2012 14:12:58 -0700 Committer: H. Peter Anvin CommitDate: Fri, 24 Aug 2012 14:26:48 -0700 x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig() Few lines below we do drop_fpu() which is more safer. Remove the unnecessary user_fpu_end() in save_xstate_sig(), which allows the drop_fpu() to ignore any pending exceptions from the user-space and drop the current fpu. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-3-git-send-email-suresh.b.sid...@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 17 +++-- arch/x86/kernel/xsave.c |1 - 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index fe95ad0..fac39e9 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -412,22 +412,11 @@ static inline void __drop_fpu(struct task_struct *tsk) } /* - * The actual user_fpu_begin/end() functions - * need to be preemption-safe. + * Need to be preemption-safe. * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. + * NOTE! user_fpu_begin() must be used only immediately before restoring + * it. This function does not do any save/restore on their own. 
*/ -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - static inline void user_fpu_begin(void) { preempt_disable(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 6cfc7d9..f0bb844 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -254,7 +254,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) fpu_fxsave(&tsk->thread.fpu); - user_fpu_end(); } else { sanitize_i387_state(tsk); if (__copy_to_user(buf_fx, xsave, xstate_size)) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
perf backtraces off-by-1
Some of our language runtimes like to map IP addresses in perf backtrace to specific byte codes. The way things stand now, the addresses on the backtrace are return addresses, rather than the caller. I think this issue may be present for other unusual call/return sequences where the user may be more interested in the calling instruction rather than the instruction control flow would return to. A simple hack such as the one below makes our JIT guys happy. But the code is not right if there was an asynchronous transfer of control (eg: signal handler or interrupt). libunwind contains similar code, but has the additional info in the unwind information to recognize async control transfer. Wondering if this has been discussed before. One option is to support this for user mode only, with code to detect signal frames. Any other ideas? -Arun --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -296,6 +296,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; int err; + int async; callchain_cursor_reset(&evsel->hists.callchain_cursor); @@ -322,6 +323,11 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, continue; } + /* XXX: check if this was an async control transfer */ + async = 0; +if (!async) { + ip--; + } al.filtered = false; thread__find_addr_location(thread, self, cpumode, MAP__FUNCTION, ip, &al, NULL); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/fpu] x86, fpu: drop_fpu() before restoring new state from sigframe
Commit-ID: 739390035c5fba2132fa424309786ff7bdd2cc1e Gitweb: http://git.kernel.org/tip/739390035c5fba2132fa424309786ff7bdd2cc1e Author: Suresh Siddha AuthorDate: Fri, 24 Aug 2012 14:12:57 -0700 Committer: H. Peter Anvin CommitDate: Fri, 24 Aug 2012 14:26:47 -0700 x86, fpu: drop_fpu() before restoring new state from sigframe No need to save the state with unlazy_fpu(), that is about to get overwritten by the state from the signal frame. Instead use drop_fpu() and continue to restore the new state. Also fold the stop_fpu_preload() into drop_fpu(). Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-2-git-send-email-suresh.b.sid...@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h |7 +-- arch/x86/kernel/xsave.c |8 +++- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index ba83a08..fe95ad0 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -448,17 +448,12 @@ static inline void save_init_fpu(struct task_struct *tsk) preempt_enable(); } -static inline void stop_fpu_preload(struct task_struct *tsk) -{ - tsk->fpu_counter = 0; -} - static inline void drop_fpu(struct task_struct *tsk) { /* * Forget coprocessor state.. 
*/ - stop_fpu_preload(tsk); + tsk->fpu_counter = 0; preempt_disable(); __drop_fpu(tsk); preempt_enable(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a23d100..6cfc7d9 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -381,16 +381,14 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; struct user_i387_ia32_struct env; - stop_fpu_preload(tsk); - unlazy_fpu(tsk); + drop_fpu(tsk); if (__copy_from_user(xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - drop_fpu(tsk); + __copy_from_user(&env, buf, sizeof(env))) return -1; - } sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); } else { /* * For 64-bit frames and 32-bit fsave frames, restore the user -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] fork: fix oops after fork failure
On Thu, 23 Aug 2012 19:36:08 +0400 Glauber Costa wrote: > When we want to duplicate a new process, dup_task_struct() will undergo > a series of allocations. If alloc_thread_info_node() fails, we call > free_task_struct() and return. > > This seems right, but it is not. free_task_struct() will not only free > the task struct from the kmem_cache, but will also call > arch_release_task_struct(). The problem is that this function is > supposed to undo whatever arch-specific work done by > arch_dup_task_struct(), that is not yet called at this point. The > particular problem I ran across was that in x86, we will arrive at > fpu_free() without having ever allocated it. I think this was already fixed by f19b9f74b7ea3b ("fork: fix error handling in dup_task()"). As you would have noticed if you were preparing patches against up-to-date kernel versions! -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] mm: hugetlb: add arch hook for clearing page flags before entering pool
On Thu, 23 Aug 2012 18:36:02 +0100 Will Deacon wrote: > On Thu, Aug 23, 2012 at 06:11:56PM +0100, Michal Hocko wrote: > > On Thu 23-08-12 17:37:13, Will Deacon wrote: > > > The core page allocator ensures that page flags are zeroed when freeing > > > pages via free_pages_check. A number of architectures (ARM, PPC, MIPS) > > > rely on this property to treat new pages as dirty with respect to the > > > data cache and perform the appropriate flushing before mapping the pages > > > into userspace. > > > > > > This can lead to cache synchronisation problems when using hugepages, > > > since the allocator keeps its own pool of pages above the usual page > > > allocator and does not reset the page flags when freeing a page into > > > the pool. > > > > > > This patch adds a new architecture hook, arch_clear_hugepage_flags, so > > > that architectures which rely on the page flags being in a particular > > > state for fresh allocations can adjust the flags accordingly when a > > > page is freed into the pool. You could have used __weak here quite neatly, but whatever. > Next step: start posting the ARM code! I suggest you keep this patch in whichever tree holds that arm code. If I see this patch turn up in linux-next then I'll just drop my copy, expecting that this patch will be merged alongside the ARM changes. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH can-next v6] can: add tx/rx LED trigger support
Hello Kurt, On Fri, Aug 24, 2012 at 02:42:48PM +0200, Kurt Van Dijck wrote: > On Fri, Aug 24, 2012 at 01:28:16PM +0200, Marc Kleine-Budde wrote: > > On 08/24/2012 07:10 AM, Kurt Van Dijck wrote: > > > Hello, > > > > > > I find the CAN led triggers an interesting thing. > > > > > > And then, this scenario fell crossed my mind: > > > Imagine I do: > > > [insert CAN device: can0] > > > $ ip link set can0 name helga > > > [insert another CAN device: again 'can0'] > > > > > > Registering 'can0-tx' led trigger will fail for the second CAN device, > > > since that led trigger name is already reserved for CAN device 'helga'. > > Good point. Yep, thanks for pointing that out! Interface renaming was something I considered when I first wrote the code and I had the mac80211-led driver in mind, as that driver uses the phy name and not the netdev one for its triggers. The reason why I did not care that much in the end is that on SoC based systems trigger-led association is made at probe time, based on data either from platform_data or devicetree, so I imagined that once the kernel is ported to the board and default triggers are set correctly at boot time, the userspace is free to rename CAN interfaces and nobody should notice... :^) The thing I did not consider are hot-plug interfaces mixed with renaming, such as in the case you pointed out - it's probably not really common but still possible. > > > I'm not sure how to fix such. > > > If 'rx' & 'tx' may be combined, reusing the netdev name may be possible? > > > Just wild thinking ... > > > > I think the device's name (not netdev) is unique in the system and > > cannot be changed. > > but may contain several netdev's ... Ouch. > > > > > On my device tree enabled mx28 I'm talking about the "80032000.can" in: > > You idea triggered another thougt: since control is put in device drivers, > why putting the name in the generic can_dev struct? Why not? That makes the API easy. 
> A more flexible approach to assign names is the key to success here. > The correct 'works in all conditions' approach is not yet in my sight :-( Agreed. What about using a combination of device name + an optional port index specified in devm_can_led_init()? (something similar to platform_device names) Of course that would require changing the API for libraries like register_sja1000dev(), to add a port index. Fabio -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/3] HWPOISON: prevent inode cache removal to keep AS_HWPOISON sticky
Hello, On Thu, Aug 23, 2012 at 04:31:43PM -0400, Naoya Horiguchi wrote: > On Thu, Aug 23, 2012 at 05:11:25PM +0800, Fengguang Wu wrote: > > On Wed, Aug 22, 2012 at 11:17:35AM -0400, Naoya Horiguchi wrote: ... > > > diff --git v3.6-rc1.orig/fs/inode.c v3.6-rc1/fs/inode.c > > > index ac8d904..8742397 100644 > > > --- v3.6-rc1.orig/fs/inode.c > > > +++ v3.6-rc1/fs/inode.c > > > @@ -717,6 +717,15 @@ void prune_icache_sb(struct super_block *sb, int > > > nr_to_scan) > > > } > > > > > > /* > > > + * Keep inode caches on memory for user processes to certainly > > > + * be aware of memory errors. > > > + */ > > > + if (unlikely(mapping_hwpoison(inode->i_mapping))) { > > > + spin_unlock(&inode->i_lock); > > > + continue; > > > + } > > > > That chunk prevents reclaiming all the cached pages. However the intention > > is only to keep the struct inode together with the hwpoison bit? > > Yes, we can not reclaim pagecaches from shrink_slab(), but we can do from > shrink_zone(). So it shouldn't happen that cached pages on hwpoisoned file > remain for long under high memory pressure. I might lose your point. Are you suggesting this chunk should come after if (inode_has_buffers(inode) || inode->i_data.nrpages) { ... } block, aren't you? I think that's right, so I'll try and test it this weekend. > > > + /* > > >* Referenced or dirty inodes are still in use. Give them > > >* another pass through the LRU as we canot reclaim them now. > > >*/ > > > @@ -1405,6 +1414,9 @@ static void iput_final(struct inode *inode) > > > inode->i_state &= ~I_WILL_FREE; > > > } > > > > > > + if (unlikely(mapping_hwpoison(inode->i_mapping) && drop)) > > > + mapping_clear_hwpoison(inode->i_mapping); > > > > Is that clear necessary? Because the bit will be gone with the inode > > struct: it's going to be de-allocated anyway. > > With the chunk in prune_icache_sb() we keep the inode struct with > AS_HWPOISON set on memory, so in order to remove it, we need explicitly > clear the bit. 
> Without this clear, the inode remains until system reboot. And again, you are right here. Without this clear, this inode will be cleared in destroy_inode(). Thanks, Naoya -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] mm/mmu_notifier: init notifier if necessary
On Fri, 24 Aug 2012 22:37:55 +0800 Wanpeng Li wrote: > From: Gavin Shan > > While registering MMU notifier, new instance of MMU notifier_mm will > be allocated and later free'd if currrent mm_struct's MMU notifier_mm > has been initialized. That cause some overhead. The patch tries to > eleminate that. > > Signed-off-by: Gavin Shan > Signed-off-by: Wanpeng Li > --- > mm/mmu_notifier.c | 22 +++--- > 1 files changed, 11 insertions(+), 11 deletions(-) > > diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c > index 862b608..fb4067f 100644 > --- a/mm/mmu_notifier.c > +++ b/mm/mmu_notifier.c > @@ -192,22 +192,23 @@ static int do_mmu_notifier_register(struct mmu_notifier > *mn, > > BUG_ON(atomic_read(&mm->mm_users) <= 0); > > - ret = -ENOMEM; > - mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); > - if (unlikely(!mmu_notifier_mm)) > - goto out; > - > if (take_mmap_sem) > down_write(&mm->mmap_sem); > ret = mm_take_all_locks(mm); > if (unlikely(ret)) > - goto out_cleanup; > + goto out; > > if (!mm_has_notifiers(mm)) { > + mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), > + GFP_ATOMIC); Why was the code switched to the far weaker GFP_ATOMIC? We can still perform sleeping allocations inside mmap_sem. > + if (unlikely(!mmu_notifier_mm)) { > + ret = -ENOMEM; > + goto out_of_mem; > + } > INIT_HLIST_HEAD(&mmu_notifier_mm->list); > spin_lock_init(&mmu_notifier_mm->lock); > + > mm->mmu_notifier_mm = mmu_notifier_mm; > - mmu_notifier_mm = NULL; > } > atomic_inc(&mm->mm_count); > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V4] mfd: add MAX8907 core driver
On 08/15/2012 10:28 AM, Stephen Warren wrote: > From: Gyungoh Yoo > > The MAX8907 is an I2C-based power-management IC containing voltage > regulators, a reset controller, a real-time clock, and a touch-screen > controller. Samuel, Does this look OK now? (although you're probably traveling to a conference right now...) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] fs/proc: Move kfree outside pde_unload_lock
On Fri, Aug 24, 2012 at 11:45:45AM -0500, Nathan Zimmer wrote: > On 08/24/2012 09:58 AM, Eric Dumazet wrote: >> Le vendredi 24 août 2012 à 09:48 -0500, Nathan Zimmer a écrit : >>> On Wed, Aug 22, 2012 at 11:42:58PM +0200, Eric Dumazet wrote: On Wed, 2012-08-22 at 20:28 +0200, Eric Dumazet wrote: > Thats interesting, but if you really want this to fly, one RCU > conversion would be much better ;) > > pde_users would be an atomic_t and you would avoid the spinlock > contention. Here is what I had in mind, I would be interested to know how it helps a 512 core machine ;) >>> Here are the results and they look great. >>> >>> cpuinfo baselinemoved kfree Rcu >>> tasks read-secread-secread-sec >>> 1 0.0141 0.0141 0.0141 >>> 2 0.0140 0.0140 0.0142 >>> 4 0.0140 0.0141 0.0141 >>> 8 0.0145 0.0145 0.0140 >>> 16 0.0553 0.0548 0.0168 >>> 32 0.1688 0.1622 0.0549 >>> 64 0.5017 0.3856 0.1690 >>> 128 1.7005 0.9710 0.5038 >>> 256 5.2513 2.6519 2.0804 >>> 512 8.0529 6.2976 3.0162 >>> >>> >>> >> Indeed... >> >> Could you explicit the test you are actually doing ? >> >> Thanks >> >> > > > It is a dead simple test. > The test starts by forking off X number of tasks > assigning each their own cpu. > Each task then allocs a bit of memory. > All tasks wait on a memory cell for the go order. > We measure the read time starting here. > Once the go order is given they all read a chunk of the selected proc file. > I was using /proc/cpuinfo to test. > Once everyone has finished we take the end read time. > Here is the text for those who are curious. 
/**/ char *helpstr[] = { "This test program is a generic template.", 0 }; #include #include #include #include #include #include #include #include #include #include #include #include //#include "setup.h" #define MAXCPUS 4096 #define perrorx(s) do { perror(s); exit(1);} while(0) #define mb()asm volatile("mfence":::"memory") #define barrier() asm volatile("": : :"memory") #define cpu_relax() asm volatile ("rep;nop":::"memory"); extern int optind, opterr; extern char *optarg; static int verbose = 0; static int header = 0; static char *file = "/proc/stat"; static int numtasks = 1; static int repeat = 1; static int bufsize = 1024; struct control_s { int ready; int done; int go; int exit; } *cntl; static cpu_set_t *defmask; static int cpu_set_size; static void runon_init(void) { if (!defmask) { cpu_set_size = CPU_ALLOC_SIZE(MAXCPUS); defmask = CPU_ALLOC(MAXCPUS); if (sched_getaffinity(0, cpu_set_size, defmask) < 0) perrorx("unexpected failure in runon_init"); } } static double timeInSeconds(long time_in_microseconds) { double temp; temp = time_in_microseconds; temp /= 100; return temp; } static int runon(int cpu) { cpu_set_t *mask; runon_init(); mask = CPU_ALLOC(MAXCPUS); if (cpu < 0 || cpu >= MAXCPUS) return -1; CPU_ZERO_S(cpu_set_size, mask); CPU_SET_S(cpu, cpu_set_size, mask); if (sched_setaffinity(0, cpu_set_size, mask) < 0) return -1; CPU_FREE(mask); return 0; } static long getCurrentTime() { struct timeval tp; long usec; mb(); gettimeofday(&tp, 0); usec = tp.tv_sec * 100 + tp.tv_usec; mb(); return usec; } static void do_help(void) { char **p; for (p = helpstr; *p; p++) printf("%s\n", *p); exit(0); } static void slave(int id) { FILE *f; int i; char *buf; runon(id); buf = malloc(bufsize); memset(buf, 0, bufsize); if ((f = fopen(file, "r")) < 0) perrorx("open failed"); while (fgets(buf, bufsize, f) != NULL) { } fclose(f); (void)__sync_fetch_and_add(&cntl->ready, 1); while (!cntl->go) cpu_relax(); for (i = 0; i < repeat; i++) { if ((f = fopen(file, "r")) < 0) 
perrorx("open failed"); while (fgets(buf, bufsize, f) != NULL) { } fclose(f); barrier(); } (void)__sync_fetch_and_add(&cntl->done, 1); while (!cntl->exit) cpu_relax(); exit(0); } int main(int argc, char **argv) { int i, c, stat, er = 0; static char optstr[] = "b:f:hn:r:v"; unsigned long t, tfork, tready, tread, texit; opterr = 1; while ((c = getopt(argc, argv, optstr)) != EOF) switch (c) { case 'b': bufsize = atoi(optarg); break; case 'f': file = optarg; break; case 'h': header++; break; case 'n': numtasks = atoi(optarg); break; case 'r': repeat = atoi(optarg); break; case 'v': verbose++; break;
Re: [PATCH 0/6] x86, fpu: cleanups, introduce non-lazy FPU restore for xsave
I have applied this to tip:x86/fpu, but I have also asked Suresh to prepare a follow-on patch to decouple eager save from the existence of the XSAVE instruction. It seems pretty clear that eager save is a net benefit in the presence of XSAVEOPT, but it isn't as clear for only having XSAVE, as far as I can tell. Either way it would seem to be a policy decision that is somewhat separate from the exact instruction. -hpa -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Logitech USB headset not working in 3.6-rc3
On Fri, Aug 24, 2012 at 9:08 PM, Josh Boyer wrote: > Hi All, > > We've had a report[1] that the Logitech USB headset 0003:046D:0A0C isn't > working with 3.6-rc3. It seems the last working kernel was based on > commit 10c63c9, and it first stopped working with a kernel based on > commit 23dcfa6. There are only a few ALSA commits between those > revisions, so hopefully this is something that is fairly easy to > identify. The only commit to USB audio in that set is: > > commit e9ba389c5ffc4dd29dfe17e00e4887730235 > Author: Takashi Iwai > Date: Wed Aug 15 12:32:00 2012 +0200 > > ALSA: usb-audio: Fix scheduling-while-atomic bug in PCM capture stream > > > I've CC'd the reporter and attached the alsa-info is below. Does it work again once you revert that commit? Daniel > [1] https://bugzilla.redhat.com/show_bug.cgi?id=851619 > > upload=true&script=true&cardinfo= > !! > !!ALSA Information Script v 0.4.60 > !! > > !!Script ran on: Fri Aug 24 18:35:42 UTC 2012 > > > !!Linux Distribution > !!-- > > Fedora release 18 (Rawhide) Fedora release 18 (Rawhide) NAME=Fedora ID=fedora > PRETTY_NAME="Fedora 18 (Rawhide)" CPE_NAME="cpe:/o:fedoraproject:fedora:18" > Fedora release 18 (Rawhide) Fedora release 18 (Rawhide) > > > !!DMI Information > !!--- > > Manufacturer: Dell Inc. > Product Name: Precision WorkStation 690 > Product Version: > > > !!Kernel Information > !!-- > > Kernel release:3.6.0-0.rc3.git0.1.fc18.x86_64 > Operating System: GNU/Linux > Architecture: x86_64 > Processor: x86_64 > SMP Enabled: Yes > > > !!ALSA Version > !! > > Driver version: 1.0.25 > Library version:1.0.25 > Utilities version: 1.0.25 > > > !!Loaded ALSA modules > !!--- > > snd_hda_intel > snd_usb_audio > > > !!Sound Servers on this system > !! 
> > Pulseaudio: > Installed - Yes (/usr/bin/pulseaudio) > Running - Yes > > aRts: > Installed - Yes (/usr/bin/artsd) > Running - No > > Jack: > Installed - Yes (/usr/bin/jackd) > Running - No > > > !!Soundcards recognised by ALSA > !!- > > 0 [Intel ]: HDA-Intel - HDA Intel > HDA Intel at 0xfcffc000 irq 66 > 1 [Headset]: USB-Audio - Logitech USB Headset > Logitech Logitech USB Headset at usb-:00:1d.7-6.4, > full speed > > > !!PCI Soundcards installed in the system > !!-- > > 00:1b.0 Audio device: Intel Corporation 631xESB/632xESB High Definition Audio > Controller (rev 09) > > > !!Advanced information - PCI Vendor/Device/Subsystem ID's > !! > > 00:1b.0 0403: 8086:269a (rev 09) > Subsystem: 1028:01c0 > > > !!Loaded sound module options > !!-- > > !!Module: snd_hda_intel > align_buffer_size : -1 > bdl_pos_adj : > 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 > beep_mode : > N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N > enable : > Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y > enable_msi : -1 > id : > (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null) > index : > -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 > model : > (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null) > patch : > (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null) > position_fix : > 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 > 
power_save : 0 > power_save_controller : Y > probe_mask : > -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 > probe_only : > 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 > single_cmd : N > snoop : Y > > !!Module: snd_usb_audio > async_unlink : Y > device_setup : > 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 > enable : > Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y > id : > (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),
Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2
On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote: > Hi, > > Changes since v1: > > - Fixed preempt handling in alpha idle loop > - added ack from Geert > - fixed stable email address, sorry :-/ > > This time I built tested everywhere but: h8300 (compiler internal error), > and mn10300, parisc, score (cross compilers not available in > ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/) > > For testing, you can pull from: > > git://github.com/fweisbec/linux-dynticks.git > rcu/idle-fix-v2 > > Thanks. I have queued these on -rcu branch rcu/idle: git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git This problem has been in place since 3.3, so it is hard to argue that it is a regression for this merge window. I have therefore queued it for 3.7. Thanx, Paul > Frederic Weisbecker (11): > alpha: Fix preemption handling in idle loop > alpha: Add missing RCU idle APIs on idle loop > cris: Add missing RCU idle APIs on idle loop > frv: Add missing RCU idle APIs on idle loop > h8300: Add missing RCU idle APIs on idle loop > m32r: Add missing RCU idle APIs on idle loop > m68k: Add missing RCU idle APIs on idle loop > mn10300: Add missing RCU idle APIs on idle loop > parisc: Add missing RCU idle APIs on idle loop > score: Add missing RCU idle APIs on idle loop > xtensa: Add missing RCU idle APIs on idle loop > > arch/alpha/kernel/process.c |6 +- > arch/alpha/kernel/smp.c |1 + > arch/cris/kernel/process.c|3 +++ > arch/frv/kernel/process.c |3 +++ > arch/h8300/kernel/process.c |3 +++ > arch/m32r/kernel/process.c|3 +++ > arch/m68k/kernel/process.c|3 +++ > arch/mn10300/kernel/process.c |3 +++ > arch/parisc/kernel/process.c |3 +++ > arch/score/kernel/process.c |4 +++- > arch/xtensa/kernel/process.c |3 +++ > 11 files changed, 33 insertions(+), 2 deletions(-) > > -- > 1.7.5.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 03/23] serial: omap: don't access the platform_device
* Felipe Balbi [120823 03:37]: > The driver doesn't need to know about its platform_device. > > Everything the driver needs can be done through the > struct device pointer. In case we need to use the > OMAP-specific PM function pointers, those can make > sure to find the device's platform_device pointer > so they can find the struct omap_device through > pdev->archdata field. > > Tested-by: Shubhrajyoti D > Acked-by: Santosh Shilimkar > Signed-off-by: Felipe Balbi Acked-by: Tony Lindgren -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable
Hello, On Fri, Aug 24, 2012 at 10:53:45PM +0200, Sasha Levin wrote: > Yup, but we could be using the same API for dynamic non-resizable and static > if > we go with the DECLARE/hash_init. We could switch between them (and other > implementations) without having to change the code. I think it's better to stick with the usual conventions. > > * DECLARE/DEFINE > > * hash_head() > > * hash_for_each_head() > > * hash_add*() > > * hash_for_each_possible*() > * hash_for_each*() ? > > Why do we need hash_head/hash_for_each_head()? I haven't stumbled on a place > yet > that needed direct access to the bucket itself. Because whole hash table walking is much less common and we can avoid another full set of iterators. > This basically means 11 macros/functions that would let us have full > encapsulation and will make it very easy for future implementations to work > with > this API instead of making up a new one. It's also not significantly (+~2-3) > more than the ones you listed. I'm not sure whether full encapsulation is a good idea for trivial hashtable. For higher level stuff, sure but at this level I think benefits coming from known obvious implementation can be larger. e.g. suppose the caller knows certain entries to be way colder than others and wants to put them at the end of the chain. So, I think implementing the minimal set of helpers which reflect the underlying trivial implementation explicitly could actually be better even when discounting the reduced number of wrappers. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 02/14] aoe: kernel thread handles I/O completions for simple locking
On Fri, 17 Aug 2012 21:24:08 -0400 Ed Cashin wrote: > This patch makes the frames the aoe driver uses to track the > relationship between bios and packets more flexible and detached, so > that they can be passed to an "aoe_ktio" thread for completion of I/O. > > The frames are handled much like skbs, with a capped amount of > preallocation so that real-world use cases are likely to run smoothly > and degenerate gracefully even under memory pressure. > > Decoupling I/O completion from the receive path and serializing it in > a process makes it easier to think about the correctness of the > locking in the driver, especially in the case of a remote MAC address > becoming unusable. > > ... > > +static int > +kthread(void *vp) > +{ > + struct ktstate *k; > + DECLARE_WAITQUEUE(wait, current); > + sigset_t blocked; > + int more; > + > + k = vp; > +#ifdef PF_NOFREEZE PF_NOFREEZE can never be undefined. > + current->flags |= PF_NOFREEZE; > +#endif > + set_user_nice(current, -10); > + sigfillset(&blocked); > + sigprocmask(SIG_BLOCK, &blocked, NULL); > + flush_signals(current); This is a kernel thread - it shouldn't need to fiddle with signals. > + complete(&k->rendez); That's odd. Why do a complete() before we even start? A code comment is needed if this is indeed correct. > + do { > + __set_current_state(TASK_UNINTERRUPTIBLE); I think this statement is simply unneeded. > + spin_lock_irq(k->lock); > + more = k->fn(); > + if (!more) { > + add_wait_queue(k->waitq, &wait); > + __set_current_state(TASK_INTERRUPTIBLE); > + } > + spin_unlock_irq(k->lock); > + if (!more) { > + schedule(); > + remove_wait_queue(k->waitq, &wait); > + } else > + cond_resched(); Here we can do a cond_resched() when in state TASK_INTERRUPTIBLE. Such a schedule() will never return unless some other thread flips this task into state TASK_RUNNING. But if another thread does that, we should have been on that waitqueue! It seems all confused and racy. 
> + } while (!kthread_should_stop()); > + __set_current_state(TASK_RUNNING); I don't think there's any path by which we can get here in any state other than TASK_RUNNING. > + complete(&k->rendez); > + return 0; > +} This function might be a bit neater if it were to use prepare_to_wait()/finish_wait(). > +static void > +aoe_ktstop(struct ktstate *k) > +{ > + kthread_stop(k->task); > + wait_for_completion(&k->rendez); > +} > + > +static int > +aoe_ktstart(struct ktstate *k) > +{ > + struct task_struct *task; > + > + init_completion(&k->rendez); > + task = kthread_run(kthread, k, k->name); > + if (task == NULL || IS_ERR(task)) > + return -EFAULT; EFAULT makes no sense? > + k->task = task; > + wait_for_completion(&k->rendez); > + init_completion(&k->rendez);/* for exit */ > + return 0; > +} > > ... > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/6] x86, fpu: drop_fpu() before restoring new state from sigframe
No need to save the state with unlazy_fpu(), that is about to get overwritten by the state from the signal frame. Instead use drop_fpu() and continue to restore the new state. Also fold the stop_fpu_preload() into drop_fpu(). Signed-off-by: Suresh Siddha --- arch/x86/include/asm/fpu-internal.h |7 +-- arch/x86/kernel/xsave.c |8 +++- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index ba83a08..fe95ad0 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -448,17 +448,12 @@ static inline void save_init_fpu(struct task_struct *tsk) preempt_enable(); } -static inline void stop_fpu_preload(struct task_struct *tsk) -{ - tsk->fpu_counter = 0; -} - static inline void drop_fpu(struct task_struct *tsk) { /* * Forget coprocessor state.. */ - stop_fpu_preload(tsk); + tsk->fpu_counter = 0; preempt_disable(); __drop_fpu(tsk); preempt_enable(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a23d100..6cfc7d9 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -381,16 +381,14 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; struct user_i387_ia32_struct env; - stop_fpu_preload(tsk); - unlazy_fpu(tsk); + drop_fpu(tsk); if (__copy_from_user(xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - drop_fpu(tsk); + __copy_from_user(&env, buf, sizeof(env))) return -1; - } sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); } else { /* * For 64-bit frames and 32-bit fsave frames, restore the user -- 1.7.6.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/6] x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()
kvm's guest fpu save/restore should be wrapped around kernel_fpu_begin/end(). This will avoid for example taking a DNA in kvm_load_guest_fpu() when it tries to load the fpu immediately after doing unlazy_fpu() on the host side. More importantly this will prevent the host process fpu from being corrupted. Signed-off-by: Suresh Siddha Cc: Avi Kivity --- arch/x86/kvm/x86.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42bce48..67e773c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5969,7 +5969,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - unlazy_fpu(current); + kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5983,6 +5983,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); + kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); -- 1.7.6.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/6] x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage
Use kernel_fpu_begin/end() instead of unconditionally accessing cr0 and saving/restoring just the few used xmm/ymm registers. This has some advantages like: * If the task's FPU state is already active, then kernel_fpu_begin() will just save the user-state, avoiding the read/write of cr0. In general, cr0 accesses are much slower. * Manual save/restore of xmm/ymm registers will affect the 'modified' and the 'init' optimizations brought in by the xsaveopt/xrstor infrastructure. * Forward compatibility with future vector register extensions will be a problem if the xmm/ymm registers are manually saved and restored (corrupting the extended state of those vector registers). With this patch, there was no significant difference in the xor throughput using AVX, measured during boot. Signed-off-by: Suresh Siddha Cc: Jim Kukunas Cc: NeilBrown --- arch/x86/include/asm/xor_32.h | 56 +--- arch/x86/include/asm/xor_64.h | 61 ++-- arch/x86/include/asm/xor_avx.h | 54 --- 3 files changed, 29 insertions(+), 142 deletions(-) diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 4545708..aabd585 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile( \ - "movups %%xmm0,(%0) ;\n\t" \ - "movups %%xmm1,0x10(%0) ;\n\t" \ - "movups %%xmm2,0x20(%0) ;\n\t" \ - "movups %%xmm3,0x30(%0) ;\n\t" \ - : \ - : "r" (xmm_save)\ - : "memory");\ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%0),%%xmm0 ;\n\t" \ - "movups 0x10(%0),%%xmm1 ;\n\t" \ - "movups 0x20(%0),%%xmm2 ;\n\t" \ - "movups 0x30(%0),%%xmm3 ;\n\t" \ - : \ - : "r" (xmm_save)\ - : "memory");\ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0) - -#define ALIGN16 __attribute__((aligned(16))) - #define 
OFFS(x)"16*("#x")" #define PF_OFFS(x) "256+16*("#x")" #definePF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" @@ -587,10 +555,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a @@ -850,7 +810,7 @@ xor_sse_5(unsigned long b
[PATCH 6/6] x86, fpu: use non-lazy fpu restore for processors supporting xsave
The fundamental model of the current Linux kernel is to lazily init and restore FPU instead of restoring the task state during context switch. This patch changes that fundamental lazy model to the non-lazy model for the processors supporting xsave feature. Reasons driving this model change are: i. Newer processors support optimized state save/restore using xsaveopt and xrstor by tracking the INIT state and MODIFIED state during context-switch. This is faster than modifying the cr0.TS bit which has serializing semantics. ii. Newer glibc versions use SSE for some of the optimized copy/clear routines. With certain workloads (like boot, kernel-compilation etc), the application completes its work within the first 5 task switches, thus taking up to 5 #DNA traps with the kernel not getting a chance to apply the above mentioned pre-load heuristic. iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit and thus will not work correctly in the presence of lazy restore. Non-lazy state restore is needed for enabling such features. Some data on a two socket SNB system: * Saved 20K DNA exceptions during boot on a two socket SNB system. * Saved 50K DNA exceptions during kernel-compilation workload. * Improved throughput of the AVX based checksumming function inside the kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts pair. 
Signed-off-by: Suresh Siddha Cc: Jim Kukunas Cc: NeilBrown Cc: Avi Kivity --- arch/x86/include/asm/fpu-internal.h | 96 +++ arch/x86/include/asm/i387.h |1 + arch/x86/include/asm/xsave.h|1 + arch/x86/kernel/i387.c | 20 ++- arch/x86/kernel/process.c | 12 +++-- arch/x86/kernel/process_32.c|4 -- arch/x86/kernel/process_64.c|4 -- arch/x86/kernel/traps.c |5 ++- arch/x86/kernel/xsave.c | 57 + 9 files changed, 140 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index fac39e9..e31cc6e 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_xsave()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_xsave()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" +"2:\n" +_ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* +* Forget coprocessor state.. +*/ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_xsave()) + drop_fpu(tsk); + else + xrstor_state(init_xstate_buf, -1); +} + /* * FPU state switching for scheduling. * @@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* +* If the task has used the math, pre-load the FPU on xsave processors +* or if the past 5 consecutive context-switches used math. 
+*/ + fpu.preload = tsk_used_math(new) && (use_xsave() || +new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_xsave()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) + if (!use_xsave() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -372,7 +410,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { if