[RFC 0/3] memory-hotplug: handle page race between allocation and isolation
Memory hotplug has a subtle race problem, so this patchset fixes it (see [3/3] for details, and please confirm the problem before reviewing the other patches in this series). [1/3] is just a cleanup that helps [2/3]. [2/3] keeps the migratetype information in the freed page's index field and [3/3] uses that information. [3/3] fixes the race problem with [2/3]'s information. After applying [2/3], the migratetype argument of __free_one_page and free_one_page is redundant, so we could remove it, but I decided not to touch them because that increases code size by about 50 bytes. Minchan Kim (3): mm: use get_page_migratetype instead of page_private mm: remain migratetype in freed page memory-hotplug: bug fix race between isolation and allocation include/linux/mm.h | 12 mm/page_alloc.c | 16 ++-- mm/page_isolation.c |7 +-- 3 files changed, 27 insertions(+), 8 deletions(-) -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC 1/3] mm: use get_page_migratetype instead of page_private
page allocator uses set_page_private and page_private for handling migratetype when it frees page. Let's replace them with [set|get] _page_migratetype to make it more clear. Signed-off-by: Minchan Kim --- include/linux/mm.h | 10 ++ mm/page_alloc.c | 11 +++ mm/page_isolation.c |2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5c76634..86d61d6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -249,6 +249,16 @@ struct inode; #define page_private(page) ((page)->private) #define set_page_private(page, v) ((page)->private = (v)) +static inline void set_page_migratetype(struct page *page, int migratetype) +{ + set_page_private(page, migratetype); +} + +static inline int get_page_migratetype(struct page *page) +{ + return page_private(page); +} + /* * FIXME: take this include out, include page-flags.h in * files which need it (119 of them) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 710d91c..103ba66 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -671,8 +671,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* must delete as __free_one_page list manipulates */ list_del(&page->lru); /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ - __free_one_page(page, zone, 0, page_private(page)); - trace_mm_page_pcpu_drain(page, 0, page_private(page)); + __free_one_page(page, zone, 0, + get_page_migratetype(page)); + trace_mm_page_pcpu_drain(page, 0, + get_page_migratetype(page)); } while (--to_free && --batch_free && !list_empty(list)); } __mod_zone_page_state(zone, NR_FREE_PAGES, count); @@ -731,6 +733,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, order, get_pageblock_migratetype(page)); + local_irq_restore(flags); } @@ -1134,7 +1137,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) mt = migratetype; } - 
set_page_private(page, mt); + set_page_migratetype(page, mt); list = &page->lru; } __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); @@ -1301,7 +1304,7 @@ void free_hot_cold_page(struct page *page, int cold) return; migratetype = get_pageblock_migratetype(page); - set_page_private(page, migratetype); + set_page_migratetype(page, migratetype); local_irq_save(flags); if (unlikely(wasMlocked)) free_page_mlock(page); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 64abb33..acf65a7 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -199,7 +199,7 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) if (PageBuddy(page)) pfn += 1 << page_order(page); else if (page_count(page) == 0 && - page_private(page) == MIGRATE_ISOLATE) + get_page_migratetype(page) == MIGRATE_ISOLATE) pfn += 1; else break; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC 2/3] mm: remain migratetype in freed page
The page allocator doesn't keep migratetype information in the page when the page is freed. This patch stores the information in the freed page's index field, which isn't used by the free/alloc preparation paths, so it shouldn't change any behavior except the one below. This patch adds a new call site in __free_pages_ok, so it might add a bit of overhead, but that path is for high-order allocations, so I believe the impact is negligible. Signed-off-by: Minchan Kim --- include/linux/mm.h |6 -- mm/page_alloc.c|7 --- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 86d61d6..8fd32da 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -251,12 +251,14 @@ struct inode; static inline void set_page_migratetype(struct page *page, int migratetype) { - set_page_private(page, migratetype); + VM_BUG_ON((unsigned int)migratetype >= MIGRATE_TYPES); + page->index = migratetype; } static inline int get_page_migratetype(struct page *page) { - return page_private(page); + VM_BUG_ON((unsigned int)page->index >= MIGRATE_TYPES); + return page->index; } /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 103ba66..32985dd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -723,6 +723,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) { unsigned long flags; int wasMlocked = __TestClearPageMlocked(page); + int migratetype; if (!free_pages_prepare(page, order)) return; @@ -731,9 +732,9 @@ static void __free_pages_ok(struct page *page, unsigned int order) if (unlikely(wasMlocked)) free_page_mlock(page); __count_vm_events(PGFREE, 1 << order); - free_one_page(page_zone(page), page, order, - get_pageblock_migratetype(page)); - + migratetype = get_pageblock_migratetype(page); + set_page_migratetype(page, migratetype); + free_one_page(page_zone(page), page, order, migratetype); local_irq_restore(flags); } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More 
majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC 3/3] memory-hotplug: bug fix race between isolation and allocation
Like below, memory-hotplug makes race between page-isolation and page-allocation so it can hit BUG_ON in __offline_isolated_pages. CPU A CPU B start_isolate_page_range set_migratetype_isolate spin_lock_irqsave(zone->lock) free_hot_cold_page(Page A) /* without zone->lock */ migratetype = get_pageblock_migratetype(Page A); /* * Page could be moved into MIGRATE_MOVABLE * of per_cpu_pages */ list_add_tail(&page->lru, &pcp->lists[migratetype]); set_pageblock_isolate move_freepages_block drain_all_pages /* Page A could be in MIGRATE_MOVABLE of free_list. */ check_pages_isolated __test_page_isolated_in_pageblock /* * We can't catch freed page which * is free_list[MIGRATE_MOVABLE] */ if (PageBuddy(page A)) pfn += 1 << page_order(page A); /* So, Page A could be allocated */ __offline_isolated_pages /* * BUG_ON hit or offline page * which is used by someone */ BUG_ON(!PageBuddy(page A)); Signed-off-by: Minchan Kim --- I found this problem during code review so please confirm it. Kame? mm/page_isolation.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index acf65a7..4699d1f 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -196,8 +196,11 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) continue; } page = pfn_to_page(pfn); - if (PageBuddy(page)) + if (PageBuddy(page)) { pfn += 1 << page_order(page); + if (get_page_migratetype(page) != MIGRATE_ISOLATE) + break; + } else if (page_count(page) == 0 && get_page_migratetype(page) == MIGRATE_ISOLATE) pfn += 1; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sched/rt: Avoid updating RT entry timeout twice within one tick period
First, please note that the issue below was found in an older version (2.6.34-rt) rather than in the mainline rt kernel. Although some big changes have happened since then, especially that each softirq no longer runs as its own thread, we believe the issue possibly still exists in the latest upstream version. However, it is currently very hard to trigger now that all softirqs are pushed off to the ksoftirqd thread for completion. So please let me describe how it happens on 2.6.34-rt: On this version, each softirq has its own thread, which means there is at least one RT FIFO task per cpu. The priority of these tasks is set to 49 by default. If the user launches an RT FIFO task with a priority lower than the 49 of the softirq RT tasks, it's possible to have two RT FIFO tasks enqueued on one cpu runqueue at the same moment. Under the current strategy for balancing RT tasks, we really need to push them off to a CPU that they can run on as soon as possible. Even if it means a bit of cache line flushing, we can let the RT task run with the least latency. While the user RT FIFO task that was just launched is running, the tick sched timer of the current cpu fires. In this tick period, the timeout value of the user RT task will be updated once. Subsequently, we try to wake up one softirq RT task on its local cpu. As the priority of the current user RT task is lower than that of the softirq RT task, the current task will be preempted by the higher priority softirq RT task. Before preemption, we check to see if current can readily move to a different cpu. If so, we will reschedule to allow RT push logic to try to move current somewhere else. Whenever the woken softirq RT task runs, it first tries to migrate the user FIFO RT task over to a cpu that is running a task of lesser priority. If the migration is done, it will send a reschedule request to the chosen cpu via an IPI. 
Once the target cpu responds to the IPI, it will pick the migrated user RT task to preempt its current task. When the user RT task is running on the new cpu, the tick sched timer of that cpu fires. So it will tick the user RT task again. This also means the RT task timeout value will be updated again. As the migration may be done within one tick period, the user RT task timeout value will be updated twice within one tick. If we set a limit on the amount of cpu time for the user RT task by setrlimit(RLIMIT_RTTIME), the SIGXCPU signal should be posted upon reaching the soft limit. But when the SIGXCPU signal is sent depends on the RT task timeout value. In fact, the timeout mechanism for sending the SIGXCPU signal expects the RT task timeout to be increased once every tick. However, currently the timeout value may be incremented twice per tick. So it results in the SIGXCPU signal being sent earlier than expected. To solve the issue, we prevent the timeout value from increasing twice within one tick by remembering the jiffies value at which the timeout was last updated. As long as the RT task's recorded jiffies value differs from the global jiffies value, we allow its timeout to be updated. 
Signed-off-by: Ying Xue Signed-off-by: Fan Du Reviewed-by: Yong Zhang --- include/linux/sched.h |1 + kernel/sched/rt.c |6 +- 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a1f493..f0656a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1199,6 +1199,7 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; unsigned long timeout; + unsigned long watchdog_stamp; unsigned int time_slice; struct sched_rt_entity *back; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 573e1ca..8240d4f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1976,7 +1976,11 @@ static void watchdog(struct rq *rq, struct task_struct *p) if (soft != RLIM_INFINITY) { unsigned long next; - p->rt.timeout++; + if (p->rt.watchdog_stamp != jiffies) { + p->rt.timeout++; + p->rt.watchdog_stamp = jiffies; + } + next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 06/11] 3.2.x: timekeeping: Fix leapsecond triggered load spike issue
This is a backport of 4873fa070ae84a4115f0b3c9dfabc224f1bc7c51 The timekeeping code misses an update of the hrtimer subsystem after a leap second happened. Due to that timers based on CLOCK_REALTIME are either expiring a second early or late depending on whether a leap second has been inserted or deleted until an operation is initiated which causes that update. Unless the update happens by some other means this discrepancy between the timekeeping and the hrtimer data stays forever and timers are expired either early or late. The reported immediate workaround - $ date -s "`date`" - is causing a call to clock_was_set() which updates the hrtimer data structures. See: http://www.sheeri.com/content/mysql-and-leap-second-high-cpu-and-fix Add the missing clock_was_set() call to update_wall_time() in case of a leap second event. The actual update is deferred to softirq context as the necessary smp function call cannot be invoked from hard interrupt context. Signed-off-by: John Stultz Reported-by: Jan Engelhardt Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-3-git-send-email-johns...@us.ibm.com Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/time/timekeeping.c |4 1 file changed, 4 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5d55185..8958ad7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -941,6 +941,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) leap = second_overflow(xtime.tv_sec); xtime.tv_sec += leap; wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } /* Accumulate raw time */ @@ -1052,6 +1054,8 @@ static void update_wall_time(void) leap = second_overflow(xtime.tv_sec); xtime.tv_sec += leap; wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } 
timekeeping_update(false); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 08/11] 3.2.x: hrtimers: Move lock held region in hrtimer_interrupt()
From: Thomas Gleixner This is a backport of 196951e91262fccda81147d2bcf7fdab08668b40 We need to update the base offsets from this code and we need to do that under base->lock. Move the lock held region around the ktime_get() calls. The ktime_get() calls are going to be replaced with a function which gets the time and the offsets atomically. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: sta...@vger.kernel.org Signed-off-by: John Stultz Link: http://lkml.kernel.org/r/1341960205-56738-6-git-send-email-johns...@us.ibm.com Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/hrtimer.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 3c24fb2..8f320af 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1263,11 +1263,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) cpu_base->nr_events++; dev->next_event.tv64 = KTIME_MAX; + raw_spin_lock(&cpu_base->lock); entry_time = now = ktime_get(); retry: expires_next.tv64 = KTIME_MAX; - - raw_spin_lock(&cpu_base->lock); /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1344,6 +1343,7 @@ retry: * interrupt routine. We give it 3 attempts to avoid * overreacting on some spurious event. 
*/ + raw_spin_lock(&cpu_base->lock); now = ktime_get(); cpu_base->nr_retries++; if (++retries < 3) @@ -1356,6 +1356,7 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; + raw_spin_unlock(&cpu_base->lock); delta = ktime_sub(now, entry_time); if (delta.tv64 > cpu_base->max_hang_time.tv64) cpu_base->max_hang_time = delta; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 07/11] 3.2.x: timekeeping: Maintain ktime_t based offsets for hrtimers
From: Thomas Gleixner This is a backport of 5b9fe759a678e05be4937ddf03d50e950207c1c0 We need to update the hrtimer clock offsets from the hrtimer interrupt context. To avoid conversions from timespec to ktime_t maintain a ktime_t based representation of those offsets in the timekeeper. This puts the conversion overhead into the code which updates the underlying offsets and provides fast accessible values in the hrtimer interrupt. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-4-git-send-email-johns...@us.ibm.com Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8958ad7..d5d0e5d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -161,18 +161,34 @@ static struct timespec xtime __attribute__ ((aligned (16))); static struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static struct timespec total_sleep_time; +/* Offset clock monotonic -> clock realtime */ +static ktime_t offs_real; + +/* Offset clock monotonic -> clock boottime */ +static ktime_t offs_boot; + /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. 
*/ static struct timespec raw_time; /* must hold write on xtime_lock */ +static void update_rt_offset(void) +{ + struct timespec tmp, *wtm = &wall_to_monotonic; + + set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); + offs_real = timespec_to_ktime(tmp); +} + +/* must hold write on xtime_lock */ static void timekeeping_update(bool clearntp) { if (clearntp) { timekeeper.ntp_error = 0; ntp_clear(); } + update_rt_offset(); update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, timekeeper.mult); } @@ -587,6 +603,7 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); + update_rt_offset(); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -595,6 +612,12 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +static void update_sleep_time(struct timespec t) +{ + total_sleep_time = t; + offs_boot = timespec_to_ktime(t); +} + /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @delta: pointer to a timespec delta value @@ -612,7 +635,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) xtime = timespec_add(xtime, *delta); wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); - total_sleep_time = timespec_add(total_sleep_time, *delta); + update_sleep_time(timespec_add(total_sleep_time, *delta)); } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/36] AArch64 Linux kernel port
On 07/16/2012 08:16 AM, Pavel Machek wrote: >> If an implementation supports AArch32 at EL3 there could be some >> physical (or some FPGA config) switch to choose between the two. But >> since AArch64 is mandated, I don't see why one would force AArch32 at >> EL3 and therefore all lower exception levels (and make a big part of the >> processor unused). > > Actually I see one ... and I can bet it will happen. > > So you create that shiny new ARMv8 compliant CPU, 8 cores, 2GHz. HTC > will want to use it with 1GB of RAM... and put around existing OMAP > peripherals. But that's why we have Eagle (A15). It's a very capable 32-bit design from ARM and far more sensible for such designs. You can easily build something with a few A15 clusters in it, as we're already seeing. Jon. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 10/11] 3.2.x: hrtimer: Update hrtimer base offsets each hrtimer_interrupt
This is a backport of 5baefd6d84163443215f4a99f6a20f054ef11236 The update of the hrtimer base offsets on all cpus cannot be made atomically from the timekeeper.lock held and interrupt disabled region as smp function calls are not allowed there. clock_was_set(), which enforces the update on all cpus, is called either from preemptible process context in case of do_settimeofday() or from the softirq context when the offset modification happened in the timer interrupt itself due to a leap second. In both cases there is a race window for an hrtimer interrupt between dropping timekeeper lock, enabling interrupts and clock_was_set() issuing the updates. Any interrupt which arrives in that window will see the new time but operate on stale offsets. So we need to make sure that an hrtimer interrupt always sees a consistent state of time and offsets. ktime_get_update_offsets() allows us to get the current monotonic time and update the per cpu hrtimer base offsets from hrtimer_interrupt() to capture a consistent state of monotonic time and the offsets. The function replaces the existing ktime_get() calls in hrtimer_interrupt(). The overhead of the new function vs. ktime_get() is minimal as it just adds two store operations. This ensures that any changes to realtime or boottime offsets are noticed and stored into the per-cpu hrtimer base structures, prior to any hrtimer expiration and guarantees that timers are not expired early. 
Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-8-git-send-email-johns...@us.ibm.com Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/hrtimer.c | 28 ++-- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8f320af..6db7a5e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) +{ + ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; + ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; + + return ktime_get_update_offsets(offs_real, offs_boot); +} + /* * Retrigger next event is called after clock was set * @@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; if (!hrtimer_hres_active()) return; - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - + hrtimer_update_base(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); } @@ -710,7 +708,6 @@ static int hrtimer_switch_to_hres(void) base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); - /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); 
local_irq_restore(flags); @@ -1264,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; raw_spin_lock(&cpu_base->lock); - entry_time = now = ktime_get(); + entry_time = now = hrtimer_update_base(cpu_base); retry: expires_next.tv64 = KTIME_MAX; /* @@ -1342,9 +1339,12 @@ retry: * We need to prevent that we loop forever in the hrtimer * interrupt routine. We give it 3 attempts to avoid * overreacting on some spurious event. +* +* Acquire base lock for updating the offsets and retrieving +* the current time. */ raw_spin_lock(&cpu_base->lock); - now = ktime_get(); + now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) goto retry; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 00/11] 3.2-stable: Fix for leapsecond caused hrtimer/futex issue
Here is a backport of the leapsecond fixes to 3.2-stable. These are less straightforward, and should get closer review. This patch set addresses two issues: 1) Deadlock leapsecond issue that a few reports described. I spent some time over the weekend trying to find a way to reproduce the hard-hang issue some folks were reporting after the leapsecond. Initially I didn't think the 6b43ae8a619d17 leap-second hrtimer livelock patch needed to be backported since I assumed it required the ntp_lock split for it to be triggered, but looking again I found that the same issue could occur prior to splitting out the ntp_lock. So I've backported that fix (and its follow-on fixups) as well as created a test case to reproduce the hard-hang deadlock. 2) Early hrtimer/futex expiration issue that was more widely observed. This is the load-spike issue that a number of folks saw that did not hard hang most boxes (although some reports did show nmi-watchdogs triggering due to sudden spinning in tight loops). I've booted and tested this entire patchset on two boxes and run through a number of leapsecond related stress tests. However, additional testing and review would be appreciated. 
The original commits backported in this set are: Deadlock issue fixes: - 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d ntp: Fix leap-second hrtimer livelock dd48d708ff3e917f6d6b6c2b696c3f18c019feed ntp: Correct TAI offset during leap second fad0c66c4bb836d57a5f125ecd38bed653ca863a timekeeping: Fix CLOCK_MONOTONIC inconsistency during leapsecond Helper change: (allows the following fixes to backport more easily): cc06268c6a87db156af2daed6e96a936b955cc82 time: Move common updates to a function Hrtimer early-expiration issue fixes: --- f55a6faa384304c89cfef162768e88374d3312cb hrtimer: Provide clock_was_set_delayed() 4873fa070ae84a4115f0b3c9dfabc224f1bc7c51 timekeeping: Fix leapsecond triggered load spike issue 5b9fe759a678e05be4937ddf03d50e950207c1c0 timekeeping: Maintain ktime_t based offsets for hrtimers 196951e91262fccda81147d2bcf7fdab08668b40 hrtimers: Move lock held region in hrtimer_interrupt() f6c06abfb3972ad4914cef57d8348fcb2932bc3b timekeeping: Provide hrtimer update function 5baefd6d84163443215f4a99f6a20f054ef11236 hrtimer: Update hrtimer base offsets each hrtimer_interrupt 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0 timekeeping: Add missing update call in timekeeping_resume() I've already done backports to all the stable kernels to 2.6.32, and will send out the rest tomorrow. Please let me know if you have any comments or feedback. 
thanks -john Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel John Stultz (5): 3.2.x: ntp: Fix leap-second hrtimer livelock 3.2.x: timekeeping: Fix CLOCK_MONOTONIC inconsistency during leapsecond 3.2.x: hrtimer: Provide clock_was_set_delayed() 3.2.x: timekeeping: Fix leapsecond triggered load spike issue 3.2.x: hrtimer: Update hrtimer base offsets each hrtimer_interrupt Richard Cochran (1): 3.2.x: ntp: Correct TAI offset during leap second Thomas Gleixner (5): 3.2.x: time: Move common updates to a function 3.2.x: timekeeping: Maintain ktime_t based offsets for hrtimers 3.2.x: hrtimers: Move lock held region in hrtimer_interrupt() 3.2.x: timekeeping: Provide hrtimer update function 3.2.x: timekeeping: Add missing update call in timekeeping_resume() include/linux/hrtimer.h | 10 +++- include/linux/timex.h |2 +- kernel/hrtimer.c | 53 +-- kernel/time/ntp.c | 124 +++-- kernel/time/timekeeping.c | 115 +++-- 5 files changed, 175 insertions(+), 129 deletions(-) -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 04/11] 3.2.x: time: Move common updates to a function
From: Thomas Gleixner This is a backport of cc06268c6a87db156af2daed6e96a936b955cc82 While not a bugfix itself, it allows following fixes to backport in a more straightforward manner. CC: Thomas Gleixner CC: Eric Dumazet CC: Richard Cochran Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 34 +- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5c9b67e..5d55185 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -166,6 +166,19 @@ static struct timespec total_sleep_time; */ static struct timespec raw_time; +/* must hold write on xtime_lock */ +static void timekeeping_update(bool clearntp) +{ + if (clearntp) { + timekeeper.ntp_error = 0; + ntp_clear(); + } + update_vsyscall(&xtime, &wall_to_monotonic, +timekeeper.clock, timekeeper.mult); +} + + + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -366,11 +379,7 @@ int do_settimeofday(const struct timespec *tv) xtime = *tv; - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -403,11 +412,7 @@ int timekeeping_inject_offset(struct timespec *ts) xtime = timespec_add(xtime, *ts); wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -636,10 +641,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) __timekeeping_inject_sleeptime(delta); - timekeeper.ntp_error = 0; - ntp_clear(); - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); write_sequnlock_irqrestore(&xtime_lock, 
flags); @@ -1052,9 +1054,7 @@ static void update_wall_time(void) wall_to_monotonic.tv_sec -= leap; } - /* check to see if there is a new clocksource to use */ - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(false); } /** -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 02/11] 3.2.x: ntp: Correct TAI offset during leap second
From: Richard Cochran This is a backport of dd48d708ff3e917f6d6b6c2b696c3f18c019feed When repeating a UTC time value during a leap second (when the UTC time should be 23:59:60), the TAI timescale should not stop. The kernel NTP code increments the TAI offset one second too late. This patch fixes the issue by incrementing the offset during the leap second itself. Signed-off-by: Richard Cochran Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/time/ntp.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4508f7f..f1eb182 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -378,6 +378,7 @@ int second_overflow(unsigned long secs) if (secs % 86400 == 0) { leap = -1; time_state = TIME_OOP; + time_tai++; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } @@ -392,7 +393,6 @@ int second_overflow(unsigned long secs) } break; case TIME_OOP: - time_tai++; time_state = TIME_WAIT; break; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 01/11] 3.2.x: ntp: Fix leap-second hrtimer livelock
From: John Stultz This is a backport of 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d This should have been backported when it was committed, but I mistook the problem as requiring the ntp_lock changes that landed in 3.4 in order for it to occur. Unfortunately the same issue can happen (with only one cpu) as follows: do_adjtimex() write_seqlock_irq(&xtime_lock); process_adjtimex_modes() process_adj_status() ntp_start_leap_timer() hrtimer_start() hrtimer_reprogram() tick_program_event() clockevents_program_event() ktime_get() seq = read_seqbegin(xtime_lock); [DEADLOCK] This deadlock will not always occur, as it requires the leap_timer to force a hrtimer_reprogram which only happens if it's set and there's no sooner timer to expire. NOTE: This patch, being faithful to the original commit, introduces a bug (we don't update wall_to_monotonic), which will be resolved by backporting a following fix. Original commit message below: Since commit 7dffa3c673fbcf835cd7be80bb4aec8ad3f51168 the ntp subsystem has used an hrtimer for triggering the leapsecond adjustment. However, this can cause a potential livelock. Thomas diagnosed this as the following pattern: CPU 0 CPU 1 do_adjtimex() spin_lock_irq(&ntp_lock); process_adjtimex_modes(); timer_interrupt() process_adj_status(); do_timer() ntp_start_leap_timer(); write_lock(&xtime_lock); hrtimer_start(); update_wall_time(); hrtimer_reprogram(); ntp_tick_length() tick_program_event() spin_lock(&ntp_lock); clockevents_program_event() ktime_get() seq = read_seqbegin(xtime_lock); This patch tries to avoid the problem by reverting back to not using an hrtimer to inject leapseconds, and instead we handle the leapsecond processing in the second_overflow() function. The downside to this change is that on systems that support highres timers, the leap second processing will occur on a HZ tick boundary, (ie: ~1-10ms, depending on HZ) after the leap second instead of possibly sooner (~34us in my tests w/ x86_64 lapic). 
This patch applies on top of tip/timers/core. CC: Sasha Levin CC: Thomas Gleixner Reported-by: Sasha Levin Diagnoised-by: Thomas Gleixner Tested-by: Sasha Levin Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- include/linux/timex.h |2 +- kernel/time/ntp.c | 122 +++-- kernel/time/timekeeping.c | 18 +++ 3 files changed, 48 insertions(+), 94 deletions(-) diff --git a/include/linux/timex.h b/include/linux/timex.h index aa60fe7..08e90fb 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -266,7 +266,7 @@ static inline int ntp_synced(void) /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ extern u64 tick_length; -extern void second_overflow(void); +extern int second_overflow(unsigned long secs); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); extern void hardpps(const struct timespec *, const struct timespec *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4b85a7a..4508f7f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -31,8 +31,6 @@ unsigned long tick_nsec; u64tick_length; static u64 tick_length_base; -static struct hrtimer leap_timer; - #define MAX_TICKADJ500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) @@ -350,60 +348,60 @@ void ntp_clear(void) } /* - * Leap second processing. If in leap-insert state at the end of the - * day, the system clock is set back one second; if in leap-delete - * state, the system clock is set ahead one second. + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (mi...@udel.edu) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. 
+ * + * Also handles leap second processing, and returns leap offset */ -static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) +int second_overflow(unsigned long secs) { - enum hrtimer_restart res = HRTIMER_NORESTART; - - write_seqlock(&xtime_lock); + int leap = 0; + s64 delta; + /* +* Leap second processing. If in leap-insert state at the end of the +* day, the system clock is set back one second; if in leap-delete +* state, the system clock is set ahead one second. +*/
Re: [PATCH] xfs: fix comment typo of struct xfs_da_blkinfo.
On Tue, Jul 17, 2012 at 11:33:33AM +0800, Chen Baozi wrote: > I'd really love to. Right now, I am working on syslinux to support booting on > xfs partition (under pcacjr's mentoring), which I thought would be a nice > start to get familiar with xfs (and I did learn a lot from it). So I think > there would be more time (and experience on xfs) after I finish the xfs > support on syslinux. And I'm really looking forward to your ideas. So do > please tell me what I can help, I'll try my best to do it. Btw, if you need more reviewers for the syslinux support feel free to pass it by me (or the list). Thanks for working on this! -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 03/11] 3.2.x: timekeeping: Fix CLOCK_MONOTONIC inconsistency during leapsecond
From: John Stultz This is a backport of fad0c66c4bb836d57a5f125ecd38bed653ca863a which resolves a bug the previous commit. Commit 6b43ae8a61 (ntp: Fix leap-second hrtimer livelock) broke the leapsecond update of CLOCK_MONOTONIC. The missing leapsecond update to wall_to_monotonic causes discontinuities in CLOCK_MONOTONIC. Adjust wall_to_monotonic when NTP inserted a leapsecond. Reported-by: Richard Cochran Signed-off-by: John Stultz Tested-by: Richard Cochran Cc: sta...@kernel.org Link: http://lkml.kernel.org/r/1338400497-12420-1-git-send-email-john.stu...@linaro.org Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/time/timekeeping.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4780a7d..5c9b67e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -938,6 +938,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) xtime.tv_sec++; leap = second_overflow(xtime.tv_sec); xtime.tv_sec += leap; + wall_to_monotonic.tv_sec -= leap; } /* Accumulate raw time */ @@ -1048,7 +1049,7 @@ static void update_wall_time(void) xtime.tv_sec++; leap = second_overflow(xtime.tv_sec); xtime.tv_sec += leap; - + wall_to_monotonic.tv_sec -= leap; } /* check to see if there is a new clocksource to use */ -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 11/11] 3.2.x: timekeeping: Add missing update call in timekeeping_resume()
From: Thomas Gleixner This is a backport of 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0 The leap second rework unearthed another issue of inconsistent data. On timekeeping_resume() the timekeeper data is updated, but nothing calls timekeeping_update(), so now the update code in the timer interrupt sees stale values. This has been the case before those changes, but then the timer interrupt was using stale data as well so this went unnoticed for quite some time. Add the missing update call, so all the data is consistent everywhere. Reported-by: Andreas Schwab Reported-and-tested-by: "Rafael J. Wysocki" Reported-and-tested-by: Martin Steigerwald Cc: LKML Cc: Linux PM list Cc: John Stultz Cc: Ingo Molnar Cc: Peter Zijlstra , Cc: Prarit Bhargava Cc: sta...@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Linus Torvalds Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- kernel/time/timekeeping.c |1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4938c5e..03e67d4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -699,6 +699,7 @@ static void timekeeping_resume(void) timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); write_sequnlock_irqrestore(&xtime_lock, flags); touch_softlockup_watchdog(); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 05/11] 3.2.x: hrtimer: Provide clock_was_set_delayed()
This is a backport of f55a6faa384304c89cfef162768e88374d3312cb clock_was_set() cannot be called from hard interrupt context because it calls on_each_cpu(). For fixing the widely reported leap seconds issue it is necessary to call it from hard interrupt context, i.e. the timer tick code, which does the timekeeping updates. Provide a new function which denotes it in the hrtimer cpu base structure of the cpu on which it is called and raise the hrtimer softirq. We then execute the clock_was_set() notificiation from softirq context in run_hrtimer_softirq(). The hrtimer softirq is rarely used, so polling the flag there is not a performance issue. [ tglx: Made it depend on CONFIG_HIGH_RES_TIMERS. We really should get rid of all this ifdeffery ASAP ] Signed-off-by: John Stultz Reported-by: Jan Engelhardt Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: sta...@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-2-git-send-email-johns...@us.ibm.com Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- include/linux/hrtimer.h |9 - kernel/hrtimer.c| 20 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0dc30..c9ec940 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -165,6 +165,7 @@ enum hrtimer_base_type { * @lock: lock protecting the base and associated clock bases * and timers * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set: Indicates that clock was set from irq context. 
* @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -177,7 +178,8 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - unsigned long active_bases; + unsigned intactive_bases; + unsigned intclock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSECHIGH_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_HIGH_RES +extern void clock_was_set_delayed(void); + #else # define MONOTONIC_RES_NSECLOW_RES_NSEC @@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; } + +static inline void clock_was_set_delayed(void) { } + #endif extern void clock_was_set(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ae34bf5..3c24fb2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -717,6 +717,19 @@ static int hrtimer_switch_to_hres(void) return 1; } +/* + * Called from timekeeping code to reprogramm the hrtimer interrupt + * device. If called from the timer interrupt context we defer it to + * softirq context. 
+ */ +void clock_was_set_delayed(void) +{ + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + cpu_base->clock_was_set = 1; + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -1395,6 +1408,13 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + if (cpu_base->clock_was_set) { + cpu_base->clock_was_set = 0; + clock_was_set(); + } + hrtimer_peek_ahead_timers(); } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 09/11] 3.2.x: timekeeping: Provide hrtimer update function
From: Thomas Gleixner This is a backport of f6c06abfb3972ad4914cef57d8348fcb2932bc3b To finally fix the infamous leap second issue and other race windows caused by functions which change the offsets between the various time bases (CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME) we need a function which atomically gets the current monotonic time and updates the offsets of CLOCK_REALTIME and CLOCK_BOOTTIME with minimalistic overhead. The previous patch which provides ktime_t offsets allows us to make this function almost as cheap as ktime_get() which is going to be replaced in hrtimer_interrupt(). Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: sta...@vger.kernel.org Signed-off-by: John Stultz Link: http://lkml.kernel.org/r/1341960205-56738-7-git-send-email-johns...@us.ibm.com Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Linux Kernel Signed-off-by: John Stultz --- include/linux/hrtimer.h |1 + kernel/time/timekeeping.c | 34 ++ 2 files changed, 35 insertions(+) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c9ec940..cc07d27 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -327,6 +327,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d5d0e5d..4938c5e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1240,6 +1240,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&xtime_lock, seq)); } +#ifdef CONFIG_HIGH_RES_TIMERS +/** + * ktime_get_update_offsets - hrtimer helper + * @real: pointer to storage for monotonic -> realtime offset + * @_boot: pointer 
to storage for monotonic -> boottime offset + * + * Returns current monotonic time and updates the offsets + * Called from hrtimer_interupt() or retrigger_next_event() + */ +ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot) +{ + ktime_t now; + unsigned int seq; + u64 secs, nsecs; + + do { + seq = read_seqbegin(&xtime_lock); + + secs = xtime.tv_sec; + nsecs = xtime.tv_nsec; + nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + + *real = offs_real; + *boot = offs_boot; + } while (read_seqretry(&xtime_lock, seq)); + + now = ktime_add_ns(ktime_set(secs, 0), nsecs); + now = ktime_sub(now, *real); + return now; +} +#endif + /** * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format */ -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/36] AArch64 Linux kernel port
On 07/16/2012 04:24 AM, Avi Kivity wrote: > Can the same kernel image run in both EL1 and EL2? I noticed some .if > ELs in the assembler files. I guess they could be compiled multiple > times and the correct version chosen at runtime, or patched up like x86 > does with alternative(). > One of the advantages kvm has to Linux distributors is that the same > kernel image can be used the hypervisor, guest, and bare metal. I'd > like to preserve that for arm64. The idea is that you would always enter at EL2 and then drop privilege down to EL1 if you're not doing virt. That achieves effectively the same thing that you get on x86. The virtualization in AArch64 is designed more from the POV of separate hypervisors like Xen so we just need to make sure we always start with enough privilege. Jon. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/11] 3.2-stable: Fix for leapsecond caused hrtimer/futex issue
On 07/17/2012 12:05 AM, John Stultz wrote: 1) Deadlock leapsecond issue that a few reports described. I spent some time over the weekend trying to find a way to reproduce the hard-hang issue some folks were reporting after the leapsecond. Initially I didn't think the 6b43ae8a619d17 leap-second hrimter livelock patch needed to be backported since, I assumed it required the ntp_lock split for it to be triggered, but looking again I found that the same issue could occur prior to splitting out the ntp_lock. So I've backported that fix (and its follow-on fixups) as well as created a test case to reproduce the hard-hang deadlock. Attached is the test case I used to reproduce and test the solution to the hard-hang deadlock. WARNING: THIS TEST WILL LIKELY HARD LOCK YOUR BOX IN IRQ CONTEXT! YOU MAY LOSE DATA! RUN AT YOUR OWN RISK! thanks -john /* Demo leapsecond deadlock * by: john stultz (johns...@us.ibm.com) * (C) Copyright IBM 2012 * Licensed under the GPL * * This test demonstrates leapsecond deadlock that is possibe * on kernels from 2.6.26 to 3.3. * * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA * RUN AT YOUR OWN RISK! * To build: * $ gcc leapcrash.c -o leapcrash -lrt */ #include #include #include #include #include #include #include /* clear NTP time_status & time_state */ void clear_time_state(void) { struct timex tx; int ret; /* * XXX - The fact we have to call this twice seems * to point to a slight issue in the kernel's ntp state * managment. Needs to be investigated further. */ tx.modes = ADJ_STATUS; tx.status = STA_PLL; ret = adjtimex(&tx); tx.modes = ADJ_STATUS; tx.status = 0; ret = adjtimex(&tx); } /* Make sure we cleanup on ctrl-c */ void handler(int unused) { clear_time_state(); exit(0); } int main(void) { struct timex tx; struct timespec ts; time_t next_leap; int count =0; setbuf(stdout, NULL); signal(SIGINT, handler); signal(SIGKILL, handler); printf("This runs continuously. 
Press ctrl-c to stop\n"); clear_time_state(); /* Get the current time */ clock_gettime(CLOCK_REALTIME, &ts); /* Calculate the next possible leap second 23:59:60 GMT */ next_leap = ts.tv_sec; next_leap += 86400 - (next_leap % 86400); while (1) { struct timeval tv; /* set the time to 2 seconds before the leap */ tv.tv_sec = next_leap - 2; tv.tv_usec = 0; settimeofday(&tv, NULL); adjtimex(&tx); /* hammer on adjtime w/ STA_INS */ while (tx.time.tv_sec < next_leap + 1) { /* Set the leap second insert flag */ tx.modes = ADJ_STATUS; tx.status = STA_INS; adjtimex(&tx); } clear_time_state(); printf("."); } return 0; }
Re: [PATCHv3 0/3] perf tool: Add new event group management
On Mon, Jul 9, 2012 at 1:05 PM, Jiri Olsa wrote: > > On Fri, Jul 06, 2012 at 03:42:54AM +0200, Stephane Eranian wrote: > > On Fri, Jul 6, 2012 at 3:32 AM, Ulrich Drepper wrote: > > > On Thu, Jul 5, 2012 at 12:15 PM, Stephane Eranian > > > wrote: > > >> I don't understand why you actually need the :2 suffix. There can > > >> only be one leader. So assume it is the first one. Users have to > > >> know the first one is the leader which seems like a natural thing > > >> to do for me. It would make you syntax less ugly than it already > > >> is. > > > > > > In a blue sky world I would have done this. In fact, this is what I > > > tried before reading the sources to find out there is no group support > > > so far. But given that multiple -e options already have a meaning I > > > would be hesitant to change this. > > > > That's why I said you activate grouping via -e only when you have > > the --group-events or --group-reads option in front. That would > > not change the meaning of the multiple -e when none of those > > group options are specified. > > I discussed this with peter.. > > the {} thing allows: 1) multiple groups in a single -e, 2) group > attributes > And what's the value of 1) exactly? What's wrong with passing multiple -e ? The only group attribute I can think of would be :u, :k. Not so much typing. > as for the leader sampling, we can have the first event to become the leader > by default (omit the leader index modifier) and enable the leader sampling by > another modifier: > I don't understand this sentence. > right, just make it a single 'l' (el not one) to indicate 'leader' > sampling > To me ,this looks a bit of an over-engineered design and it is not based on any actual user requests. Don't get me wrong, grouping is useful and required but nobody has ever asked for that level of flexibility. The syntax you have now is already very rich for my taste. 
> > jirka -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] snd-opti9xx: Implement suspend/resume
Implement suspend/resume support for Opti 92x and 93x chips. Tested with Opti 929A+AD1848 and Opti 931. Signed-off-by: Ondrej Zary --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -136,8 +136,8 @@ struct snd_opti9xx { #ifdef OPTi93X unsigned long mc_indir_index; struct resource *res_mc_indir; - struct snd_wss *codec; #endif /* OPTi93X */ + struct snd_wss *codec; unsigned long pwd_reg; spinlock_t lock; @@ -870,9 +870,7 @@ static int __devinit snd_opti9xx_probe(struct snd_card *card) &codec); if (error < 0) return error; -#ifdef OPTi93X chip->codec = codec; -#endif error = snd_wss_pcm(codec, 0, &pcm); if (error < 0) return error; @@ -1053,11 +1051,55 @@ static int __devexit snd_opti9xx_isa_remove(struct device *devptr, return 0; } +#ifdef CONFIG_PM +static int snd_opti9xx_suspend(struct snd_card *card) +{ + struct snd_opti9xx *chip = card->private_data; + + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); + chip->codec->suspend(chip->codec); + return 0; +} + +static int snd_opti9xx_resume(struct snd_card *card) +{ + struct snd_opti9xx *chip = card->private_data; + int error, xdma2; +#if defined(CS4231) || defined(OPTi93X) + xdma2 = dma2; +#else + xdma2 = -1; +#endif + + error = snd_opti9xx_configure(chip, port, irq, dma1, xdma2, + mpu_port, mpu_irq); + if (error) + return error; + chip->codec->resume(chip->codec); + snd_power_change_state(card, SNDRV_CTL_POWER_D0); + return 0; +} + +static int snd_opti9xx_isa_suspend(struct device *dev, unsigned int n, + pm_message_t state) +{ + return snd_opti9xx_suspend(dev_get_drvdata(dev)); +} + +static int snd_opti9xx_isa_resume(struct device *dev, unsigned int n) +{ + return snd_opti9xx_resume(dev_get_drvdata(dev)); +} +#endif + static struct isa_driver snd_opti9xx_driver = { .match = snd_opti9xx_isa_match, .probe = snd_opti9xx_isa_probe, .remove = __devexit_p(snd_opti9xx_isa_remove), - /* FIXME: suspend/resume */ +#ifdef CONFIG_PM + .suspend= snd_opti9xx_isa_suspend, + .resume = 
snd_opti9xx_isa_resume, +#endif .driver = { .name = DEV_NAME }, @@ -1123,12 +1165,29 @@ static void __devexit snd_opti9xx_pnp_remove(struct pnp_card_link * pcard) snd_opti9xx_pnp_is_probed = 0; } +#ifdef CONFIG_PM +static int snd_opti9xx_pnp_suspend(struct pnp_card_link *pcard, + pm_message_t state) +{ + return snd_opti9xx_suspend(pnp_get_card_drvdata(pcard)); +} + +static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) +{ + return snd_opti9xx_resume(pnp_get_card_drvdata(pcard)); +} +#endif + static struct pnp_card_driver opti9xx_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, .name = "opti9xx", .id_table = snd_opti9xx_pnpids, .probe = snd_opti9xx_pnp_probe, .remove = __devexit_p(snd_opti9xx_pnp_remove), +#ifdef CONFIG_PM + .suspend= snd_opti9xx_pnp_suspend, + .resume = snd_opti9xx_pnp_resume, +#endif }; #endif -- Ondrej Zary -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/1] Add Wondermedia Technologies (wmt) to docs/devicetree/vendor-bindings.txt
>From 756ae5c2f475d679649adff99058679b651af8d9 Mon Sep 17 00:00:00 2001 From: Tony Prisk Date: Tue, 17 Jul 2012 19:09:31 +1200 Subject: [PATCH] Add Wondermedia Technologies (wmt) to vendor-prefixes.txt Signed-off-by: Tony Prisk --- .../devicetree/bindings/vendor-prefixes.txt|1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index db4d3af..b87dc6b 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -48,4 +48,5 @@ stSTMicroelectronics stericsson ST-Ericsson ti Texas Instruments wlfWolfson Microelectronics +wmtWondermedia Technologies xlnx Xilinx -- 1.7.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 2/2] block: split discard into aligned requests
Looks good. Any chance we can get this into the 3.6 queue (and possibly -stable)? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] ubi: Fix bad PEBs reserve caclulation
Hi Artem, On Mon, 16 Jul 2012 18:33:57 +0300 Artem Bityutskiy wrote: > But one more thing is the mtd web-site. I've grepped for '1%' and there > are plenty of them. I've changed them all to 2% more or less > mechanically - only cleaned up one section by removing out-of-date > information. Would you please grep for '2%' and review if the > information there is reasonable? Also, would you please add some more > info to this FAQ entry: > > http://linux-mtd.infradead.org/faq/ubi.html#L_bad_blocks_exceeded > > Or even better if you could write a separate section for this stuff in > the documentation, then you could remove that FAQ entry completely. Sure, I'll try to do it as well. But it would only make sense after accepting Richard's patchset as well, which suggests configuring per-ubi-device beb limit via the attach ioctl. I didn't have the time to properly review it yet, but IMO it looks more controversial. http://lists.infradead.org/pipermail/linux-mtd/2012-July/042803.html Regards, Shmulik -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RESEND 0/3] scsi: fix internal write cache issue on usb hdd.
On Mon, 2012-07-16 at 16:48 -0700, Greg KH wrote: > On Sat, Jul 07, 2012 at 11:04:45PM -0400, Namjae Jeon wrote: > > From: Namjae Jeon > > > > The numbers of USB HDDs(All USB HDD I checked) does not respond > > correctly to scsi mode sense command for retrieving the write cache > > page status. Even though write cache is enabled by default, due to > > scsi driver assume that cache is not enabled which in turn might lead > > to loss of data since data still will be in cache. > > This result that all filesystems is not stable on USB HDD when the > > device is unplugged abruptly, even though these are having journaling > > feature. Our first trying is that scsi driver send ATA command > > (ATA Pass through, #85) to USB HDD after failure from normal routine to > > know write cache enable. > > We have known it is dangerous after testing several USB HDD. some of > > HDD is stalled by this command(A-DATA HDD). So we tried to make the > > patch James Bottomley's suggestion(usb quirk) on version 2 that add > > product ID and vendor ID of USB HDD to USB quirk list after checking > > write cache. > > All filesystem will be stable on USB HDD registered in quirk list. > > And it will be updated continuously. > > Now applied to the usb-next branch. It's been in scsi#misc for ten days with no problems. Let's leave it there rather than create merge and rebase issues. Thanks, James -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V2] [SCSI] async: Fixup arguments of async_synchronize_full_domain in alsa-soc
On Mon, 2012-07-16 at 15:56 -0700, Dan Williams wrote: > On Mon, Jul 16, 2012 at 3:17 PM, Marek Vasut wrote: > [..] > > sound/soc/soc-dapm.c |2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > V2: CC proper people, hopefully proper mailing list > > Adjust the patch subject with proper tags > > > > diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c > > index eded657..e491eb0 100644 > > --- a/sound/soc/soc-dapm.c > > +++ b/sound/soc/soc-dapm.c > > @@ -1572,7 +1572,7 @@ static int dapm_power_widgets(struct > > snd_soc_dapm_context *dapm, int event) > > struct snd_soc_dapm_context *d; > > LIST_HEAD(up_list); > > LIST_HEAD(down_list); > > - LIST_HEAD(async_domain); > > + ASYNC_DOMAIN_EXCLUSIVE(async_domain); > > enum snd_soc_bias_level bias; > > > > Thanks. This was folded into the resend that was submitted week [1] OK, I missed that with the cockup in the cover letters. James
Re: linux-next: Tree for July 12 (v4l2-ioctl.c)
Hi all, On Tue, 17 Jul 2012 08:48:37 +0200 Hans Verkuil wrote: > > On Tue July 17 2012 04:25:35 Ming Lei wrote: > > On Thu, Jul 12, 2012 at 11:49 PM, Randy Dunlap wrote: > > > > > > on i386 and/or x86_64, drivers/media/video/v4l2-ioctl.c has too many > > > errors to be listed here. This is the beginning few lines of the errors: > > > > I see the errors on ARM too. > > A fix can be found here: > > http://patchwork.linuxtv.org/patch/13336/ And I have been applying that fix to linux-next since next-20120713 - though Mauro has not applied it to the v4l-dvb tree yet ... -- Cheers, Stephen Rothwells...@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ pgpLC35Jkeo1S.pgp Description: PGP signature
Re: [PATCH RESEND] gpio: of_get_named_gpio_flags() return -EPROBE_DEFER if GPIO not yet available
On Tue, Jul 17, 2012 at 12:16:48AM +0200, Linus Walleij wrote: > On Sun, Jul 15, 2012 at 11:11 PM, Mark Brown > > I had thought a version of this > > had been submitted already and was in a similar place to the one for > > gpio_request(). > Me too... Greg just merged the driver core patch for pushing the deferred devices to the end of the dpm list before reprobing them which should address Grant's concern about doing this in gpiolib. signature.asc Description: Digital signature
[PATCH] gpiolib: arizona: Add WM5110 support
Signed-off-by: Mark Brown --- drivers/gpio/gpio-arizona.c |1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-arizona.c b/drivers/gpio/gpio-arizona.c index 0c1becf..8740d2e 100644 --- a/drivers/gpio/gpio-arizona.c +++ b/drivers/gpio/gpio-arizona.c @@ -112,6 +112,7 @@ static int __devinit arizona_gpio_probe(struct platform_device *pdev) switch (arizona->type) { case WM5102: + case WM5110: arizona_gpio->gpio_chip.ngpio = 5; break; default: -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sd: do not set changed flag on all unit attention conditions
On Mon, 2012-07-16 at 19:20 +0200, Paolo Bonzini wrote: > Il 16/07/2012 18:18, James Bottomley ha scritto: > >> > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > >> > index b583277..6d8ca08 100644 > >> > --- a/drivers/scsi/scsi_lib.c > >> > +++ b/drivers/scsi/scsi_lib.c > >> > @@ -843,8 +843,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, > >> > unsigned int good_bytes) > >> > } else if (sense_valid && !sense_deferred) { > >> > switch (sshdr.sense_key) { > >> > case UNIT_ATTENTION: > >> > -if (cmd->device->removable) { > >> > -/* Detected disc change. Set a bit > >> > +if (cmd->device->removable && > >> > +(sshdr.asc == 0x3a || > >> > + (sshdr.asc == 0x28 && sshdr.ascq == > >> > 0x00))) { > >> > +/* "No medium" or "Medium may have > >> > changed." > >> > + * This means a disc change. Set a bit > > This type of change would likely cause a huge cascade of errors in real > > removable media devices. Under the MMC standards, which a lot of the > > older removable discs seem to follow, UNIT ATTENTION indicates either > > medium change or device reset (which we check for and eat lower down); > > we can't rely on them giving proper SBC-2 sense codes. If you want to > > pretend to be removable media, you have to conform to its standards. > > Would you accept a patch doing the opposite, i.e. passing some sense > codes such as PARAMETERS CHANGED and TARGET OPERATING CONDITIONS HAVE > CHANGED? Could you explain what the problem actually is? It looks like you had a reason to mark virtio-scsi as removable, even though it isn't, and now you want to add further hacks because being removable doesn't quite work. Lets go back and see if there's a more correct way to do whatever it is you want to do. James -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [opensuse-kernel] Re: [RFC] Simplifying kernel configuration for distro issues
On Mon, Jul 16, 2012 at 10:56 PM, Linus Torvalds wrote: > On Mon, Jul 16, 2012 at 12:26 PM, wrote: >> Some of the proposed ways to implement the minimum distro kernel would not >> allow you to override the distro defaults because they would be implemented >> by setting dependancies, not by selecting options that you as the user could >> then unselect. > > The sanest thing to do is just a list of "select" statements. And in > any case it would have to depend on the "distro config" entry, so EVEN > THEN you could just create the Kconfig file, then edit out the distro > config thing, and then do whatever you want. Except that "select" is one of the ugliest things in Kconfig, as it blindly sets a symbol without checking if its dependencies are fulfilled. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch -next] ext4: locking issue on error path
On Tue, Jul 17, 2012 at 09:31:06AM +0300, Dan Carpenter wrote: > We recently changed how the locking worked here, but this error path was > missed. > > Signed-off-by: Dan Carpenter Sorry, it is my fault. Thanks for pointing out this bug. Regards, Zheng > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 8c84070..2728fb7 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -3031,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb > *iocb, > if (!is_sync_kiocb(iocb)) { > ext4_io_end_t *io_end = > ext4_init_io_end(inode, GFP_NOFS); > - if (!io_end) > - return -ENOMEM; > + if (!io_end) { > + ret = -ENOMEM; > + goto retake_lock; > + } > io_end->flag |= EXT4_IO_END_DIRECT; > iocb->private = io_end; > /* > @@ -3094,6 +3096,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb > *iocb, > ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); > } > > +retake_lock: > /* take i_mutex locking again if we do a ovewrite dio */ > if (overwrite) { > up_read(&EXT4_I(inode)->i_data_sem); > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 00/11] omap: musb: Add device tree support
This patch series adds device tree support for MUSB and device tree support for all the related modules to get MUSB working in OMAP platform. A new omap-usb2 phy driver has been added (with only dt support) to perform phy configurations. Previously this configuration was performed by twl6030, using pdata function pointers. With the addition of omap-usb2 to perform phy configurations, twl6030 is made as a comparator driver to detect VBUS and ID events and notify it to the glue layer. musb core is _NOT_ yet converted to device tree support as it would need a lot of driver re-design because of its enormous use of function pointers. That will be in _TO DO_ list. Changes from v1: * Fixed Rajendra Nayak comments (regulator naming, compatible naming of musb and other minor cleanups.) * It's agreed to have ocp2scp in drivers/bus and usb2 phy is a child of ocp2scp, the documentation is updated accordingly. Changes from RFC: Removed the dependency on [RFC PATCH 00/11] OMAP System Control Module. Writing to control module register is now handled in otg driver itself. Once the system control module driver gets upstreamed, I'll send a patch to make use of APIs in control module driver to write to control module register. This series was developed on git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git xceiv This patch series depends on [PATCH 0/2] omap: add ocp2scp as a bus driver Performed MUSB device mode testing on OMAP4 panda, OMAP4 SDP and OMAP3 beagle. 
Kishon Vijay Abraham I (11): drivers: usb: otg: add a new driver for omap usb2 phy arm/dts: omap: Add omap-usb2 dt data drivers: usb: otg: make twl6030_usb as a comparator driver to omap_usb2 arm: omap: hwmod: add a new addr space in otg fo writing to control module drivers: usb: twl6030: Add dt support for twl6030 usb arm/dts: Add twl6030-usb data drivers: usb: twl4030: Add device tree support for twl4030 usb arm/dts: Add twl4030-usb data drivers: usb: musb: Add device tree support for omap musb glue arm/dts: omap: Add usb_otg and glue data arm: omap: phy: remove unused functions from omap-phy-internal.c .../devicetree/bindings/bus/omap-ocp2scp.txt |3 + Documentation/devicetree/bindings/usb/omap-usb.txt | 48 .../devicetree/bindings/usb/twl-usb.txt| 41 +++ arch/arm/boot/dts/omap3-beagle.dts |6 + arch/arm/boot/dts/omap3-evm.dts|6 + arch/arm/boot/dts/omap3.dtsi |8 + arch/arm/boot/dts/omap4-panda.dts | 10 + arch/arm/boot/dts/omap4-sdp.dts| 10 + arch/arm/boot/dts/omap4.dtsi | 13 + arch/arm/boot/dts/twl4030.dtsi | 21 ++ arch/arm/boot/dts/twl6030.dtsi |6 + arch/arm/mach-omap2/omap_hwmod_44xx_data.c |5 + arch/arm/mach-omap2/omap_phy_internal.c| 138 -- arch/arm/mach-omap2/twl-common.c |5 - arch/arm/mach-omap2/usb-musb.c |3 - drivers/usb/musb/omap2430.c| 107 +++- drivers/usb/musb/omap2430.h|9 + drivers/usb/otg/Kconfig| 10 + drivers/usb/otg/Makefile |1 + drivers/usb/otg/omap-usb2.c| 271 drivers/usb/otg/twl4030-usb.c | 26 ++- drivers/usb/otg/twl6030-usb.c | 153 +++ include/linux/usb/omap_usb.h | 45 include/linux/usb/phy_companion.h | 34 +++ 24 files changed, 706 insertions(+), 273 deletions(-) create mode 100644 Documentation/devicetree/bindings/usb/omap-usb.txt create mode 100644 Documentation/devicetree/bindings/usb/twl-usb.txt create mode 100644 drivers/usb/otg/omap-usb2.c create mode 100644 include/linux/usb/omap_usb.h create mode 100644 include/linux/usb/phy_companion.h -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a 
message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 02/11] arm/dts: omap: Add omap-usb2 dt data
Add omap-usb2 data node in omap4 device tree file. Signed-off-by: Kishon Vijay Abraham I --- arch/arm/boot/dts/omap4.dtsi |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index bda5df3..4d2dcc1 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -279,6 +279,11 @@ #size-cells = <1>; ranges; ti,hwmods = "ocp2scp_usb_phy"; + usb2phy@0x4a0ad080 { + compatible = "ti,omap-usb2"; + reg = <0x4a0ad080 0x58>, + <0x4a002300 0x1>; + }; }; }; }; -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 06/11] arm/dts: Add twl6030-usb data
Add twl6030-usb data node in twl6030 device tree file Signed-off-by: Kishon Vijay Abraham I --- arch/arm/boot/dts/omap4-panda.dts |4 arch/arm/boot/dts/omap4-sdp.dts |4 arch/arm/boot/dts/twl6030.dtsi|6 ++ 3 files changed, 14 insertions(+), 0 deletions(-) diff --git a/arch/arm/boot/dts/omap4-panda.dts b/arch/arm/boot/dts/omap4-panda.dts index 1efe0c5..7052422 100644 --- a/arch/arm/boot/dts/omap4-panda.dts +++ b/arch/arm/boot/dts/omap4-panda.dts @@ -89,3 +89,7 @@ ti,non-removable; bus-width = <4>; }; + +&twlusb { + usb-supply = <&vusb>; +}; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index d08c4d1..6326d7c 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -158,3 +158,7 @@ bus-width = <4>; ti,non-removable; }; + +&twlusb { + usb-supply = <&vusb>; +}; diff --git a/arch/arm/boot/dts/twl6030.dtsi b/arch/arm/boot/dts/twl6030.dtsi index 3b2f351..5efd6d3 100644 --- a/arch/arm/boot/dts/twl6030.dtsi +++ b/arch/arm/boot/dts/twl6030.dtsi @@ -83,4 +83,10 @@ clk32kg: regulator@12 { compatible = "ti,twl6030-clk32kg"; }; + + twlusb: twl6030-usb { + compatible = "ti,twl6030-usb"; + interrupts = < 4 10 >; + regulator = "vusb"; + }; }; -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 04/11] arm: omap: hwmod: add a new addr space in otg for writing to control module
The mailbox register for usb otg in omap is present in control module. On detection of any events VBUS or ID, this register should be written to send the notification to musb core. Till we have a separate control module driver to write to control module, omap2430 will handle the register writes to control module by itself. So a new address space to represent this control module register is added to usb_otg_hs. Signed-off-by: Kishon Vijay Abraham I --- arch/arm/mach-omap2/omap_hwmod_44xx_data.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index ba24d15..c50d828 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -5922,6 +5922,11 @@ static struct omap_hwmod_addr_space omap44xx_usb_otg_hs_addrs[] = { .pa_end = 0x4a0ab7ff, .flags = ADDR_TYPE_RT }, + { + .pa_start = 0x4a00233c, + .pa_end = 0x4a00233f, + .flags = ADDR_TYPE_RT + }, { } }; -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 05/11] drivers: usb: twl6030: Add dt support for twl6030 usb
Add device tree support for twl6030 usb driver. Update the Documentation with device tree binding information. Signed-off-by: Kishon Vijay Abraham I --- .../devicetree/bindings/usb/twl-usb.txt| 22 +++ drivers/usb/otg/twl6030-usb.c | 39 +--- 2 files changed, 48 insertions(+), 13 deletions(-) create mode 100644 Documentation/devicetree/bindings/usb/twl-usb.txt diff --git a/Documentation/devicetree/bindings/usb/twl-usb.txt b/Documentation/devicetree/bindings/usb/twl-usb.txt new file mode 100644 index 000..e3f6d73 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/twl-usb.txt @@ -0,0 +1,22 @@ +USB COMPARATOR OF TWL CHIPS + +TWL6030 USB COMPARATOR + - compatible : Should be "ti,twl6030-usb" + - interrupts : Two interrupt numbers to the cpu should be specified. First + interrupt number is the otg interrupt number that raises ID interrupts when + the controller has to act as host and the second interrupt number is the + usb interrupt number that raises VBUS interrupts when the controller has to + act as device + - usb-supply : phandle to the regulator device tree node. It should be vusb + if it is twl6030 or ldousb if it is twl6025 subclass. 
+ +twl6030-usb { + compatible = "ti,twl6030-usb"; + interrupts = < 4 10 >; + regulator = "vusb"; +}; + +Board specific device node entry +&twl6030-usb { + usb-supply = <&vusb>; +}; diff --git a/drivers/usb/otg/twl6030-usb.c b/drivers/usb/otg/twl6030-usb.c index 9994dd22..6b0d0a1 100644 --- a/drivers/usb/otg/twl6030-usb.c +++ b/drivers/usb/otg/twl6030-usb.c @@ -105,7 +105,7 @@ struct twl6030_usb { u8 asleep; boolirq_enabled; boolvbus_enable; - unsigned long features; + const char *regulator; }; #definecomparator_to_twl(x) container_of((x), struct twl6030_usb, comparator) @@ -153,13 +153,6 @@ static int twl6030_start_srp(struct phy_companion *comparator) static int twl6030_usb_ldo_init(struct twl6030_usb *twl) { - char *regulator_name; - - if (twl->features & TWL6025_SUBCLASS) - regulator_name = "ldousb"; - else - regulator_name = "vusb"; - /* Set to OTG_REV 1.3 and turn on the ID_WAKEUP_COMP */ twl6030_writeb(twl, TWL6030_MODULE_ID0 , 0x1, TWL6030_BACKUP_REG); @@ -169,7 +162,7 @@ static int twl6030_usb_ldo_init(struct twl6030_usb *twl) /* Program MISC2 register and set bit VUSB_IN_VBAT */ twl6030_writeb(twl, TWL6030_MODULE_ID0 , 0x10, TWL6030_MISC2); - twl->usb3v3 = regulator_get(twl->dev, regulator_name); + twl->usb3v3 = regulator_get(twl->dev, twl->regulator); if (IS_ERR(twl->usb3v3)) return -ENODEV; @@ -321,9 +314,9 @@ static int __devinit twl6030_usb_probe(struct platform_device *pdev) { struct twl6030_usb *twl; int status, err; - struct twl4030_usb_data *pdata; - struct device *dev = &pdev->dev; - pdata = dev->platform_data; + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct twl4030_usb_data *pdata = dev->platform_data; twl = devm_kzalloc(dev, sizeof *twl, GFP_KERNEL); if (!twl) @@ -332,13 +325,24 @@ static int __devinit twl6030_usb_probe(struct platform_device *pdev) twl->dev= &pdev->dev; twl->irq1 = platform_get_irq(pdev, 0); twl->irq2 = platform_get_irq(pdev, 1); - twl->features = pdata->features; twl->linkstat = 
OMAP_MUSB_UNKNOWN; twl->comparator.set_vbus= twl6030_set_vbus; twl->comparator.start_srp = twl6030_start_srp; omap_usb2_set_comparator(&twl->comparator); + if (np) { + twl->regulator = "usb"; + } else if (pdata) { + if (pdata->features & TWL6025_SUBCLASS) + twl->regulator = "ldousb"; + else + twl->regulator = "vusb"; + } else { + dev_err(&pdev->dev, "twl6030 initialized without pdata\n"); + return -EINVAL; + } + /* init spinlock for workqueue */ spin_lock_init(&twl->lock); @@ -400,12 +404,21 @@ static int __exit twl6030_usb_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF +static const struct of_device_id twl6030_usb_id_table[] = { + { .compatible = "ti,twl6030-usb" }, + {} +}; +MODULE_DEVICE_TABLE(of, twl6030_usb_id_table); +#endif + static struct platform_driver twl6030_usb_driver = { .probe = twl6030_usb_probe, .remove = __exit_p(twl6030_usb_remove), .driver = { .name = "twl6030_usb", .owner = THIS_MODULE, +
[PATCH v2 10/11] arm/dts: omap: Add usb_otg and glue data
Add usb otg data node in omap4/omap3 device tree file. Also update the node with board specific setting in omapx-.dts file. Signed-off-by: Kishon Vijay Abraham I --- arch/arm/boot/dts/omap3-beagle.dts |6 ++ arch/arm/boot/dts/omap3-evm.dts|6 ++ arch/arm/boot/dts/omap3.dtsi |8 arch/arm/boot/dts/omap4-panda.dts |6 ++ arch/arm/boot/dts/omap4-sdp.dts|6 ++ arch/arm/boot/dts/omap4.dtsi |8 6 files changed, 40 insertions(+), 0 deletions(-) diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index 5b4506c..f3d7076 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -67,3 +67,9 @@ &mmc3 { status = "disable"; }; + +&usb_otg_hs { + interface_type = <0>; + mode = <3>; + power = <50>; +}; diff --git a/arch/arm/boot/dts/omap3-evm.dts b/arch/arm/boot/dts/omap3-evm.dts index 2eee16e..8963b3d 100644 --- a/arch/arm/boot/dts/omap3-evm.dts +++ b/arch/arm/boot/dts/omap3-evm.dts @@ -18,3 +18,9 @@ reg = <0x8000 0x1000>; /* 256 MB */ }; }; + +&usb_otg_hs { + interface_type = <0>; + mode = <3>; + power = <50>; +}; diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 99474fa..f2694c9 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -215,5 +215,13 @@ compatible = "ti,omap3-hsmmc"; ti,hwmods = "mmc3"; }; + + usb_otg_hs: usb_otg_hs@4a0ab000 { + compatible = "ti,omap3-musb"; + ti,hwmods = "usb_otg_hs"; + multipoint = <1>; + num_eps = <16>; + ram_bits = <12>; + }; }; }; diff --git a/arch/arm/boot/dts/omap4-panda.dts b/arch/arm/boot/dts/omap4-panda.dts index 7052422..dd19370 100644 --- a/arch/arm/boot/dts/omap4-panda.dts +++ b/arch/arm/boot/dts/omap4-panda.dts @@ -93,3 +93,9 @@ &twlusb { usb-supply = <&vusb>; }; + +&usb_otg_hs { + interface_type = <1>; + mode = <3>; + power = <50>; +}; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 6326d7c..0fc10d4 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ 
-162,3 +162,9 @@ &twlusb { usb-supply = <&vusb>; }; + +&usb_otg_hs { + interface_type = <1>; + mode = <3>; + power = <50>; +}; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 4d2dcc1..a3ee0f9 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -285,5 +285,13 @@ <0x4a002300 0x1>; }; }; + + usb_otg_hs: usb_otg_hs@4a0ab000 { + compatible = "ti,omap4-musb"; + ti,hwmods = "usb_otg_hs"; + multipoint = <1>; + num_eps = <16>; + ram_bits = <12>; + }; }; }; -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 11/11] arm: omap: phy: remove unused functions from omap-phy-internal.c
All the unnessary functions in omap-phy-internal is removed. These functionality are now handled by omap-usb2 phy driver. Cc: Felipe Balbi Signed-off-by: Kishon Vijay Abraham I Acked-by: Tony Lindgren --- arch/arm/mach-omap2/omap_phy_internal.c | 138 --- arch/arm/mach-omap2/twl-common.c|5 - arch/arm/mach-omap2/usb-musb.c |3 - 3 files changed, 0 insertions(+), 146 deletions(-) diff --git a/arch/arm/mach-omap2/omap_phy_internal.c b/arch/arm/mach-omap2/omap_phy_internal.c index d52651a..874aecc 100644 --- a/arch/arm/mach-omap2/omap_phy_internal.c +++ b/arch/arm/mach-omap2/omap_phy_internal.c @@ -31,144 +31,6 @@ #include #include "control.h" -/* OMAP control module register for UTMI PHY */ -#define CONTROL_DEV_CONF 0x300 -#define PHY_PD 0x1 - -#define USBOTGHS_CONTROL 0x33c -#defineAVALID BIT(0) -#defineBVALID BIT(1) -#defineVBUSVALID BIT(2) -#defineSESSEND BIT(3) -#defineIDDIG BIT(4) - -static struct clk *phyclk, *clk48m, *clk32k; -static void __iomem *ctrl_base; -static int usbotghs_control; - -int omap4430_phy_init(struct device *dev) -{ - ctrl_base = ioremap(OMAP443X_SCM_BASE, SZ_1K); - if (!ctrl_base) { - pr_err("control module ioremap failed\n"); - return -ENOMEM; - } - /* Power down the phy */ - __raw_writel(PHY_PD, ctrl_base + CONTROL_DEV_CONF); - - if (!dev) { - iounmap(ctrl_base); - return 0; - } - - phyclk = clk_get(dev, "ocp2scp_usb_phy_ick"); - if (IS_ERR(phyclk)) { - dev_err(dev, "cannot clk_get ocp2scp_usb_phy_ick\n"); - iounmap(ctrl_base); - return PTR_ERR(phyclk); - } - - clk48m = clk_get(dev, "ocp2scp_usb_phy_phy_48m"); - if (IS_ERR(clk48m)) { - dev_err(dev, "cannot clk_get ocp2scp_usb_phy_phy_48m\n"); - clk_put(phyclk); - iounmap(ctrl_base); - return PTR_ERR(clk48m); - } - - clk32k = clk_get(dev, "usb_phy_cm_clk32k"); - if (IS_ERR(clk32k)) { - dev_err(dev, "cannot clk_get usb_phy_cm_clk32k\n"); - clk_put(phyclk); - clk_put(clk48m); - iounmap(ctrl_base); - return PTR_ERR(clk32k); - } - return 0; -} - -int omap4430_phy_set_clk(struct device *dev, int 
on) -{ - static int state; - - if (on && !state) { - /* Enable the phy clocks */ - clk_enable(phyclk); - clk_enable(clk48m); - clk_enable(clk32k); - state = 1; - } else if (state) { - /* Disable the phy clocks */ - clk_disable(phyclk); - clk_disable(clk48m); - clk_disable(clk32k); - state = 0; - } - return 0; -} - -int omap4430_phy_power(struct device *dev, int ID, int on) -{ - if (on) { - if (ID) - /* enable VBUS valid, IDDIG groung */ - __raw_writel(AVALID | VBUSVALID, ctrl_base + - USBOTGHS_CONTROL); - else - /* -* Enable VBUS Valid, AValid and IDDIG -* high impedance -*/ - __raw_writel(IDDIG | AVALID | VBUSVALID, - ctrl_base + USBOTGHS_CONTROL); - } else { - /* Enable session END and IDIG to high impedance. */ - __raw_writel(SESSEND | IDDIG, ctrl_base + - USBOTGHS_CONTROL); - } - return 0; -} - -int omap4430_phy_suspend(struct device *dev, int suspend) -{ - if (suspend) { - /* Disable the clocks */ - omap4430_phy_set_clk(dev, 0); - /* Power down the phy */ - __raw_writel(PHY_PD, ctrl_base + CONTROL_DEV_CONF); - - /* save the context */ - usbotghs_control = __raw_readl(ctrl_base + USBOTGHS_CONTROL); - } else { - /* Enable the internel phy clcoks */ - omap4430_phy_set_clk(dev, 1); - /* power on the phy */ - if (__raw_readl(ctrl_base + CONTROL_DEV_CONF) & PHY_PD) { - __raw_writel(~PHY_PD, ctrl_base + CONTROL_DEV_CONF); - mdelay(200); - } - - /* restore the context */ - __raw_writel(usbotghs_control, ctrl_base + USBOTGHS_CONTROL); - } - - return 0; -} - -int omap4430_phy_exit(struct device *dev) -{ - if (ctrl_base) - iounmap(ctrl_base); - if
[PATCH v2 03/11] drivers: usb: otg: make twl6030_usb as a comparator driver to omap_usb2
All the PHY configuration other than VBUS, ID GND and OTG SRP are removed from twl6030. The phy configurations are taken care by the dedicated usb2 phy driver. So twl6030 is made as comparator driver for VBUS and ID detection. Writing to control module which is now handled in omap2430.c should be removed once a driver for control module is in place. Signed-off-by: Kishon Vijay Abraham I --- drivers/usb/musb/omap2430.c | 52 --- drivers/usb/musb/omap2430.h |9 +++ drivers/usb/otg/twl6030-usb.c | 114 + 3 files changed, 67 insertions(+), 108 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 5fdb9da..addbebf 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -44,6 +44,7 @@ struct omap2430_glue { struct platform_device *musb; enum omap_musb_vbus_id_status status; struct work_struct omap_musb_mailbox_work; + u32 __iomem *control_otghs; }; #define glue_to_musb(g)platform_get_drvdata(g->musb) @@ -51,6 +52,26 @@ struct omap2430_glue *_glue; static struct timer_list musb_idle_timer; +/** + * omap4_usb_phy_mailbox - write to usb otg mailbox + * @glue: struct omap2430_glue * + * @val: the value to be written to the mailbox + * + * On detection of a device (ID pin is grounded), this API should be called + * to set AVALID, VBUSVALID and ID pin is grounded. + * + * When OMAP is connected to a host (OMAP in device mode), this API + * is called to set AVALID, VBUSVALID and ID pin in high impedance. 
+ * + * XXX: This function will be removed once we have a seperate driver for + * control module + */ +static void omap4_usb_phy_mailbox(struct omap2430_glue *glue, u32 val) +{ + if (glue->control_otghs) + writel(val, glue->control_otghs); +} + static void musb_do_idle(unsigned long _musb) { struct musb *musb = (void *)_musb; @@ -245,6 +266,7 @@ EXPORT_SYMBOL_GPL(omap_musb_mailbox); static void omap_musb_set_mailbox(struct omap2430_glue *glue) { + u32 val; struct musb *musb = glue_to_musb(glue); struct device *dev = musb->controller; struct musb_hdrc_platform_data *pdata = dev->platform_data; @@ -260,7 +282,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) musb->xceiv->last_event = USB_EVENT_ID; if (!is_otg_enabled(musb) || musb->gadget_driver) { pm_runtime_get_sync(dev); - usb_phy_init(musb->xceiv); + val = AVALID | VBUSVALID; + omap4_usb_phy_mailbox(glue, val); omap2430_musb_set_vbus(musb, 1); } break; @@ -273,7 +296,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) musb->xceiv->last_event = USB_EVENT_VBUS; if (musb->gadget_driver) pm_runtime_get_sync(dev); - usb_phy_init(musb->xceiv); + val = IDDIG | AVALID | VBUSVALID; + omap4_usb_phy_mailbox(glue, val); break; case OMAP_MUSB_ID_FLOAT: @@ -291,7 +315,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue) if (musb->xceiv->otg->set_vbus) otg_set_vbus(musb->xceiv->otg, 0); } - usb_phy_shutdown(musb->xceiv); + val = SESSEND | IDDIG; + omap4_usb_phy_mailbox(glue, val); break; default: dev_dbg(dev, "ID float\n"); @@ -366,6 +391,7 @@ err1: static void omap2430_musb_enable(struct musb *musb) { u8 devctl; + u32 val; unsigned long timeout = jiffies + msecs_to_jiffies(1000); struct device *dev = musb->controller; struct omap2430_glue *glue = dev_get_drvdata(dev->parent); @@ -375,7 +401,8 @@ static void omap2430_musb_enable(struct musb *musb) switch (glue->status) { case OMAP_MUSB_ID_GROUND: - usb_phy_init(musb->xceiv); + val = AVALID | VBUSVALID; + omap4_usb_phy_mailbox(glue, 
val); if (data->interface_type != MUSB_INTERFACE_UTMI) break; devctl = musb_readb(musb->mregs, MUSB_DEVCTL); @@ -394,7 +421,8 @@ static void omap2430_musb_enable(struct musb *musb) break; case OMAP_MUSB_VBUS_VALID: - usb_phy_init(musb->xceiv); + val = IDDIG | AVALID | VBUSVALID; + omap4_usb_phy_mailbox(glue, val); break; default: @@ -404,11 +432,14 @@ static void omap2430_musb_enable(struct musb *musb) static void omap2430_musb_disable(struct musb *musb) { + u32 val; struct device *dev = musb->controller; struct omap2430_glue *glue = dev_ge
[PATCH v2 07/11] drivers: usb: twl4030: Add device tree support for twl4030 usb
Add device tree support for twl4030 usb driver. Update the Documentation with device tree binding information. Signed-off-by: Kishon Vijay Abraham I --- .../devicetree/bindings/usb/twl-usb.txt| 19 ++ drivers/usb/otg/twl4030-usb.c | 26 +++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/twl-usb.txt b/Documentation/devicetree/bindings/usb/twl-usb.txt index e3f6d73..c992b3b 100644 --- a/Documentation/devicetree/bindings/usb/twl-usb.txt +++ b/Documentation/devicetree/bindings/usb/twl-usb.txt @@ -20,3 +20,22 @@ Board specific device node entry &twl6030-usb { usb-supply = <&vusb>; }; + +TWL4030 USB PHY AND COMPARATOR + - compatible : Should be "ti,twl4030-usb" + - interrupts : The interrupt numbers to the cpu should be specified. First + interrupt number is the otg interrupt number that raises ID interrupts + and VBUS interrupts. The second interrupt number is optional. + - -supply : phandle to the regulator device tree node. +should be vusb1v5, vusb1v8 and vusb3v1 + - usb_mode : The mode used by the phy to connect to the controller. "1" + specifies "ULPI" mode and "2" specifies "CEA2011_3PIN" mode. 
+ +twl4030-usb { + compatible = "ti,twl4030-usb"; + interrupts = < 10 4 >; + usb1v5-supply = <&vusb1v5>; + usb1v8-supply = <&vusb1v8>; + usb3v1-supply = <&vusb3v1>; + usb_mode = <1>; +}; diff --git a/drivers/usb/otg/twl4030-usb.c b/drivers/usb/otg/twl4030-usb.c index 523cad5..f0d2e75 100644 --- a/drivers/usb/otg/twl4030-usb.c +++ b/drivers/usb/otg/twl4030-usb.c @@ -585,23 +585,28 @@ static int __devinit twl4030_usb_probe(struct platform_device *pdev) struct twl4030_usb *twl; int status, err; struct usb_otg *otg; - - if (!pdata) { - dev_dbg(&pdev->dev, "platform_data not available\n"); - return -EINVAL; - } + struct device_node *np = pdev->dev.of_node; twl = devm_kzalloc(&pdev->dev, sizeof *twl, GFP_KERNEL); if (!twl) return -ENOMEM; + if (np) + of_property_read_u32(np, "usb_mode", + (enum twl4030_usb_mode *)&twl->usb_mode); + else if (pdata) + twl->usb_mode = pdata->usb_mode; + else { + dev_err(&pdev->dev, "twl4030 initialized without pdata\n"); + return -EINVAL; + } + otg = devm_kzalloc(&pdev->dev, sizeof *otg, GFP_KERNEL); if (!otg) return -ENOMEM; twl->dev= &pdev->dev; twl->irq= platform_get_irq(pdev, 0); - twl->usb_mode = pdata->usb_mode; twl->vbus_supplied = false; twl->asleep = 1; twl->linkstat = OMAP_MUSB_UNKNOWN; @@ -690,12 +695,21 @@ static int __exit twl4030_usb_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF +static const struct of_device_id twl4030_usb_id_table[] = { + { .compatible = "ti,twl4030-usb" }, + {} +}; +MODULE_DEVICE_TABLE(of, twl4030_usb_id_table); +#endif + static struct platform_driver twl4030_usb_driver = { .probe = twl4030_usb_probe, .remove = __exit_p(twl4030_usb_remove), .driver = { .name = "twl4030_usb", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(twl4030_usb_id_table), }, }; -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at 
http://www.tux.org/lkml/
[PATCH v2 09/11] drivers: usb: musb: Add device tree support for omap musb glue
Added device tree support for omap musb driver and updated the Documentation with device tree binding information. Signed-off-by: Kishon Vijay Abraham I --- Documentation/devicetree/bindings/usb/omap-usb.txt | 34 - drivers/usb/musb/omap2430.c| 55 2 files changed, 88 insertions(+), 1 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt index 80a28c9..39cdffb 100644 --- a/Documentation/devicetree/bindings/usb/omap-usb.txt +++ b/Documentation/devicetree/bindings/usb/omap-usb.txt @@ -1,4 +1,4 @@ -OMAP USB PHY +OMAP USB PHY AND GLUE OMAP USB2 PHY @@ -14,3 +14,35 @@ usb2phy@0x4a0ad080 { compatible = "ti,omap-usb2"; reg = <0x4a0ad080 0x58>; }; + +OMAP MUSB GLUE + - compatible : Should be "ti,musb-omap2430" + - ti,hwmods : must be "usb_otg_hs" + - multipoint : Should be "1" indicating the musb controller supports + multipoint. This is a MUSB configuration-specific setting. + - num_eps : Specifies the number of endpoints. This is also a + MUSB configuration-specific setting. Should be set to "16" + - ram_bits : Specifies the ram address size. Should be set to "12" + - interface_type : This is a board specific setting to describe the type of + interface between the controller and the phy. It should be "0" or "1" + specifying ULPI and UTMI respectively. + - mode : Should be "3" to represent OTG. "1" signifies HOST and "2" + represents PERIPHERAL. + - power : Should be "50". This signifies the controller can supply upto + 100mA when operating in host mode. 
+ +SOC specific device node entry +usb_otg_hs: usb_otg_hs@4a0ab000 { + compatible = "ti,musb-omap2430"; + ti,hwmods = "usb_otg_hs"; + multipoint = <1>; + num_eps = <16>; + ram_bits = <12>; +}; + +Board specific device node entry +&usb_otg_hs { + interface_type = <1>; + mode = <3>; + power = <50>; +}; diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index addbebf..331e477 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -469,8 +470,11 @@ static u64 omap2430_dmamask = DMA_BIT_MASK(32); static int __devinit omap2430_probe(struct platform_device *pdev) { struct musb_hdrc_platform_data *pdata = pdev->dev.platform_data; + struct omap_musb_board_data *data; struct platform_device *musb; struct omap2430_glue*glue; + struct device_node *np = pdev->dev.of_node; + struct musb_hdrc_config *config; struct resource *res; int ret = -ENOMEM; @@ -500,6 +504,43 @@ static int __devinit omap2430_probe(struct platform_device *pdev) if (glue->control_otghs == NULL) dev_dbg(&pdev->dev, "Failed to obtain control memory\n"); + if (np) { + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + dev_err(&pdev->dev, + "failed to allocate musb platfrom data\n"); + ret = -ENOMEM; + goto err1; + } + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, + "failed to allocate musb board data\n"); + ret = -ENOMEM; + goto err1; + } + + config = devm_kzalloc(&pdev->dev, sizeof(*config), GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, + "failed to allocate musb hdrc config\n"); + goto err1; + } + + of_property_read_u32(np, "mode", (u32 *)&pdata->mode); + of_property_read_u32(np, "interface_type", + (u32 *)&data->interface_type); + of_property_read_u32(np, "num_eps", (u32 *)&config->num_eps); + of_property_read_u32(np, "ram_bits", (u32 *)&config->ram_bits); + of_property_read_u32(np, "mode", (u32 
*)&pdata->mode); + of_property_read_u32(np, "power", (u32 *)&pdata->power); + config->multipoint = of_property_read_bool(np, "multipoint"); + + pdata->board_data = data; + pdata->config = config; + } + pdata->platform_ops = &omap2430_ops; platform_set_drvdata(pdev, glue); @@ -597,12 +638,26 @@ static struct dev_pm_ops omap2430_pm_ops = { #define DEV_PM_OPS NULL #endif +#ifdef CONFIG_OF +static const struct of_device_id omap2430_id_table[] = { + { + .comp
[PATCH v2 01/11] drivers: usb: otg: add a new driver for omap usb2 phy
All phy related programming like enabling/disabling the clocks, powering on/off the phy is taken care of by this driver. It is also used for OTG related functionality like srp. This also includes device tree support for usb2 phy driver and the documentation with device tree binding information is updated. Currently writing to control module register is taken care in this driver which will be removed once the control module driver is in place. Cc: Felipe Balbi Signed-off-by: Kishon Vijay Abraham I --- .../devicetree/bindings/bus/omap-ocp2scp.txt |3 + Documentation/devicetree/bindings/usb/omap-usb.txt | 16 ++ drivers/usb/otg/Kconfig| 10 + drivers/usb/otg/Makefile |1 + drivers/usb/otg/omap-usb2.c| 271 include/linux/usb/omap_usb.h | 45 include/linux/usb/phy_companion.h | 34 +++ 7 files changed, 380 insertions(+), 0 deletions(-) create mode 100644 Documentation/devicetree/bindings/usb/omap-usb.txt create mode 100644 drivers/usb/otg/omap-usb2.c create mode 100644 include/linux/usb/omap_usb.h create mode 100644 include/linux/usb/phy_companion.h diff --git a/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt b/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt index d2fe064..bb0c7f4 100644 --- a/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt +++ b/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt @@ -8,3 +8,6 @@ properties: Sub-nodes: All the devices connected to ocp2scp are described using sub-node to ocp2scp +- usb2phy : + The binding details of usb2phy can be found in: + Documentation/devicetree/bindings/usb/omap-usb.txt diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt new file mode 100644 index 000..80a28c9 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/omap-usb.txt @@ -0,0 +1,16 @@ +OMAP USB PHY + +OMAP USB2 PHY + +Required properties: + - compatible: Should be "ti,omap-usb2" + - reg : Address and length of the register set for the device. 
Also +add the address of control module dev conf register until a driver for +control module is added + +This is usually a subnode of ocp2scp to which it is connected. + +usb2phy@0x4a0ad080 { + compatible = "ti,omap-usb2"; + reg = <0x4a0ad080 0x58>; +}; diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig index 5c87db0..c751db7 100644 --- a/drivers/usb/otg/Kconfig +++ b/drivers/usb/otg/Kconfig @@ -78,6 +78,16 @@ config TWL6030_USB are hooked to this driver through platform_data structure. The definition of internal PHY APIs are in the mach-omap2 layer. +config OMAP_USB2 + tristate "OMAP USB2 PHY Driver" + depends on OMAP_OCP2SCP + select USB_OTG_UTILS + help + Enable this to support the transceiver that is part of SOC. This + driver takes care of all the PHY functionality apart from comparator. + The USB OTG controller communicates with the comparator using this + driver. + config NOP_USB_XCEIV tristate "NOP USB Transceiver Driver" select USB_OTG_UTILS diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile index 41aa509..2c2a3ca 100644 --- a/drivers/usb/otg/Makefile +++ b/drivers/usb/otg/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_USB_GPIO_VBUS) += gpio_vbus.o obj-$(CONFIG_ISP1301_OMAP) += isp1301_omap.o obj-$(CONFIG_TWL4030_USB) += twl4030-usb.o obj-$(CONFIG_TWL6030_USB) += twl6030-usb.o +obj-$(CONFIG_OMAP_USB2)+= omap-usb2.o obj-$(CONFIG_NOP_USB_XCEIV)+= nop-usb-xceiv.o obj-$(CONFIG_USB_ULPI) += ulpi.o obj-$(CONFIG_USB_ULPI_VIEWPORT)+= ulpi_viewport.o diff --git a/drivers/usb/otg/omap-usb2.c b/drivers/usb/otg/omap-usb2.c new file mode 100644 index 000..4a133cf --- /dev/null +++ b/drivers/usb/otg/omap-usb2.c @@ -0,0 +1,271 @@ +/* + * omap-usb2.c - USB PHY, talking to musb controller in OMAP. 
+ * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Author: Kishon Vijay Abraham I + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * omap_usb2_set_comparator - links the comparator present in the sytem with + * this phy + * @comparator - the companion phy(comparator) for this phy +
[PATCH v2 08/11] arm/dts: Add twl4030-usb data
Add twl4030-usb data node in twl4030 device tree file. Signed-off-by: Kishon Vijay Abraham I --- arch/arm/boot/dts/twl4030.dtsi | 21 + 1 files changed, 21 insertions(+), 0 deletions(-) diff --git a/arch/arm/boot/dts/twl4030.dtsi b/arch/arm/boot/dts/twl4030.dtsi index 22f4d13..761a5a5 100644 --- a/arch/arm/boot/dts/twl4030.dtsi +++ b/arch/arm/boot/dts/twl4030.dtsi @@ -37,6 +37,18 @@ regulator-max-microvolt = <315>; }; + vusb1v5: regulator-vusb1v5 { + compatible = "ti,twl4030-vusb1v5"; + }; + + vusb1v8: regulator-vusb1v8 { + compatible = "ti,twl4030-vusb1v8"; + }; + + vusb3v1: regulator-vusb3v1 { + compatible = "ti,twl4030-vusb3v1"; + }; + twl_gpio: gpio { compatible = "ti,twl4030-gpio"; gpio-controller; @@ -44,4 +56,13 @@ interrupt-controller; #interrupt-cells = <1>; }; + + twl4030-usb { + compatible = "ti,twl4030-usb"; + interrupts = < 10 4 >; + usb1v5-supply = <&vusb1v5>; + usb1v8-supply = <&vusb1v8>; + usb3v1-supply = <&vusb3v1>; + usb_mode = <1>; + }; }; -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: mm,numad,rcu: hang on OOM
Hi Paul, I've been running with your patch below for a while now, and haven't encountered the issue again. On 07/01/2012 03:15 AM, Paul E. McKenney wrote: > On Fri, Jun 29, 2012 at 06:44:41PM +0200, Sasha Levin wrote: >> Hi all, >> >> While fuzzing using trinity on a KVM tools guest with todays linux-next, >> I've hit the following lockup: >> >> [ 362.261729] INFO: task numad/2:27 blocked for more than 120 seconds. >> [ 362.263974] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables >> this message. >> [ 362.271684] numad/2 D 0001 567227 2 >> 0x >> [ 362.280052] 8800294c7c58 0046 8800294c7c08 >> 81163dba >> [ 362.294477] 8800294c6000 8800294c6010 8800294c7fd8 >> 8800294c6000 >> [ 362.306631] 8800294c6010 8800294c7fd8 88000d5c3000 >> 8800294c8000 >> [ 362.315395] Call Trace: >> [ 362.318556] [] ? __lock_release+0x1ba/0x1d0 >> [ 362.325411] [] schedule+0x55/0x60 >> [ 362.328844] [] rwsem_down_failed_common+0xf5/0x130 >> [ 362.332501] [] ? put_lock_stats+0xe/0x40 >> [ 362.334496] [] ? __lock_contended+0x1f5/0x230 >> [ 362.336723] [] rwsem_down_read_failed+0x15/0x17 >> [ 362.339297] [] call_rwsem_down_read_failed+0x14/0x30 >> [ 362.341768] [] ? down_read+0x79/0xa0 >> [ 362.343669] [] ? lazy_migrate_process+0x22/0x60 >> [ 362.345616] [] lazy_migrate_process+0x22/0x60 >> [ 362.347464] [] process_mem_migrate+0x10/0x20 >> [ 362.349340] [] move_processes+0x190/0x230 >> [ 362.351398] [] numad_thread+0x7a/0x120 >> [ 362.353245] [] ? find_busiest_node+0x310/0x310 >> [ 362.355396] [] kthread+0xb2/0xc0 >> [ 362.356996] [] kernel_thread_helper+0x4/0x10 >> [ 362.359253] [] ? retint_restore_args+0x13/0x13 >> [ 362.361168] [] ? __init_kthread_worker+0x70/0x70 >> [ 362.363277] [] ? 
gs_change+0x13/0x13 >> >> I've hit sysrq-t to see what might be the cause, and it appears that an OOM >> was in progress, and was stuck on RCU: >> >> [ 578.086230] trinity-child69 D 8800277a54c8 3968 6658 6580 >> 0x >> [ 578.086230] 880022c5f518 0046 880022c5f4c8 >> 88001b9d6e00 >> [ 578.086230] 880022c5e000 880022c5e010 880022c5ffd8 >> 880022c5e000 >> [ 578.086230] 880022c5e010 880022c5ffd8 880023c08000 >> 880022c33000 >> [ 578.086230] Call Trace: >> [ 578.086230] [] schedule+0x55/0x60 >> [ 578.086230] [] schedule_timeout+0x38/0x2c0 >> [ 578.086230] [] ? mark_held_locks+0xf6/0x120 >> [ 578.086230] [] ? __lock_release+0x1ba/0x1d0 >> [ 578.086230] [] ? _raw_spin_unlock_irq+0x2b/0x80 >> [ 578.086230] [] wait_for_common+0xff/0x170 >> [ 578.086230] [] ? try_to_wake_up+0x290/0x290 >> [ 578.086230] [] wait_for_completion+0x18/0x20 >> [ 578.086230] [] _rcu_barrier+0x4a7/0x4e0 > > Hmmm... Perhaps a blocking operation is not appropriate here. I have > substituted a nonblocking approach, which is at -rcu (thus soon -next) > at 1ee4c09d (Provide OOM handler to motivate lazy RCU callbacks). > Patch below. > > Thanx, Paul > > > > rcu: Provide OOM handler to motivate lazy RCU callbacks > > In kernels built with CONFIG_RCU_FAST_NO_HZ=y, CPUs can accumulate a > large number of lazy callbacks, which as the name implies will be slow > to be invoked. This can be a problem on small-memory systems, where the > default 6-second sleep for CPUs having only lazy RCU callbacks could well > be fatal. This commit therefore installs an OOM hander that ensures that > every CPU with non-lazy callbacks has at least one non-lazy callback, > in turn ensuring timely advancement for these callbacks. > > Signed-off-by: Paul E. McKenney > Signed-off-by: Paul E. 
McKenney > > diff --git a/kernel/rcutree.h b/kernel/rcutree.h > index 4b47fbe..dab279f 100644 > --- a/kernel/rcutree.h > +++ b/kernel/rcutree.h > @@ -314,8 +314,11 @@ struct rcu_data { > unsigned long n_rp_need_fqs; > unsigned long n_rp_need_nothing; > > - /* 6) _rcu_barrier() callback. */ > + /* 6) _rcu_barrier() and OOM callbacks. */ > struct rcu_head barrier_head; > +#ifdef CONFIG_RCU_FAST_NO_HZ > + struct rcu_head oom_head; > +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ > > int cpu; > struct rcu_state *rsp; > diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h > index 81e53eb..1908847 100644 > --- a/kernel/rcutree_plugin.h > +++ b/kernel/rcutree_plugin.h > @@ -25,6 +25,7 @@ > */ > > #include > +#include > > #define RCU_KTHREAD_PRIO 1 > > @@ -2128,6 +2129,90 @@ static void rcu_idle_count_callbacks_posted(void) > __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); > } > > +/* > + * Data for flushing lazy RCU callbacks at OOM time. > + */ > +static atomic_t oom_callback_count; > +static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq); > + > +/* > + * RCU OOM callba
sched, debug: INFO: possible irq lock inversion dependency detected
Hi all, While fuzzing using trinity inside a KVM tools guest using the latest linux-next, I've encountered a hang within the guest. When I've tried dumping tasks using sysrq-t I got the following: [ 138.777386] = [ 138.777386] [ INFO: possible irq lock inversion dependency detected ] [ 138.777386] 3.5.0-rc7-next-20120716-sasha-dirty #225 Tainted: GW [ 138.777386] - [ 138.777386] swapper/0/0 just changed the state of lock: [ 138.777386] (tasklist_lock){.?.+..}, at: [] print_rq+0x53/0x190 [ 138.777386] but this lock took another, HARDIRQ-unsafe lock in the past: [ 138.777386] (&(&p->alloc_lock)->rlock){+.+...} and interrupts could create inverse lock ordering between them. [ 138.777386] [ 138.777386] other info that might help us debug this: [ 138.777386] Possible interrupt unsafe locking scenario: [ 138.777386] [ 138.777386]CPU0CPU1 [ 138.777386] [ 138.777386] lock(&(&p->alloc_lock)->rlock); [ 138.777386]local_irq_disable(); [ 138.777386]lock(tasklist_lock); [ 138.777386]lock(&(&p->alloc_lock)->rlock); [ 138.777386] [ 138.777386] lock(tasklist_lock); [ 138.777386] [ 138.777386] *** DEADLOCK *** [ 138.777386] [ 138.777386] 6 locks held by swapper/0/0: [ 138.777386] #0: (&(&i->lock)->rlock){-.-...}, at: [] serial8250_interrupt+0x2c/0xd0 [ 138.777386] #1: (&port_lock_key){-.-...}, at: [] serial8250_handle_irq+0x23/0x80 [ 138.777386] #2: (sysrq_key_table_lock){-.}, at: [] __handle_sysrq+0x2d/0x180 [ 138.777386] #3: (rcu_read_lock){.+.+..}, at: [] show_state_filter+0x0/0x220 [ 138.777386] #4: (sched_debug_lock){-.}, at: [] print_cpu+0x5fc/0x710 [ 138.777386] #5: (rcu_read_lock){.+.+..}, at: [] print_cpu+0x5f0/0x710 [ 138.777386] [ 138.777386] the shortest dependencies between 2nd lock and 1st lock: [ 138.777386] -> (&(&p->alloc_lock)->rlock){+.+...} ops: 83940 { [ 138.777386] HARDIRQ-ON-W at: [ 138.777386] [] mark_irqflags+0x100/0x190 [ 138.777386] [] __lock_acquire+0x92b/0xb50 [ 138.777386] [] lock_acquire+0x1ca/0x270 [ 138.777386] [] _raw_spin_lock+0x3b/0x70 [ 
138.777386] [] set_task_comm+0x31/0x1c0 [ 138.777386] [] kthreadd+0x2c/0x170 [ 138.777386] [] kernel_thread_helper+0x4/0x10 [ 138.777386] SOFTIRQ-ON-W at: [ 138.777386] [] mark_irqflags+0x123/0x190 [ 138.777386] [] __lock_acquire+0x92b/0xb50 [ 138.777386] [] lock_acquire+0x1ca/0x270 [ 138.777386] [] _raw_spin_lock+0x3b/0x70 [ 138.777386] [] set_task_comm+0x31/0x1c0 [ 138.777386] [] kthreadd+0x2c/0x170 [ 138.777386] [] kernel_thread_helper+0x4/0x10 [ 138.777386] INITIAL USE at: [ 138.777386] [] __lock_acquire+0x954/0xb50 [ 138.777386] [] lock_acquire+0x1ca/0x270 [ 138.777386] [] _raw_spin_lock+0x3b/0x70 [ 138.777386] [] set_task_comm+0x31/0x1c0 [ 138.777386] [] kthreadd+0x2c/0x170 [ 138.777386] [] kernel_thread_helper+0x4/0x10 [ 138.777386] } [ 138.777386] ... key at: [] __key.45660+0x0/0x8 [ 138.777386] ... acquired at: [ 138.777386][] check_prevs_add+0xba/0x1a0 [ 138.777386][] validate_chain.isra.22+0x6a0/0x7b0 [ 138.777386][] __lock_acquire+0xa8d/0xb50 [ 138.777386][] lock_acquire+0x1ca/0x270 [ 138.777386][] _raw_spin_lock+0x3b/0x70 [ 138.777386][] keyctl_session_to_parent+0x105/0x3f0 [ 138.777386][] sys_keyctl+0x155/0x1a0 [ 138.777386][] system_call_fastpath+0x1a/0x1f [ 138.777386] [ 138.777386] -> (tasklist_lock){.?.+..} ops: 31775 { [ 138.777386]IN-HARDIRQ-R at: [ 138.777386] [] mark_irqflags+0x41/0x190 [ 138.777386] [] __lock_acquire+0x92b/0xb50 [ 138.777386] [] lock_acquire+0x1ca/0x270 [ 138.777386] [] _raw_read_lock_irqsave+0x7c/0xc0 [ 138.777386] [] print_rq+0x53/0x190 [ 138.777386] [] print_cpu+0x68e/0x710 [ 138.777386] [] sched_debug_show+0x682/0x720 [ 138.777386] [] sysrq_sched_debug_show+0xd/0x10 [ 138.777386] [] show_state_filter+0x19e/0x220 [ 138.777386] [] sysrq_handle_showstate+0xb/0x10 [ 138.777386] [] __handle_sysrq+0xb7/0x180 [ 138.777386] [] handle_sysrq+0x21/0x30 [ 138.777386]
Re: [PATCH] timekeeping: Add missing update call in timekeeping_resume()
I run into the same regression in s2ram between v3.5-rc6 and v3.5-rc7. This patch helps. Thanks. Tested-by: Konstantin Khlebnikov John Stultz wrote: From: Thomas Gleixner Linus, Since Thomas is on vacation I wanted to send you his fix to resolve the suspend regression from the leapsecond fixes. thanks -john The leap second rework unearthed another issue of inconsistent data. On timekeeping_resume() the timekeeper data is updated, but nothing calls timekeeping_update(), so now the update code in the timer interrupt sees stale values. This has been the case before those changes, but then the timer interrupt was using stale data as well so this went unnoticed for quite some time. Add the missing update call, so all the data is consistent everywhere. Reported-by: Andreas Schwab Reported-by-and-tested-by: "Rafael J. Wysocki" CC: LKML Cc: Linux PM list Cc: John Stultz Cc: Ingo Molnar Cc: Peter Zijlstra, Cc: Prarit Bhargava Cc: sta...@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/timekeeping.c |1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 269b1fe..3447cfa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -717,6 +717,7 @@ static void timekeeping_resume(void) timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); write_sequnlock_irqrestore(&timekeeper.lock, flags); touch_softlockup_watchdog(); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RFC V4 2/3] kvm: Note down when cpu relax intercepted or pause loop exited
On 07/16/2012 08:24 PM, Raghavendra K T wrote: > So are you saying allow vcpu to spin in non over-commit scenarios? So > that we avoid all yield_to etc... > > ( Or even in some other place where it is useful). When is yielding useful, if you're not overcommitted? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[char-misc-next] mei: use module_pci_driver
Signed-off-by: Tomas Winkler --- drivers/misc/mei/main.c | 39 +-- 1 files changed, 1 insertions(+), 38 deletions(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 0d98c2e..0923302 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -1182,44 +1182,7 @@ static struct pci_driver mei_driver = { .driver.pm = MEI_PM_OPS, }; -/** - * mei_init_module - Driver Registration Routine - * - * mei_init_module is the first routine called when the driver is - * loaded. All it does is to register with the PCI subsystem. - * - * returns 0 on success, <0 on failure. - */ -static int __init mei_init_module(void) -{ - int ret; - - pr_debug("loading.\n"); - /* init pci module */ - ret = pci_register_driver(&mei_driver); - if (ret < 0) - pr_err("error registering driver.\n"); - - return ret; -} - -module_init(mei_init_module); - -/** - * mei_exit_module - Driver Exit Cleanup Routine - * - * mei_exit_module is called just before the driver is removed - * from memory. - */ -static void __exit mei_exit_module(void) -{ - pci_unregister_driver(&mei_driver); - - pr_debug("unloaded successfully.\n"); -} - -module_exit(mei_exit_module); - +module_pci_driver(mei_driver); MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); -- 1.7.4.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[char-misc-next] mei: use module_pci_driver
Change-Id: I960feed4157ad1b178f68f4a0b3968e185ed0b36 Signed-off-by: Tomas Winkler --- drivers/misc/mei/main.c | 39 +-- 1 files changed, 1 insertions(+), 38 deletions(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 0d98c2e..0923302 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -1182,44 +1182,7 @@ static struct pci_driver mei_driver = { .driver.pm = MEI_PM_OPS, }; -/** - * mei_init_module - Driver Registration Routine - * - * mei_init_module is the first routine called when the driver is - * loaded. All it does is to register with the PCI subsystem. - * - * returns 0 on success, <0 on failure. - */ -static int __init mei_init_module(void) -{ - int ret; - - pr_debug("loading.\n"); - /* init pci module */ - ret = pci_register_driver(&mei_driver); - if (ret < 0) - pr_err("error registering driver.\n"); - - return ret; -} - -module_init(mei_init_module); - -/** - * mei_exit_module - Driver Exit Cleanup Routine - * - * mei_exit_module is called just before the driver is removed - * from memory. - */ -static void __exit mei_exit_module(void) -{ - pci_unregister_driver(&mei_driver); - - pr_debug("unloaded successfully.\n"); -} - -module_exit(mei_exit_module); - +module_pci_driver(mei_driver); MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); -- 1.7.4.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Add vhost-blk support
On 07/16/2012 07:58 PM, Stefan Hajnoczi wrote: On Thu, Jul 12, 2012 at 4:35 PM, Asias He wrote: This patchset adds vhost-blk support. vhost-blk is an in-kernel virtio-blk device accelerator. Compared to userspace virtio-blk implementation, vhost-blk gives about 5% to 15% performance improvement. Why is it 5-15% faster? vhost-blk and the userspace virtio-blk you benchmarked should be doing basically the same thing: 1. An eventfd file descriptor is signalled when the vring has new requests available from the guest. 2. A thread wakes up and processes the virtqueue. 3. Linux AIO is used to issue host I/O. 4. An interrupt is injected into the guest. Yes. This is how both of them work. Though, there are some differences in details. e.g. In vhost-blk, we use the vhost's work infrastructure to handle the requests. In kvm tool, we use a dedicated thread. In vhost-blk, we use irqfd to inject interrupts. In kvm tool, we use ioctl to inject interrupts. Does the vhost-blk implementation do anything fundamentally different from userspace? Where is the overhead that userspace virtio-blk has? Currently, no. But we could play with bio directly in vhost-blk as Christoph suggested which could make the IO path from guest to host's real storage even shorter in vhost-blk. I've been trying my best to reduce the overhead of virtio-blk at kvm tool side. I do not see any significant overhead out there. Compared to vhost-blk, the overhead we have in userspace virtio-blk is syscalls. In each IO request, we have epoll_wait() & read(): wait for the eventfd which guest notifies us io_submit(): submit the aio read(): read the aio complete eventfd io_getevents(): reap the aio complete result ioctl(): trigger the interrupt So, vhost-blk at least saves ~6 syscalls for us in each request. I'm asking because it would be beneficial to fix the overhead (especially if that could speed up all userspace applications) instead of adding a special-purpose kernel module to work around the overhead. 
I guess you mean qemu here. Yes, in theory, qemu's block layer can be improved to achieve similar performance as vhost-blk or kvm tool's userspace virtio-blk has. But I think it makes no sense to prevent one solution because there is another in theory solution called: we can do similar in qemu. What do you mean by special-purpose here, we need general-purpose kernel module? Is vhost-net a special purpose kernel module? Is xen-blkback a special-purpose kernel module? And I think vhost-blk is beneficial to qemu too, as well as any other kvm host side implementation. -- Asias -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RFC V4 3/3] kvm: Choose better candidate for directed yield
On 07/16/2012 07:10 PM, Rik van Riel wrote: > On 07/16/2012 06:07 AM, Avi Kivity wrote: > >>> +{ >>> +bool eligible; >>> + >>> +eligible = !vcpu->ple.cpu_relax_intercepted || >>> +(vcpu->ple.cpu_relax_intercepted&& >>> + vcpu->ple.dy_eligible); >>> + >>> +if (vcpu->ple.cpu_relax_intercepted) >>> +vcpu->ple.dy_eligible = !vcpu->ple.dy_eligible; >> >> Probably should assign 'true', since the previous value is essentially >> random. > > I suspect the intended purpose of this conditional is to > flip the eligibility of a vcpu for being selected as a > direct yield target. > > In other words, that bit of the code is correct. If vcpu A is in a long spin loop and is preempted away, and vcpu B dips several times in kvm_vcpu_on_spin(), then it will act as intended. But if vcpu A is spinning for x% of its time and processing on the other, then vcpu B will flip its dy_eligible for those x%, and not flip it when it's processing. I don't understand how this is useful. I guess this is an attempt to impose fairness on yielding, and it makes sense to do this, but I don't know if this is the best way to achieve it. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RFC V4 2/3] kvm: Note down when cpu relax intercepted or pause loop exited
On 07/17/2012 01:52 PM, Avi Kivity wrote: On 07/16/2012 08:24 PM, Raghavendra K T wrote: So are you saying allow vcpu to spin in non over-commit scenarios? So that we avoid all yield_to etc... ( Or even in some other place where it is useful). When is yielding useful, if you're not overcommitted? Right. There is no need to do yield_to when the run queue has only one task. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sd: do not set changed flag on all unit attention conditions
Il 17/07/2012 09:45, James Bottomley ha scritto: > On Mon, 2012-07-16 at 19:20 +0200, Paolo Bonzini wrote: >> Il 16/07/2012 18:18, James Bottomley ha scritto: > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index b583277..6d8ca08 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -843,8 +843,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, > unsigned int good_bytes) > } else if (sense_valid && !sense_deferred) { > switch (sshdr.sense_key) { > case UNIT_ATTENTION: > - if (cmd->device->removable) { > - /* Detected disc change. Set a bit > + if (cmd->device->removable && > + (sshdr.asc == 0x3a || > + (sshdr.asc == 0x28 && sshdr.ascq == 0x00))) { > + /* "No medium" or "Medium may have changed." > + * This means a disc change. Set a bit >>> This type of change would likely cause a huge cascade of errors in real >>> removable media devices. Under the MMC standards, which a lot of the >>> older removable discs seem to follow, UNIT ATTENTION indicates either >>> medium change or device reset (which we check for and eat lower down); >>> we can't rely on them giving proper SBC-2 sense codes. If you want to >>> pretend to be removable media, you have to conform to its standards. >> >> Would you accept a patch doing the opposite, i.e. passing some sense >> codes such as PARAMETERS CHANGED and TARGET OPERATING CONDITIONS HAVE >> CHANGED? > > Could you explain what the problem actually is? It looks like you had a > reason to mark virtio-scsi as removable, even though it isn't, and now > you want to add further hacks because being removable doesn't quite > work. It's not specific to virtio-scsi, in fact I expect that virtio-scsi will be almost always used with non-removable disks. However, QEMU's SCSI target is not used just for virtio-scsi (for example it can be used for USB storage), and it lets you mark a disk as removable---why? because there exists real hardware that presents itself as an SBC removable disk. 
The only thing that is specific to virtualization, is support for online resizing (which generates a unit attention condition CAPACITY DATA HAS CHANGED). Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/36] AArch64 Linux kernel port
On Mon, Jul 16, 2012 at 09:24:26AM +0100, Avi Kivity wrote: > On 07/15/2012 03:16 PM, Catalin Marinas wrote: > > > > The AArch32 execution mode is optional, so it depends on the actual CPU > > implementation (while AArch64 is mandatory). If the implementation > > supports it, the most likely scenario for AArch32 at kernel level is in > > virtual machines or the secure OS. I'll explain below why. > > > > The exception (or privilege) levels on an ARMv8 architecture look like > > this: > > > > Secure WorldNormal World > > +-+ > > | EL3 | - Secure monitor > > +-+ > > +-+ > > | EL2 | - Hypervisor (normal world only) > > +-+ > > +-+ +-+ > > | EL1 | | EL1 | - OS kernel (secure or normal) > > +-+ +-+ > > +-+ +-+ > > | EL0 | | EL0 | - User apps (secure or normal) > > +-+ +-+ > > Can the same kernel image run in both EL1 and EL2? I noticed some .if > ELs in the assembler files. I guess they could be compiled multiple > times and the correct version chosen at runtime, or patched up like x86 > does with alternative(). > > One of the advantages kvm has to Linux distributors is that the same > kernel image can be used the hypervisor, guest, and bare metal. I'd > like to preserve that for arm64. On ARM (the same on 32-bit) we can also run the same kernel image as hypervisor or guest. Linux detects whether it was started in EL2 and installs some layer for KVM to use later if needed and then drops to EL1. If started in EL1, it doesn't have access to the virtualisation features and it just runs as a guest. The kernel cannot run in EL2 permanently as it cannot access user space (EL2 has its own MMU tables, though compatible with the other levels). -- Catalin -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [alsa-devel] [PATCH] snd-opti9xx: Implement suspend/resume
Hello Ondrej, On 7/17/2012 12:46 PM, Ondrej Zary wrote: Implement suspend/resume support for Opti 92x and 93x chips. Tested with Opti 929A+AD1848 and Opti 931. Signed-off-by: Ondrej Zary +#ifdef CONFIG_PM +static int snd_opti9xx_suspend(struct snd_card *card) +{ + struct snd_opti9xx *chip = card->private_data; + + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); + chip->codec->suspend(chip->codec); + return 0; +} + +static int snd_opti9xx_resume(struct snd_card *card) +{ + struct snd_opti9xx *chip = card->private_data; + int error, xdma2; +#if defined(CS4231) || defined(OPTi93X) + xdma2 = dma2; +#else + xdma2 = -1; +#endif + + error = snd_opti9xx_configure(chip, port, irq, dma1, xdma2, + mpu_port, mpu_irq); dma1?? + if (error) + return error; Variable name is not convincing. You are assuming that the function is returning error. + chip->codec->resume(chip->codec); + snd_power_change_state(card, SNDRV_CTL_POWER_D0); blank line required. + return 0; +} + +static int snd_opti9xx_isa_suspend(struct device *dev, unsigned int n, + pm_message_t state) +{ + return snd_opti9xx_suspend(dev_get_drvdata(dev)); +} + +static int snd_opti9xx_isa_resume(struct device *dev, unsigned int n) +{ + return snd_opti9xx_resume(dev_get_drvdata(dev)); +} +#endif + static struct isa_driver snd_opti9xx_driver = { .match = snd_opti9xx_isa_match, .probe = snd_opti9xx_isa_probe, .remove = __devexit_p(snd_opti9xx_isa_remove), - /* FIXME: suspend/resume */ +#ifdef CONFIG_PM + .suspend= snd_opti9xx_isa_suspend, + .resume = snd_opti9xx_isa_resume, +#endif .driver = { .name = DEV_NAME }, @@ -1123,12 +1165,29 @@ static void __devexit snd_opti9xx_pnp_remove(struct pnp_card_link * pcard) snd_opti9xx_pnp_is_probed = 0; } +#ifdef CONFIG_PM +static int snd_opti9xx_pnp_suspend(struct pnp_card_link *pcard, + pm_message_t state) +{ + return snd_opti9xx_suspend(pnp_get_card_drvdata(pcard)); +} + +static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) +{ + return 
snd_opti9xx_resume(pnp_get_card_drvdata(pcard)); +} +#endif + static struct pnp_card_driver opti9xx_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, .name = "opti9xx", Why this is opti why not OPTi ~Rajeev .id_table = snd_opti9xx_pnpids, .probe = snd_opti9xx_pnp_probe, .remove = __devexit_p(snd_opti9xx_pnp_remove), +#ifdef CONFIG_PM + .suspend= snd_opti9xx_pnp_suspend, + .resume = snd_opti9xx_pnp_resume, +#endif }; #endif -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sd: do not set changed flag on all unit attention conditions
On Tue, 2012-07-17 at 10:34 +0200, Paolo Bonzini wrote: > Il 17/07/2012 09:45, James Bottomley ha scritto: > > On Mon, 2012-07-16 at 19:20 +0200, Paolo Bonzini wrote: > >> Il 16/07/2012 18:18, James Bottomley ha scritto: > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > > index b583277..6d8ca08 100644 > > --- a/drivers/scsi/scsi_lib.c > > +++ b/drivers/scsi/scsi_lib.c > > @@ -843,8 +843,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, > > unsigned int good_bytes) > > } else if (sense_valid && !sense_deferred) { > > switch (sshdr.sense_key) { > > case UNIT_ATTENTION: > > - if (cmd->device->removable) { > > - /* Detected disc change. Set a bit > > + if (cmd->device->removable && > > + (sshdr.asc == 0x3a || > > +(sshdr.asc == 0x28 && sshdr.ascq == > > 0x00))) { > > + /* "No medium" or "Medium may have > > changed." > > +* This means a disc change. Set a bit > >>> This type of change would likely cause a huge cascade of errors in real > >>> removable media devices. Under the MMC standards, which a lot of the > >>> older removable discs seem to follow, UNIT ATTENTION indicates either > >>> medium change or device reset (which we check for and eat lower down); > >>> we can't rely on them giving proper SBC-2 sense codes. If you want to > >>> pretend to be removable media, you have to conform to its standards. > >> > >> Would you accept a patch doing the opposite, i.e. passing some sense > >> codes such as PARAMETERS CHANGED and TARGET OPERATING CONDITIONS HAVE > >> CHANGED? > > > > Could you explain what the problem actually is? It looks like you had a > > reason to mark virtio-scsi as removable, even though it isn't, and now > > you want to add further hacks because being removable doesn't quite > > work. > > It's not specific to virtio-scsi, in fact I expect that virtio-scsi will > be almost always used with non-removable disks. 
> > However, QEMU's SCSI target is not used just for virtio-scsi (for > example it can be used for USB storage), and it lets you mark a disk as > removable---why? because there exists real hardware that presents itself > as an SBC removable disk. The only thing that is specific to > virtualization, is support for online resizing (which generates a unit > attention condition CAPACITY DATA HAS CHANGED). So what's the problem? If you're doing pass through of a physical disk, we pick up removable from its inquiry string ... a physical removable device doesn't get resized. If you have a virtual disk you want to resize, you don't set the removable flag in the inquiry data. James -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] driver core: move uevent call to driver_register
Hi, On Mon, 16 Jul 2012, Greg Kroah-Hartman wrote: > On Tue, Jul 17, 2012 at 09:35:02AM +0800, Ming Lei wrote: > > On Tue, Jul 3, 2012 at 1:08 AM, Sebastian Ott > > wrote: > > > > > --- a/drivers/base/driver.c > > > +++ b/drivers/base/driver.c > > > @@ -187,6 +187,9 @@ int driver_register(struct device_driver > > > ret = driver_add_groups(drv, drv->groups); > > > if (ret) > > > bus_remove_driver(drv); > > > + > > > + kobject_uevent(&drv->p->kobj, KOBJ_ADD); > > > > You should just send the uevent if 'ret' equals to zero., otherwise > > OOPS may be triggered by kobject_uevent() after the 'drv' has been > > removed. > > Ugh, just missed that. > > Sebastian, care to send a follow-on patch for this? Here is the follow-on. (I've also replied with an updated patch..just in case you want to take that one). [PATCH] driver core: don't trigger uevent after failure Do not send the uevent if driver_add_groups failed. Found-by: Ming Lei Signed-off-by: Sebastian Ott --- drivers/base/driver.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -185,9 +185,10 @@ int driver_register(struct device_driver if (ret) return ret; ret = driver_add_groups(drv, drv->groups); - if (ret) + if (ret) { bus_remove_driver(drv); - + return ret; + } kobject_uevent(&drv->p->kobj, KOBJ_ADD); return ret; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RFT] regulator: palmas: Fix calculating selector in palmas_map_voltage_smps
Good catch, thanks I totally forgot about the range bit in that function. Acked-by: Graeme Gregory On 17/07/12 04:29, Axel Lin wrote: > The logic of calculating selector in palmas_map_voltage_smps() does not match > the logic to list voltage in palmas_list_voltage_smps(). > > We use below equation to calculate voltage when selector > 0: > voltage = (0.49V + (selector * 0.01V)) * RANGE > RANGE is either x1 or x2 > > So we need to take into account with the multiplier set in VSEL register when > calculating selector in palmas_map_voltage_smps() > > Signed-off-by: Axel Lin > --- > drivers/regulator/palmas-regulator.c | 17 ++--- > 1 file changed, 14 insertions(+), 3 deletions(-) > > diff --git a/drivers/regulator/palmas-regulator.c > b/drivers/regulator/palmas-regulator.c > index 7540c95..17d19fb 100644 > --- a/drivers/regulator/palmas-regulator.c > +++ b/drivers/regulator/palmas-regulator.c > @@ -373,11 +373,22 @@ static int palmas_set_voltage_smps_sel(struct > regulator_dev *dev, > static int palmas_map_voltage_smps(struct regulator_dev *rdev, > int min_uV, int max_uV) > { > + struct palmas_pmic *pmic = rdev_get_drvdata(rdev); > + int id = rdev_get_id(rdev); > int ret, voltage; > > - ret = ((min_uV - 500000) / 10000) + 1; > - if (ret < 0) > - return ret; > + if (min_uV == 0) > + return 0; > + > + if (pmic->range[id]) { /* RANGE is x2 */ > + if (min_uV < 1000000) > + min_uV = 1000000; > + ret = DIV_ROUND_UP(min_uV - 1000000, 20000) + 1; > + } else {/* RANGE is x1 */ > + if (min_uV < 500000) > + min_uV = 500000; > + ret = DIV_ROUND_UP(min_uV - 500000, 10000) + 1; > + } > > /* Map back into a voltage to verify we're still in bounds */ > voltage = palmas_list_voltage_smps(rdev, ret); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [alsa-devel] [PATCH] snd-opti9xx: Implement suspend/resume
On Tuesday 17 July 2012, Rajeev kumar wrote: > Hello Ondrej, > > On 7/17/2012 12:46 PM, Ondrej Zary wrote: > > Implement suspend/resume support for Opti 92x and 93x chips. > > Tested with Opti 929A+AD1848 and Opti 931. > > > > Signed-off-by: Ondrej Zary > > > > > +#ifdef CONFIG_PM > > +static int snd_opti9xx_suspend(struct snd_card *card) > > +{ > > + struct snd_opti9xx *chip = card->private_data; > > + > > + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); > > + chip->codec->suspend(chip->codec); > > + return 0; > > +} > > + > > +static int snd_opti9xx_resume(struct snd_card *card) > > +{ > > + struct snd_opti9xx *chip = card->private_data; > > + int error, xdma2; > > +#if defined(CS4231) || defined(OPTi93X) > > + xdma2 = dma2; > > +#else > > + xdma2 = -1; > > +#endif > > + > > + error = snd_opti9xx_configure(chip, port, irq, dma1, xdma2, > > + mpu_port, mpu_irq); > > dma1?? What's wrong with that? It's a global variable. The driver is old and only supports one card. > > + if (error) > > + return error; > > Variable name is not convincing. You are assuming that the function is > returning error. This is consistent with other code in the driver. > > > + chip->codec->resume(chip->codec); > > + snd_power_change_state(card, SNDRV_CTL_POWER_D0); > > blank line required. This is consistent with other code in the driver. 
> > + return 0; > > +} > > + > > +static int snd_opti9xx_isa_suspend(struct device *dev, unsigned int n, > > + pm_message_t state) > > +{ > > + return snd_opti9xx_suspend(dev_get_drvdata(dev)); > > +} > > + > > +static int snd_opti9xx_isa_resume(struct device *dev, unsigned int n) > > +{ > > + return snd_opti9xx_resume(dev_get_drvdata(dev)); > > +} > > +#endif > > + > > static struct isa_driver snd_opti9xx_driver = { > > .match = snd_opti9xx_isa_match, > > .probe = snd_opti9xx_isa_probe, > > .remove = __devexit_p(snd_opti9xx_isa_remove), > > - /* FIXME: suspend/resume */ > > +#ifdef CONFIG_PM > > + .suspend= snd_opti9xx_isa_suspend, > > + .resume = snd_opti9xx_isa_resume, > > +#endif > > .driver = { > > .name = DEV_NAME > > }, > > @@ -1123,12 +1165,29 @@ static void __devexit > > snd_opti9xx_pnp_remove(struct pnp_card_link * pcard) > > snd_opti9xx_pnp_is_probed = 0; > > } > > > > +#ifdef CONFIG_PM > > +static int snd_opti9xx_pnp_suspend(struct pnp_card_link *pcard, > > + pm_message_t state) > > +{ > > + return snd_opti9xx_suspend(pnp_get_card_drvdata(pcard)); > > +} > > + > > +static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) > > +{ > > + return snd_opti9xx_resume(pnp_get_card_drvdata(pcard)); > > +} > > +#endif > > + > > static struct pnp_card_driver opti9xx_pnpc_driver = { > > .flags = PNP_DRIVER_RES_DISABLE, > > .name = "opti9xx", > > Why this is opti why not OPTi It's not my code. This patch is just adding suspend/resume and not changing anything else. 
> ~Rajeev > > > .id_table = snd_opti9xx_pnpids, > > .probe = snd_opti9xx_pnp_probe, > > .remove = __devexit_p(snd_opti9xx_pnp_remove), > > +#ifdef CONFIG_PM > > + .suspend= snd_opti9xx_pnp_suspend, > > + .resume = snd_opti9xx_pnp_resume, > > +#endif > > }; > > #endif -- Ondrej Zary -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] driver core: move uevent call to driver_register
Hi, On Tue, 17 Jul 2012, Ming Lei wrote: > On Tue, Jul 3, 2012 at 1:08 AM, Sebastian Ott > wrote: > > > --- a/drivers/base/driver.c > > +++ b/drivers/base/driver.c > > @@ -187,6 +187,9 @@ int driver_register(struct device_driver > > ret = driver_add_groups(drv, drv->groups); > > if (ret) > > bus_remove_driver(drv); > > + > > + kobject_uevent(&drv->p->kobj, KOBJ_ADD); > > You should just send the uevent if 'ret' equals to zero., otherwise > OOPS may be triggered by kobject_uevent() after the 'drv' has been > removed. oh..sorry. Missed that. Thanks for catching it! Updated Patch: [PATCH] driver core: move uevent call to driver_register Device driver attribute groups are created after userspace is notified via an add event. Fix this by moving the kobject_uevent call to driver_register after the attribute groups are added. Signed-off-by: Sebastian Ott --- drivers/base/bus.c|1 - drivers/base/driver.c |6 +- 2 files changed, 5 insertions(+), 2 deletions(-) --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -743,7 +743,6 @@ int bus_add_driver(struct device_driver } } - kobject_uevent(&priv->kobj, KOBJ_ADD); return 0; out_unregister: --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -185,8 +185,12 @@ int driver_register(struct device_driver if (ret) return ret; ret = driver_add_groups(drv, drv->groups); - if (ret) + if (ret) { bus_remove_driver(drv); + return ret; + } + kobject_uevent(&drv->p->kobj, KOBJ_ADD); + return ret; } EXPORT_SYMBOL_GPL(driver_register); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Add vhost-blk support
Il 17/07/2012 10:29, Asias He ha scritto: > So, vhost-blk at least saves ~6 syscalls for us in each request. Are they really 6? If I/O is coalesced by a factor of 3, for example (i.e. each exit processes 3 requests), it's really 2 syscalls per request. Also, is there anything we can improve? Perhaps we can modify epoll and ask it to clear the eventfd for us (would save 2 reads)? Or io_getevents (would save 1)? > I guess you mean qemu here. Yes, in theory, qemu's block layer can be > improved to achieve similar performance as vhost-blk or kvm tool's > userspace virtio-blk has. But I think it makes no sense to prevent one > solution because there is another in theory solution called: we can do > similar in qemu. It depends. Like vhost-scsi, vhost-blk has the problem of a crippled feature set: no support for block device formats, non-raw protocols, etc. This makes it different from vhost-net. So it begs the question, is it going to be used in production, or just a useful reference tool? Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V2] [SCSI] async: Fixup arguments of async_synchronize_full_domain in alsa-soc
Dear James Bottomley, > On Mon, 2012-07-16 at 15:56 -0700, Dan Williams wrote: > > On Mon, Jul 16, 2012 at 3:17 PM, Marek Vasut wrote: > > [..] > > > > > sound/soc/soc-dapm.c |2 +- > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > V2: CC proper people, hopefully proper mailing list > > > > > > Adjust the patch subject with proper tags > > > > > > diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c > > > index eded657..e491eb0 100644 > > > --- a/sound/soc/soc-dapm.c > > > +++ b/sound/soc/soc-dapm.c > > > @@ -1572,7 +1572,7 @@ static int dapm_power_widgets(struct > > > snd_soc_dapm_context *dapm, int event) > > > > > > struct snd_soc_dapm_context *d; > > > LIST_HEAD(up_list); > > > LIST_HEAD(down_list); > > > > > > - LIST_HEAD(async_domain); > > > + ASYNC_DOMAIN_EXCLUSIVE(async_domain); > > > > > > enum snd_soc_bias_level bias; > > > > Thanks. This was folded into the resend that was submitted week [1] > > OK, I missed that with the cockup in the cover letters. Well, the main point is it's now fixed. Thanks guys! :-) > James Best regards, Marek Vasut -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sd: do not set changed flag on all unit attention conditions
Il 17/07/2012 10:40, James Bottomley ha scritto: >> > >> > It's not specific to virtio-scsi, in fact I expect that virtio-scsi will >> > be almost always used with non-removable disks. >> > >> > However, QEMU's SCSI target is not used just for virtio-scsi (for >> > example it can be used for USB storage), and it lets you mark a disk as >> > removable---why? because there exists real hardware that presents itself >> > as an SBC removable disk. The only thing that is specific to >> > virtualization, is support for online resizing (which generates a unit >> > attention condition CAPACITY DATA HAS CHANGED). > So what's the problem? If you're doing pass through of a physical disk, > we pick up removable from its inquiry string ... a physical removable > device doesn't get resized. If you have a virtual disk you want to > resize, you don't set the removable flag in the inquiry data. In practice people will do what you said, and it's not a problem. However, there's nothing that prevents you from running qemu with a removable SCSI disk, and then resizing it. I would like this to work, because SBC allows it and there's no reason why it shouldn't. Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [alsa-devel] [PATCH] snd-opti9xx: Implement suspend/resume
At Tue, 17 Jul 2012 09:16:44 +0200, Ondrej Zary wrote: > > Implement suspend/resume support for Opti 92x and 93x chips. > Tested with Opti 929A+AD1848 and Opti 931. > > Signed-off-by: Ondrej Zary Applied now. Thanks. Takashi > > --- a/sound/isa/opti9xx/opti92x-ad1848.c > +++ b/sound/isa/opti9xx/opti92x-ad1848.c > @@ -136,8 +136,8 @@ struct snd_opti9xx { > #ifdef OPTi93X > unsigned long mc_indir_index; > struct resource *res_mc_indir; > - struct snd_wss *codec; > #endif /* OPTi93X */ > + struct snd_wss *codec; > unsigned long pwd_reg; > > spinlock_t lock; > @@ -870,9 +870,7 @@ static int __devinit snd_opti9xx_probe(struct snd_card > *card) > &codec); > if (error < 0) > return error; > -#ifdef OPTi93X > chip->codec = codec; > -#endif > error = snd_wss_pcm(codec, 0, &pcm); > if (error < 0) > return error; > @@ -1053,11 +1051,55 @@ static int __devexit snd_opti9xx_isa_remove(struct > device *devptr, > return 0; > } > > +#ifdef CONFIG_PM > +static int snd_opti9xx_suspend(struct snd_card *card) > +{ > + struct snd_opti9xx *chip = card->private_data; > + > + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); > + chip->codec->suspend(chip->codec); > + return 0; > +} > + > +static int snd_opti9xx_resume(struct snd_card *card) > +{ > + struct snd_opti9xx *chip = card->private_data; > + int error, xdma2; > +#if defined(CS4231) || defined(OPTi93X) > + xdma2 = dma2; > +#else > + xdma2 = -1; > +#endif > + > + error = snd_opti9xx_configure(chip, port, irq, dma1, xdma2, > + mpu_port, mpu_irq); > + if (error) > + return error; > + chip->codec->resume(chip->codec); > + snd_power_change_state(card, SNDRV_CTL_POWER_D0); > + return 0; > +} > + > +static int snd_opti9xx_isa_suspend(struct device *dev, unsigned int n, > +pm_message_t state) > +{ > + return snd_opti9xx_suspend(dev_get_drvdata(dev)); > +} > + > +static int snd_opti9xx_isa_resume(struct device *dev, unsigned int n) > +{ > + return snd_opti9xx_resume(dev_get_drvdata(dev)); > +} > +#endif > + > static struct isa_driver 
snd_opti9xx_driver = { > .match = snd_opti9xx_isa_match, > .probe = snd_opti9xx_isa_probe, > .remove = __devexit_p(snd_opti9xx_isa_remove), > - /* FIXME: suspend/resume */ > +#ifdef CONFIG_PM > + .suspend= snd_opti9xx_isa_suspend, > + .resume = snd_opti9xx_isa_resume, > +#endif > .driver = { > .name = DEV_NAME > }, > @@ -1123,12 +1165,29 @@ static void __devexit snd_opti9xx_pnp_remove(struct > pnp_card_link * pcard) > snd_opti9xx_pnp_is_probed = 0; > } > > +#ifdef CONFIG_PM > +static int snd_opti9xx_pnp_suspend(struct pnp_card_link *pcard, > +pm_message_t state) > +{ > + return snd_opti9xx_suspend(pnp_get_card_drvdata(pcard)); > +} > + > +static int snd_opti9xx_pnp_resume(struct pnp_card_link *pcard) > +{ > + return snd_opti9xx_resume(pnp_get_card_drvdata(pcard)); > +} > +#endif > + > static struct pnp_card_driver opti9xx_pnpc_driver = { > .flags = PNP_DRIVER_RES_DISABLE, > .name = "opti9xx", > .id_table = snd_opti9xx_pnpids, > .probe = snd_opti9xx_pnp_probe, > .remove = __devexit_p(snd_opti9xx_pnp_remove), > +#ifdef CONFIG_PM > + .suspend= snd_opti9xx_pnp_suspend, > + .resume = snd_opti9xx_pnp_resume, > +#endif > }; > #endif > > > -- > Ondrej Zary > ___ > Alsa-devel mailing list > alsa-de...@alsa-project.org > http://mailman.alsa-project.org/mailman/listinfo/alsa-devel > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [alsa-devel] [PATCH] snd-opti9xx: Implement suspend/resume
On 7/17/2012 2:21 PM, Ondrej Zary wrote: static struct pnp_card_driver opti9xx_pnpc_driver = { > > .flags = PNP_DRIVER_RES_DISABLE, > > .name = "opti9xx", > > Why this is opti why not OPTi It's not my code. This patch is just adding suspend/resume and not changing anything else. Oops, sorry for this comment. I thought this line is changed by you. ~Rajeev -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/5] cpusets: dynamical scheduler domain flags
From: Michael Wang This patch set provides a way for the user to dynamically configure the scheduler domain flags, which are usually static. We can do the configuration through the cpuset cgroup; new files will be found under each hierarchy: sched_smt_domain_flag -- appears when CONFIG_SCHED_SMT is enabled, stands for the domain flags of cpuset on smt level sched_mc_domain_flag -- appears when CONFIG_SCHED_MC is enabled, stands for the domain flags of cpuset on mc level sched_book_domain_flag -- appears when CONFIG_SCHED_BOOK is enabled, stands for the domain flags of cpuset on book level sched_cpu_domain_flag -- stands for the domain flags of cpuset on cpu level sched_enable_domain_flag -- set to 1 to use the dynamical domain flags; the domains will be rebuilt Examples: Currently the domain flag bits are: #define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ #define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */ #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ #define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ #define SD_NUMA 0x4000 /* cross-node balancing */ If we want to set the SMT domain to be: 'SD_LOAD_BALANCE | SD_BALANCE_WAKE | SD_OVERLAP' and the domains on the other levels to have only SD_LOAD_BALANCE, then we can use: echo 0x2011 > /sys/fs/cgroup/cpuset/cpuset.sched_smt_domain_flag echo 0x1 > 
/sys/fs/cgroup/cpuset/cpuset.sched_mc_domain_flag echo 0x1 > /sys/fs/cgroup/cpuset/cpuset.sched_book_domain_flag echo 0x1 > /sys/fs/cgroup/cpuset/cpuset.sched_cpu_domain_flag echo 1 > /sys/fs/cgroup/cpuset/cpuset.sched_enable_domain_flag including: cpusets: add basic variables cpusets: add functions and code for initialization cpusets: enable the dynamical domain flags cpusets: add fundamental functions for recording cpusets: add the configuration facility Signed-off-by: Michael Wang --- b/include/linux/sched.h | 22 b/kernel/cpuset.c |7 ++ b/kernel/sched/core.c |2 include/linux/sched.h | 10 ++- kernel/cpuset.c | 131 +++- kernel/sched/core.c | 10 +++ 6 files changed, 176 insertions(+), 6 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/5] cpusets: add basic variables
From: Michael Wang Add the variables we need for the implementation of dynamical domain flags. Signed-off-by: Michael Wang --- include/linux/sched.h | 22 ++ kernel/cpuset.c |7 +++ 2 files changed, 29 insertions(+), 0 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ddc1225..9164309 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -922,8 +922,30 @@ static inline unsigned int group_first_cpu(struct sched_group *group) return cpumask_first(sched_group_cpus(group)); } +enum { +#ifdef CONFIG_SCHED_SMT + SMT_DDF, +#endif +#ifdef CONFIG_SCHED_MC + MC_DDF, +#endif +#ifdef CONFIG_SCHED_BOOK + BOOK_DDF, +#endif + CPU_DDF, + MAX_DDF, +}; + struct sched_domain_attr { int relax_domain_level; + /* +* dynamical domain flag array cover all level, only useful if +* enable_ddf is 1. +* the static domain flags will be replaced by ddf on each domain +* level dynamically in set_domain_attribute. +*/ + u64 ddf[MAX_DDF]; + int enable_ddf; }; #define SD_ATTR_INIT (struct sched_domain_attr) {\ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0cbc631..35fb585 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -105,6 +105,13 @@ struct cpuset { /* for custom sched domain */ int relax_domain_level; + /* +* dynamical domain flag configuration for cpuset, see struct +* sched_domain_attr for detail. +*/ + u64 ddf[MAX_DDF]; + int enable_ddf; + /* used for walking a cpuset hierarchy */ struct list_head stack_list; }; -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/5] cpusets: add functions and code for initialization
From: Michael Wang Add the functions and code which will do initialization for dynamical domain flags. Signed-off-by: Michael Wang --- include/linux/sched.h | 10 -- kernel/cpuset.c |8 ++-- kernel/sched/core.c |2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 9164309..3c91116 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -948,10 +948,16 @@ struct sched_domain_attr { int enable_ddf; }; -#define SD_ATTR_INIT (struct sched_domain_attr) {\ - .relax_domain_level = -1, \ +static inline void sd_attr_init(struct sched_domain_attr *attr) +{ + int i; + attr->relax_domain_level = -1; + for (i = 0; i < MAX_DDF; i++) + attr->ddf[i] = 0; + attr->enable_ddf = 0; } + extern int sched_domain_level_max; struct sched_domain { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 35fb585..67ee111 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -588,7 +588,7 @@ static int generate_sched_domains(cpumask_var_t **domains, dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); if (dattr) { - *dattr = SD_ATTR_INIT; + sd_attr_init(dattr); update_domain_attr_tree(dattr, &top_cpuset); } cpumask_copy(doms[0], top_cpuset.cpus_allowed); @@ -697,7 +697,7 @@ restart: cpumask_clear(dp); if (dattr) - *(dattr + nslot) = SD_ATTR_INIT; + sd_attr_init(dattr + nslot); for (j = i; j < csn; j++) { struct cpuset *b = csa[j]; @@ -1834,6 +1834,7 @@ static void cpuset_post_clone(struct cgroup *cgroup) static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) { + int i; struct cpuset *cs; struct cpuset *parent; @@ -1859,6 +1860,9 @@ static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) nodes_clear(cs->mems_allowed); fmeter_init(&cs->fmeter); cs->relax_domain_level = -1; + for (i = 0; i < MAX_DDF; i++) + cs->ddf[i] = 0; + cs->enable_ddf = 0; cs->parent = parent; number_of_cpusets++; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3987b9d..3f9d368 100644 --- 
a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7087,7 +7087,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, if (!new && !cur) return 1; - tmp = SD_ATTR_INIT; + sd_attr_init(&tmp); return !memcmp(cur ? (cur + idx_cur) : &tmp, new ? (new + idx_new) : &tmp, sizeof(struct sched_domain_attr)); -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/5] cpusets: enable the dynamical domain flags
From: Michael Wang We will record the domain flags for cpuset in update_domain_attr and use it to replace the static domain flags in set_domain_attribute. Signed-off-by: Michael Wang --- kernel/cpuset.c |7 +++ kernel/sched/core.c | 10 +- 2 files changed, 16 insertions(+), 1 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 67ee111..def8080 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -476,8 +476,15 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) static void update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) { + int i; if (dattr->relax_domain_level < c->relax_domain_level) dattr->relax_domain_level = c->relax_domain_level; + /* record all the domain flags if cpuset want to use them */ + if (c->enable_ddf) { + dattr->enable_ddf = 1; + for (i = 0; i < MAX_DDF; i++) + dattr->ddf[i] = c->ddf[i]; + } return; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3f9d368..7a1c886 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6461,7 +6461,15 @@ static void set_domain_attribute(struct sched_domain *sd, { int request; - if (!attr || attr->relax_domain_level < 0) { + if (!attr) + return; + + if (attr->enable_ddf) { + /* replace domain flags with ddf according to level */ + sd->flags = attr->ddf[sd->level]; + } + + if (attr->relax_domain_level < 0) { if (default_relax_domain_level < 0) return; else -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/5] cpusets: add fundamental functions for recording
From: Michael Wang Add the fundamental functions which will help to record the status of dynamical domain flags for cpuset. Signed-off-by: Michael Wang --- kernel/cpuset.c | 31 +++ 1 files changed, 31 insertions(+), 0 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index def8080..066c61c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1189,6 +1189,37 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) } /* + * update the ddf of cs according to level and rebuild domain + * if ddf enabled. + */ +static int update_ddf(int level, struct cpuset *cs, u64 val) +{ + if (!is_sched_load_balance(cs)) + return -EINVAL; + + cs->ddf[level] = val; + if (cs->enable_ddf) + async_rebuild_sched_domains(); + + return 0; +} + +/* enable or disable ddf supporting and rebuild domain */ +static int enable_ddf(struct cpuset *cs, u64 val) +{ + if (!is_sched_load_balance(cs)) + return -EINVAL; + + if ((val != 0 && val != 1) || cs->enable_ddf == val) + return -EINVAL; + + cs->enable_ddf = val; + async_rebuild_sched_domains(); + + return 0; +} + +/* * cpuset_change_flag - make a task's spread flags the same as its cpuset's * @tsk: task to be updated * @scan: struct cgroup_scanner containing the cgroup of the task -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 5/5] cpusets: add the configuration facility
From: Michael Wang Add the facility for user to configure the dynamical domain flags and enable/disable it. Signed-off-by: Michael Wang --- kernel/cpuset.c | 85 +++ 1 files changed, 85 insertions(+), 0 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 066c61c..5ff649d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1506,6 +1506,17 @@ typedef enum { FILE_MEM_HARDWALL, FILE_SCHED_LOAD_BALANCE, FILE_SCHED_RELAX_DOMAIN_LEVEL, +#ifdef CONFIG_SCHED_SMT + FILE_SCHED_SMT_DDF, +#endif +#ifdef CONFIG_SCHED_MC + FILE_SCHED_MC_DDF, +#endif +#ifdef CONFIG_SCHED_BOOK + FILE_SCHED_BOOK_DDF, +#endif + FILE_SCHED_CPU_DDF, + FILE_SCHED_ENABLE_DDF, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, @@ -1549,6 +1560,27 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) case FILE_SPREAD_SLAB: retval = update_flag(CS_SPREAD_SLAB, cs, val); break; +#ifdef CONFIG_SCHED_SMT + case FILE_SCHED_SMT_DDF: + retval = update_ddf(SMT_DDF, cs, val); + break; +#endif +#ifdef CONFIG_SCHED_MC + case FILE_SCHED_MC_DDF: + retval = update_ddf(MC_DDF, cs, val); + break; +#endif +#ifdef CONFIG_SCHED_BOOK + case FILE_SCHED_BOOK_DDF: + retval = update_ddf(BOOK_DDF, cs, val); + break; +#endif + case FILE_SCHED_CPU_DDF: + retval = update_ddf(CPU_DDF, cs, val); + break; + case FILE_SCHED_ENABLE_DDF: + retval = enable_ddf(cs, val); + break; default: retval = -EINVAL; break; @@ -1708,6 +1740,22 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) return is_spread_page(cs); case FILE_SPREAD_SLAB: return is_spread_slab(cs); +#ifdef CONFIG_SCHED_SMT + case FILE_SCHED_SMT_DDF: + return cs->ddf[SMT_DDF]; +#endif +#ifdef CONFIG_SCHED_MC + case FILE_SCHED_MC_DDF: + return cs->ddf[MC_DDF]; +#endif +#ifdef CONFIG_SCHED_BOOK + case FILE_SCHED_BOOK_DDF: + return cs->ddf[BOOK_DDF]; +#endif + case FILE_SCHED_CPU_DDF: + return cs->ddf[CPU_DDF]; + case FILE_SCHED_ENABLE_DDF: + return cs->enable_ddf; default: BUG(); } @@ -1788,6 +1836,43 @@ 
static struct cftype files[] = { .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, }, +#ifdef CONFIG_SCHED_SMT + { + .name = "sched_smt_domain_flag", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_SMT_DDF, + }, +#endif +#ifdef CONFIG_SCHED_MC + { + .name = "sched_mc_domain_flag", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_MC_DDF, + }, +#endif +#ifdef CONFIG_SCHED_BOOK + { + .name = "sched_book_domain_flag", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_BOOK_DDF, + }, +#endif + { + .name = "sched_cpu_domain_flag", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_CPU_DDF, + }, + { + .name = "sched_enable_domain_flag", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_ENABLE_DDF, + }, + { .name = "memory_migrate", .read_u64 = cpuset_read_u64, -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RFC V4 3/3] kvm: Choose better candidate for directed yield
On 07/17/2012 01:59 PM, Avi Kivity wrote: On 07/16/2012 07:10 PM, Rik van Riel wrote: On 07/16/2012 06:07 AM, Avi Kivity wrote: +{ +bool eligible; + +eligible = !vcpu->ple.cpu_relax_intercepted || +(vcpu->ple.cpu_relax_intercepted && + vcpu->ple.dy_eligible); + +if (vcpu->ple.cpu_relax_intercepted) +vcpu->ple.dy_eligible = !vcpu->ple.dy_eligible; Probably should assign 'true', since the previous value is essentially random. I suspect the intended purpose of this conditional is to flip the eligibility of a vcpu for being selected as a direct yield target. In other words, that bit of the code is correct. If vcpu A is in a long spin loop and is preempted away, and vcpu B dips several times in kvm_vcpu_on_spin(), then it will act as intended. Yes, true. But if vcpu A is spinning for x% of its time and processing on the other, then vcpu B will flip its dy_eligible for those x%, and not flip it when it's processing. I don't understand how this is useful. Suppose A is doing a really good job and has not done pause loop exit, we will not touch its dy_eligible flag. Also dy_eligible flag will not prevent B doing yield_to to A. Suppose A has started spinning in the beginning itself, it will do pause loop exit if it crosses threshold, and we will now start toggling dy_eligible. Was that what you were referring to? And it seems we may still have to set dy_eligible flag to false at the beginning of vcpu_on_spin along with cpu_relax_intercepted = true, like below, so that we do not have spill-over status from previous PL exits. vcpu_on_spin() { cpu_relax_intercepted = true; dy_eligible = false; . . . cpu_relax_intercepted = false; } Let me know if that addresses your concern. I guess this is an attempt to impose fairness on yielding, and it makes sense to do this, but I don't know if this is the best way to achieve it.
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sd: do not set changed flag on all unit attention conditions
On Tue, 2012-07-17 at 10:54 +0200, Paolo Bonzini wrote: > Il 17/07/2012 10:40, James Bottomley ha scritto: > >> > > >> > It's not specific to virtio-scsi, in fact I expect that virtio-scsi will > >> > be almost always used with non-removable disks. > >> > > >> > However, QEMU's SCSI target is not used just for virtio-scsi (for > >> > example it can be used for USB storage), and it lets you mark a disk as > >> > removable---why? because there exists real hardware that presents itself > >> > as an SBC removable disk. The only thing that is specific to > >> > virtualization, is support for online resizing (which generates a unit > >> > attention condition CAPACITY DATA HAS CHANGED). > > So what's the problem? If you're doing pass through of a physical disk, > > we pick up removable from its inquiry string ... a physical removable > > device doesn't get resized. If you have a virtual disk you want to > > resize, you don't set the removable flag in the inquiry data. > > In practice people will do what you said, and it's not a problem. > > However, there's nothing that prevents you from running qemu with a > removable SCSI disk, and then resizing it. I would like this to work, > because SBC allows it and there's no reason why it shouldn't. There's no such thing in the market today as a removable disk that's resizeable. Removable disks are for things like backup cartridges and ageing jazz drives. Worse: most removeable devices today are USB card readers whose standards compliance varies from iffy to non existent. Resizeable disks are currently the province of storage arrays. We don't do stuff just because the standards allows it; just the opposite: we try to use the smallest implementations from the standards we can get away with just because the more things we do, the more exceptions and broken devices we come across. 
James James -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/36] AArch64 Linux kernel port
On Tuesday 17 July 2012, Jon Masters wrote: > On 07/16/2012 08:16 AM, Pavel Machek wrote: > > >> If an implementation supports AArch32 at EL3 there could be some > >> physical (or some FPGA config) switch to choose between the two. But > >> since AArch64 is mandated, I don't see why one would force AArch32 at > >> EL3 and therefore all lower exception levels (and make a big part of the > >> processor unused). > > > > Actually I see one ... and I can bet it will happen. > > > > So you create that shiny new ARMv8 compliant CPU, 8 cores, 2GHz. HTC > > will want to use it with 1GB of RAM... and put around existing OMAP > > peripherals. > > But that's why we have Eagle (A15). It's a very capable 32-bit design > from ARM and far more sensible for such designs. You can easily build > something with a few A15 clusters in it, as we're already seeing. Right, I would say that with any CPU core more powerful than this one or with more than a few of these, you will also have trouble coming up with workloads that really require the CPU performance but don't also require a 64 bit virtual address space in either user space or kernel. Arnd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/36] AArch64 Linux kernel port
On Tuesday 17 July 2012, Christoph Hellwig wrote: > On Sun, Jul 15, 2012 at 07:43:07PM +, Arnd Bergmann wrote: > > Yes, I agree that's the best way to handle this. Compared to other > > architectures, I think x86 is the only one that allows booting either a > > 32 or 64 bit kernel on the same system. We used to support 32 bit > > kernels on 64 bit PowerMac, but nobody used it and we discontinued > > it long ago. Tile 64 bit is actually incompatible with 32 bit kernels > > at the architecture level and would require a third mode. On sparc, > > parisc and mips, AFAIK we could support 32 bit kernels on 64 bit > > machines, but never did. > > On mips it works just fine. On Sparc I don't think Linux ever did it, > but Solaris did for a long time, as did (IIRC) NetBSD/OpenBSD. Ah, I didn't know about mips doing that. I also just remembered that s390 supports running 31 bit kernels on all 64 bit machines, but there is no longer official support for that from IBM's side AFAIK. I certainly expect ARM to be similar to powerpc and sparc here, and anyone trying to submit a 32 bit kernel port for a 64 bit platform will have a hard time arguing why that should be accepted into mainline. Arnd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] drivers: bus: add a new driver for omap-ocp2scp
On Tuesday 17 July 2012, ABRAHAM, KISHON VIJAY wrote: > On Mon, Jul 16, 2012 at 7:43 PM, Kishon Vijay Abraham I wrote: > > Adds a new driver *omap-ocp2scp*. This driver takes the responsibility of > > creating all the devices that is connected to OCP2SCP. In the case of OMAP4, > > USB2PHY is connected to ocp2scp. > > > > This also includes device tree support for ocp2scp driver and > > the documentation with device tree binding information is updated. > > > > Cc: Felipe Balbi > > Cc: Arnd Bergmann > > Signed-off-by: Kishon Vijay Abraham I Acked-by: Arnd Bergmann Now that we've found a place for the code to live, do we know which maintainer is going to pick it up? If nobody else volunteers, we can take it through the arm-soc tree. Arnd -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mfd: anatop: matches should be NULL when populate child devices
Hi Richard, On Mon, Jul 16, 2012 at 04:55:57PM +0800, Richard Zhao wrote: > Signed-off-by: Richard Zhao > --- > drivers/mfd/anatop-mfd.c |2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Patch applied, thanks. Cheers, Samuel. -- Intel Open Source Technology Centre http://oss.intel.com/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Add vhost-blk support
On 07/17/2012 04:52 PM, Paolo Bonzini wrote: Il 17/07/2012 10:29, Asias He ha scritto: So, vhost-blk at least saves ~6 syscalls for us in each request. Are they really 6? If I/O is coalesced by a factor of 3, for example (i.e. each exit processes 3 requests), it's really 2 syscalls per request. Well. I am counting the number of syscalls in one notify and response process. Sure the IO can be coalesced. Also, is there anything we can improve? Perhaps we can modify epoll and ask it to clear the eventfd for us (would save 2 reads)? Or io_getevents (would save 1)? I guess you mean qemu here. Yes, in theory, qemu's block layer can be improved to achieve similar performance as vhost-blk or kvm tool's userspace virtio-blk has. But I think it makes no sense to prevent one solution because there is another in theory solution called: we can do similar in qemu. It depends. Like vhost-scsi, vhost-blk has the problem of a crippled feature set: no support for block device formats, non-raw protocols, etc. This makes it different from vhost-net. Data-plane qemu also has this crippled feature set problem, no? Does user always choose to use block devices format like qcow2? What if they prefer raw image or raw block device? So it begs the question, is it going to be used in production, or just a useful reference tool? This should be decided by user, I can not speak for them. What is wrong with adding one option for user which they can decide? -- Asias -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] perf/x86: Fix LLC-* and node-* events on Intel SandyBridge
From: "Yan, Zheng" LLC-* and node-* events require using the OFFCORE_RESPONSE events on SandyBridge, but the hw_cache_extra_regs is left uninitialized. This patch adds the missing extra register configure table for SandyBridge. Signed-off-by: Yan, Zheng --- arch/x86/kernel/cpu/perf_event_intel.c | 92 +--- 1 file changed, 86 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7a8b9d0..3823669 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -138,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +#define SNB_DMND_DATA_RD (1ULL << 0) +#define SNB_DMND_RFO (1ULL << 1) +#define SNB_DMND_IFETCH(1ULL << 2) +#define SNB_DMND_WB(1ULL << 3) +#define SNB_PF_DATA_RD (1ULL << 4) +#define SNB_PF_RFO (1ULL << 5) +#define SNB_PF_IFETCH (1ULL << 6) +#define SNB_LLC_DATA_RD(1ULL << 7) +#define SNB_LLC_RFO(1ULL << 8) +#define SNB_LLC_IFETCH (1ULL << 9) +#define SNB_BUS_LOCKS (1ULL << 10) +#define SNB_STRM_ST(1ULL << 11) +#define SNB_OTHER (1ULL << 15) +#define SNB_RESP_ANY (1ULL << 16) +#define SNB_NO_SUPP(1ULL << 17) +#define SNB_LLC_HITM (1ULL << 18) +#define SNB_LLC_HITE (1ULL << 19) +#define SNB_LLC_HITS (1ULL << 20) +#define SNB_LLC_HITF (1ULL << 21) +#define SNB_LOCAL (1ULL << 22) +#define SNB_REMOTE (0xffULL << 23) +#define SNB_SNP_NONE (1ULL << 31) +#define SNB_SNP_NOT_NEEDED (1ULL << 32) +#define SNB_SNP_MISS (1ULL << 33) +#define SNB_NO_FWD (1ULL << 34) +#define SNB_SNP_FWD(1ULL << 35) +#define SNB_HITM (1ULL << 36) +#define SNB_NON_DRAM (1ULL << 37) + +#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) +#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) +#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) + +#define SNB_SNP_ANY(SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ +SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ +SNB_HITM) + +#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) +#define 
SNB_DRAM_REMOTE(SNB_REMOTE|SNB_SNP_ANY) + +#define SNB_L3_ACCESS SNB_RESP_ANY +#define SNB_L3_MISS(SNB_DRAM_ANY|SNB_NON_DRAM) + +static __initconst const u64 snb_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, + [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, + }, + }, +}; + static __initconst const u64 snb_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -235,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids }, [ C(NODE) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, + [ C(RESULT_ACCESS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0x01b7, }, }, @@ -1964,6 +2042,8 @@ __init int intel_pmu_init(void) case 58: /* IvyBridge *
Re: [PATCH] sd: do not set changed flag on all unit attention conditions
Il 17/07/2012 11:11, James Bottomley ha scritto: > We don't do stuff just because the standards allows it; just the > opposite: we try to use the smallest implementations from the standards > we can get away with just because the more things we do, the more > exceptions and broken devices we come across. Yes, I realize failing only on specific sense codes as I did it in the patch is not going to work. However, the other way round is not problematic (explicitly allow some sense codes, fail on all others). Another example is "target operating conditions have changed". QEMU cannot report such changes because scsi_error prints a warning (fine) and then passes the unit attention upwards. With removable drives, this has the same problem as resizing. Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Setreuid distinction about (uid_t)-1
Hi folks, first I'd like to apologize if the question I'm asking is dumb or a little bit out of the scope of the list. I've been doing some testing on setuid functions family lately, and I found a weird behaviour I'm not able to explain myself. I'm using this small program to try and switch the uid of a user: #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/types.h> int main(int argc, char** argv){ unsigned int uid; char *args[] = {"/bin/sh",NULL}; if (argc < 2){ printf("Usage: %s target_uid\n", argv[0]); exit(0); } uid = atoi(argv[1]); printf("%u\n",uid); if (setreuid(uid,uid)==-1){ printf("Setreuid to %u failed\n ",uid); perror("E"); exit(1); } execve("/bin/sh",args,NULL); return 1; } I've been calling this binary with a bunch of different uid numbers, and I came across this weird behaviour with the (uid_t) -1 value: adrian@home-pc:~$ /tmp/suid-tests Usage: /tmp/suid-tests target_uid adrian@home-pc:~$ /tmp/suid-tests 0 0 Setreuid to 0 failed E: Operation not permitted adrian@home-pc:~$ /tmp/suid-tests -1 4294967295 $ id uid=1000(adrian) gid=1000(adrian) groups=1000(adrian),4(adm),20(dialout),24(cdrom),46(plugdev),109(lpadmin),110(sambashare),111(admin) adrian@home-pc:~$ /tmp/suid-tests -2 4294967294 Setreuid to 4294967294 failed E: Operation not permitted adrian@home-pc:~$ /tmp/suid-tests -3 4294967293 Setreuid to 4294967293 failed E: Operation not permitted If the binary is setuid, the -1 call effectively raises the euid to root (0), although other arbitrary values are properly being set: adrian@home-pc:~$ ls -hl /tmp/suid-tests -rwsr-x--- 1 root adrian 8,5K 2012-07-17 10:53 /tmp/suid-tests adrian@home-pc:~$ /tmp/suid-tests -1 4294967295 # id uid=1000(adrian) gid=1000(adrian) euid=0(root) groups=0(root),4(adm),20(dialout),24(cdrom),46(plugdev),109(lpadmin),110(sambashare),111(admin),1000(adrian) adrian@home-pc:~$ /tmp/suid-tests -2 4294967294 $ id uid=4294967294 gid=1000(adrian) groups=4(adm),20(dialout),24(cdrom),46(plugdev),109(lpadmin),110(sambashare),111(admin),1000(adrian) I've
been looking into kernel/sys.c, reading the setreuid function for an explanation. I've seen that there are several if cases for when the uid value is (uid_t)-1 but I still don't understand why this is being done. I tried to trace down all the checks that take place, but I'm not quite familiar with the kernel and I feel I'm missing something. Is this an expected behaviour? If so, could someone please shed some light on why? Running kernels for the tests have been several on the 2.6.x, 2.6.38 x86_64 for example. Thanks in advance and regards, Adrián -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Add vhost-blk support
Il 17/07/2012 11:21, Asias He ha scritto: >> It depends. Like vhost-scsi, vhost-blk has the problem of a crippled >> feature set: no support for block device formats, non-raw protocols, >> etc. This makes it different from vhost-net. > > Data-plane qemu also has this crippled feature set problem, no? Yes, but that is just a proof of concept. We can implement a separate I/O thread within the QEMU block layer, and add fast paths that resemble data-path QEMU, without limiting the feature set. > Does user always choose to use block devices format like qcow2? What > if they prefer raw image or raw block device? If they do, the code should hit fast paths and be fast. But it should be automatic, without the need for extra knobs. aio=thread vs. aio=native is already one knob too much IMHO. >> So it begs the question, is it going to be used in production, or just a >> useful reference tool? > > This should be decided by user, I can not speak for them. What is wrong > with adding one option for user which they can decide? Having to explain the user about the relative benefits; having to support the API; having to handle transition from one more thing when something better comes out. Paolo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC/PATCH] Use kernel supplied MMU info for kvm tool
Hi Michael, On 2012-07-17 06:00, Michael Ellerman wrote: This is a series for kvmtool that uses a newish kernel API to get MMU info, which is then fed to the guest. Currently we just make a good guess based on the PVR, but that is potentially flakey in a few ways. The most notable is that if you don't specify hugepages we don't boot - because the guest is told we support 16M pages, but we don't really (on HV). Just had a look, all good. Thanks for tidying some old FIXMEs, especially the page/segment DT props encoding grot -- and the designated inits in the cpuinfo struct, whew, I heard the scream on IRC. Sorry. ;-) Acked-by: Matt Evans Matt -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] xfs: fix comment typo of struct xfs_da_blkinfo.
On Jul 17, 2012, at 3:06 PM, Christoph Hellwig wrote: > On Tue, Jul 17, 2012 at 11:33:33AM +0800, Chen Baozi wrote: >> I'd really love to. Right now, I am working on syslinux to support booting >> on xfs partition (under pcacjr's mentoring), which I thought would be a >> nice start to get familiar with xfs (and I did learn a lot from it). So I >> think there would be more time (and experience on xfs) after I finish the >> xfs support on syslinux. And I'm really looking forward to your ideas. So do >> please tell me what I can help, I'll try my best to do it. > > Btw, if you need more reviewers for the syslinux support feel free to pass > it by me (or the list). Sure, thanks. > > Thanks for working on this! > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
getsockopt/setsockopt with SO_RCVBUF and SO_SNDBUF "non-standard" behaviour
Hi all, I looked on Internet and at the old thread http://lkml.indiana.edu/hypermail/linux/kernel/0108.0/0275.html, but the issue is still not settled as far as I see. I need to have the highest memory available for snd/rcv buffer and I need to know/confirm how much it allocated for my process (how much I can use). So with Linux we need to do something like: setsockopt (..., SO_RCVBUF, 256000, ...) getsockopt (..., SO_RCVBUF, &i, ...) i /= 2; where i is the size I am looking for. Now, to make this code work for other OSes it should be changed to: setsockopt (..., SO_RCVBUF, 256000, ...) getsockopt (..., SO_RCVBUF, &i, ...) #ifdef LINUX i /= 2; #endif First question, is this code correct? If not, what code gives the amount of memory useable for my process? Second, it seems to me that linux is definitely "non-standard" here. Saying that linux uses twice as memory has nothing to do with that, since getsockopt should return what the application can count on, not what is the internal use. It is like a hypothetical malloc (10) would return not 10, but 20 (including meta-information). Is that right? Cheers, -- Eugen Dedu http://eugen.dedu.free.fr -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mm: setup pageblock_order before it's used by sparse
Hi Ben, Any update about this topic? Thanks! Gerry On 2012-7-6 9:24, Benjamin Herrenschmidt wrote: > On Thu, 2012-07-05 at 18:00 -0700, Yinghai Lu wrote: >> cma, dma_contiguous_reserve is referring pageblock_order very early >> too. >> just after init_memory_mapping() for x86's setup_arch. >> >> so set pageblock_order early looks like my -v2 patch is right way. >> >> current question: need to powerpc guys to check who to set that early. > > I missed the beginning of that discussion, I'll try to dig a bit, > might take me til next week though as I'm about to be off for > the week-end. > > Cheers, > Ben. > > > > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] mm/memcg: remove redundant checking on root memcg
Function __mem_cgroup_cancel_local_charge is only called by mem_cgroup_move_parent. For this case, root memcg has been checked by mem_cgroup_move_parent. So we needn't check that again in function __mem_cgroup_cancel_local_charge and just remove the check in function __mem_cgroup_cancel_local_charge. Signed-off-by: Wanpeng Li --- mm/memcontrol.c |3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6392c0a..d346347 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2404,9 +2404,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, { unsigned long bytes = nr_pages * PAGE_SIZE; - if (mem_cgroup_is_root(memcg)) - return; - res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); if (do_swap_account) res_counter_uncharge_until(&memcg->memsw, -- 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Add vhost-blk support
On Tue, Jul 17, 2012 at 10:52:10AM +0200, Paolo Bonzini wrote: > Il 17/07/2012 10:29, Asias He ha scritto: > > So, vhost-blk at least saves ~6 syscalls for us in each request. > > Are they really 6? If I/O is coalesced by a factor of 3, for example > (i.e. each exit processes 3 requests), it's really 2 syscalls per request. > > Also, is there anything we can improve? Perhaps we can modify epoll and > ask it to clear the eventfd for us (would save 2 reads)? Or > io_getevents (would save 1)? > > > I guess you mean qemu here. Yes, in theory, qemu's block layer can be > > improved to achieve similar performance as vhost-blk or kvm tool's > > userspace virtio-blk has. But I think it makes no sense to prevent one > > solution because there is another in theory solution called: we can do > > similar in qemu. > > It depends. Like vhost-scsi, vhost-blk has the problem of a crippled > feature set: no support for block device formats, non-raw protocols, > etc. This makes it different from vhost-net. Well vhost-net is also more limited than virtio-net: no support for userspace networking, no support for level interrupts, no support for legacy qemu vlans, can not trace datapath in userspace, only virtio is supported. None of these is fundamental but this is how our implementation currently behaves so from user's point of view that's how it is. There are also fundamental limitations - e.g. it's linux only, a special module needs to be loaded and user needs to get an fd to the char device ... The way we addressed it, is by making it seamless for the user: basically if your setup matches what vhost-net can accelerate, it gets enabled, if not - userspace is used. Most of the logic is in libvirt. > So it begs the question, is it going to be used in production, or just a > useful reference tool? > > Paolo Sticking to raw already makes virtio-blk faster, doesn't it? In that vhost-blk looks to me like just another optimization option.
Ideally I think user just should not care where do we handle virtio: in-kernel or in userspace. One can imagine it being enabled/disabled automatically if none of the features unsupported by it are used. For example currently you specify vhost=on for tap backend and then if you try to setup an unsupported by it like level interrupts, it gets disabled and userspace virtio is used. -- MST -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/36] AArch64 Linux kernel port
> Right, I would say that with any CPU core more powerful than this one > or with more than a few of these, you will also have trouble coming > up with workloads that really require the CPU performance but don't > also require a 64 bit virtual address space in either user space > or kernel. There are lots of them - soft radio for example can burn near infinite CPU resource depending upon the amount you are fishing out, but it's pure throughput. Alan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V3] tmp patch to fix hotplug issue in CMCI storm
On Mon, Jul 16, 2012 at 10:22:16AM +0200, Thomas Gleixner wrote: > Date: Mon, 16 Jul 2012 10:22:16 +0200 (CEST) > From: Thomas Gleixner > To: Chen Gong > cc: tony.l...@intel.com, borislav.pet...@amd.com, pet...@infradead.org, > x...@kernel.org, linux-kernel@vger.kernel.org > Subject: Re: [PATCH V3] tmp patch to fix hotplug issue in CMCI storm > User-Agent: Alpine 2.02 (LFD 1266 2009-07-14) > > On Mon, 16 Jul 2012, Chen Gong wrote: > > > > Are you still care about this thread any more? Any plan to update it? > > Hope to get your feedback ASAP. > > Can you please collect the latest series and send it to lkml, Tony and > Boris. I think it's ok as is now. > > Thanks, > > tglx Fine, but what base I should use, -tip tree or Linus' tree? signature.asc Description: Digital signature
Re: linux-next: Tree for July 17 (mfd: AB3100 - ab3100_probe: undefined reference to `rand_initialize_irq')
On Tue, Jul 17, 2012 at 7:41 AM, Stephen Rothwell wrote: > Hi all, > > Changes since 20120716: > > The vfs tree lost its build failure. > > The l2-mtd tree gained a conflict against the mtd tree. > > The battery tree lost its build failure. > > The regulator tree gained conflicts against the mfd tree. > > The tty tree lost its build failure but gained another, so I used the > version from next-20120712. > > I have still reverted 3 commits from the signal tree at the request of the > arm maintainer. > > The akpm tree lost a few patches that turned up elsewhere. > From today's build log on an Ubuntu/precise AMD64 host: [...] LD drivers/video/built-in.o LD drivers/built-in.o LINK vmlinux LD vmlinux.o MODPOST vmlinux.o GEN .version CHK include/generated/compile.h UPD include/generated/compile.h CC init/version.o LD init/built-in.o drivers/built-in.o: In function `ab3100_probe': ab3100-core.c:(.devinit.text+0xbf97): undefined reference to `rand_initialize_irq' make[2]: *** [vmlinux] Error 1 make[1]: *** [deb-pkg] Error 2 make: *** [deb-pkg] Error 2 MFD / AB3100 kernel-config settings: CONFIG_AB3100_CORE=y CONFIG_AB3100_OTP=m CONFIG_REGULATOR_AB3100=m CONFIG_RTC_DRV_AB3100=m If you need further information, please let me know. - Sedat - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/5] Add vhost-blk support
On Tue, Jul 17, 2012 at 11:32:45AM +0200, Paolo Bonzini wrote: > Il 17/07/2012 11:21, Asias He ha scritto: > >> It depends. Like vhost-scsi, vhost-blk has the problem of a crippled > >> feature set: no support for block device formats, non-raw protocols, > >> etc. This makes it different from vhost-net. > > > > Data-plane qemu also has this crippled feature set problem, no? > > Yes, but that is just a proof of concept. We can implement a separate > I/O thread within the QEMU block layer, and add fast paths that resemble > data-path QEMU, without limiting the feature set. > > > Does user always choose to use block devices format like qcow2? What > > if they prefer raw image or raw block device? > > If they do, the code should hit fast paths and be fast. But it should > be automatic, without the need for extra knobs. aio=thread vs. > aio=native is already one knob too much IMHO. Well one extra knob at qemu level is harmless IMO since the complexity can be handled by libvirt. For vhost-net libvirt already enables vhost automatically depending on backend used and I imagine a similar thing can happen here. > >> So it begs the question, is it going to be used in production, or just a > >> useful reference tool? > > > > This should be decided by user, I can not speak for them. What is wrong > > with adding one option for user which they can decide? > > Having to explain the user about the relative benefits; This can just be done automatically by libvirt. > having to > support the API; having to handle transition from one more thing when > something better comes out. > > Paolo Well this is true for any code. If the limited featureset which vhost-blk can accelerate is something many people use, then accelerating by 5-15% might outweigh support costs. 
-- MST -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Kerneloops.org defunct?
> The reporting tool(Abrt) complicated the reporting and kerneloops.org > DNS still doesn't associated with the required ip address. ( Arjan? ) Anton - just talk to the kernel.org maintainers and get oops.kernel.org set up somewhere and be done with it. > - Starting from the Fedora 18 kernel oopses reporting will be > configured to send reports unconditionally, straight to the > kerneloops.org as it did the original tool. > (this, hopefully, will fix the volume of the oopses we are getting.) And probably the volume of complaints about spyware. It should be asking permission even if "until further notice" is a choice. For some environments this is a major issue. Consider people with ARM or x86 prototype machines who are contractually forbidden from revealing a lot of data about the systems. Alan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: linux-next: build failure after merge of the tty tree
On Tue, 17 Jul 2012 13:45:06 +1000 Stephen Rothwell wrote: > Hi Greg, > > After merging the tty tree, today's linux-next build (powerpc > ppc64_defconfig) failed like this: > > drivers/tty/tty_ioctl.c: In function 'set_sgflags': > drivers/tty/tty_ioctl.c:741:9: error: request for member 'c_iflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:742:9: error: request for member 'c_oflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:743:9: error: request for member 'c_lflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:745:10: error: request for member 'c_iflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:746:10: error: request for member 'c_lflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:749:10: error: request for member 'c_lflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:753:10: error: request for member 'c_oflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:756:10: error: request for member 'c_iflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:757:10: error: request for member 'c_lflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:759:15: error: request for member 'c_lflag' in > something not a structure or union > drivers/tty/tty_ioctl.c:760:10: error: request for member 'c_cc' in something > not a structure or union > drivers/tty/tty_ioctl.c:761:10: error: request for member 'c_cc' in something > not a structure or union > > Caused by commit adc8d746caa6 ("tty: move the termios object into the > tty"). Did anyone build test this? :-( It's known not to build. I sent Greg some patches but he wants them versus a different tree. I hope to have that done and tested in the next couple of days, until then I'd suggest you drop the tty tree - in particular because the version you are currently using scribbles on other bits of memory. 
Alan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH REBASE] regulator: tps65910: set input_supply on desc unconditionally
On Tue, Jul 17, 2012 at 11:34:06AM +0530, Laxman Dewangan wrote: > Set the supply_name in the regulator descriptor unconditionally > and make this parameter as required parameter in the device > node for successfully registration of the regulator. Applied, thanks. signature.asc Description: Digital signature
Re: [PATCH RFT] regulator: palmas: Fix calcuating selector in palmas_map_voltage_smps
On Tue, Jul 17, 2012 at 11:29:03AM +0800, Axel Lin wrote: > The logic of calculating selector in palmas_map_voltage_smps() does not match > the logic to list voltage in palmas_list_voltage_smps(). Applied, thanks. signature.asc Description: Digital signature
Re: [RFC 3/3] memory-hotplug: bug fix race between isolation and allocation
Hi Minchan, On Tue, Jul 17, 2012 at 3:01 PM, Minchan Kim wrote: > Like below, memory-hotplug makes race between page-isolation > and page-allocation so it can hit BUG_ON in __offline_isolated_pages. > > CPU A CPU B > > start_isolate_page_range > set_migratetype_isolate > spin_lock_irqsave(zone->lock) > > free_hot_cold_page(Page A) > /* without zone->lock */ > migratetype = get_pageblock_migratetype(Page > A); > /* > * Page could be moved into MIGRATE_MOVABLE > * of per_cpu_pages > */ > list_add_tail(&page->lru, > &pcp->lists[migratetype]); > > set_pageblock_isolate > move_freepages_block > drain_all_pages > > /* Page A could be in MIGRATE_MOVABLE of > free_list. */ > > check_pages_isolated > __test_page_isolated_in_pageblock > /* > * We can't catch freed page which > * is free_list[MIGRATE_MOVABLE] > */ > if (PageBuddy(page A)) > pfn += 1 << page_order(page A); > > /* So, Page A could be allocated */ > > __offline_isolated_pages > /* > * BUG_ON hit or offline page > * which is used by someone > */ > BUG_ON(!PageBuddy(page A)); > > Signed-off-by: Minchan Kim > --- > I found this problem during code review so please confirm it. > Kame? > > mm/page_isolation.c |5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/mm/page_isolation.c b/mm/page_isolation.c > index acf65a7..4699d1f 100644 > --- a/mm/page_isolation.c > +++ b/mm/page_isolation.c > @@ -196,8 +196,11 @@ __test_page_isolated_in_pageblock(unsigned long pfn, > unsigned long end_pfn) > continue; > } > page = pfn_to_page(pfn); > - if (PageBuddy(page)) > + if (PageBuddy(page)) { > pfn += 1 << page_order(page); > + if (get_page_migratetype(page) != MIGRATE_ISOLATE) > + break; > + } test_page_isolated() already checks get_pageblock_migratetype(page) != MIGRATE_ISOLATE. 
> else if (page_count(page) == 0 && > get_page_migratetype(page) == MIGRATE_ISOLATE) > pfn += 1; > -- > 1.7.9.5 > -- Regards, --Bob -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/