[PATCH] Revert "ARM: cns3xxx: pci: avoid potential stack overflow"
This reverts commit 498a92d42596a7a32c042319eb62a4c3d8081cf1. Krzysztof reported that this change broke Cavium CNS3xxx, ARMv6 (Laguna GW-2388) because the MRRS setting is never written to the hardware. Signed-off-by: Bjorn Helgaas CC: Arnd Bergmann CC: Krzysztof Hałasa --- arch/arm/mach-cns3xxx/pcie.c | 71 -- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c index 318394e..c622c30 100644 --- a/arch/arm/mach-cns3xxx/pcie.c +++ b/arch/arm/mach-cns3xxx/pcie.c @@ -65,9 +65,8 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus *bus, /* * The CNS PCI bridge doesn't fit into the PCI hierarchy, though -* we still want to access it. -* We place the host bridge on bus 0, and the directly connected -* device on bus 1, slot 0. +* we still want to access it. For this to work, we must place +* the first device on the same bus as the CNS PCI bridge. */ if (busno == 0) { /* internal PCIe bus, host bridge device */ if (devfn == 0) /* device# and function# are ignored by hw */ @@ -212,46 +211,58 @@ static void __init cns3xxx_pcie_check_link(struct cns3xxx_pcie *cnspci) } } -static void cns3xxx_write_config(struct cns3xxx_pcie *cnspci, -int where, int size, u32 val) -{ - void __iomem *base = cnspci->host_regs + (where & 0xffc); - u32 v; - u32 mask = (0x1ull << (size * 8)) - 1; - int shift = (where % 4) * 8; - - v = readl_relaxed(base); - - v &= ~(mask << shift); - v |= (val & mask) << shift; - - writel_relaxed(v, base); - readl_relaxed(base); -} - static void __init cns3xxx_pcie_hw_init(struct cns3xxx_pcie *cnspci) { + int port = cnspci->port; + struct pci_sys_data sd = { + .private_data = cnspci, + }; + struct pci_bus bus = { + .number = 0, + .ops = &cns3xxx_pcie_ops, + .sysdata = &sd, + }; u16 mem_base = cnspci->res_mem.start >> 16; u16 mem_limit = cnspci->res_mem.end >> 16; u16 io_base = cnspci->res_io.start >> 16; u16 io_limit = cnspci->res_io.end>> 16; + u32 devfn = 0; + u8 tmp8; + u16 pos; + u16 dc; + + pci_bus_write_config_byte(&bus, devfn, PCI_PRIMARY_BUS, 0); + pci_bus_write_config_byte(&bus, devfn, PCI_SECONDARY_BUS, 1); + pci_bus_write_config_byte(&bus, devfn, PCI_SUBORDINATE_BUS, 1); - cns3xxx_write_config(cnspci, PCI_PRIMARY_BUS, 1, 0); - cns3xxx_write_config(cnspci, PCI_SECONDARY_BUS, 1, 1); - cns3xxx_write_config(cnspci, PCI_SUBORDINATE_BUS, 1, 1); - cns3xxx_write_config(cnspci, PCI_MEMORY_BASE, 2, mem_base); - cns3xxx_write_config(cnspci, PCI_MEMORY_LIMIT, 2, mem_limit); - cns3xxx_write_config(cnspci, PCI_IO_BASE_UPPER16, 2, io_base); - cns3xxx_write_config(cnspci, PCI_IO_LIMIT_UPPER16, 2, io_limit); + pci_bus_read_config_byte(&bus, devfn, PCI_PRIMARY_BUS, &tmp8); + pci_bus_read_config_byte(&bus, devfn, PCI_SECONDARY_BUS, &tmp8); + pci_bus_read_config_byte(&bus, devfn, PCI_SUBORDINATE_BUS, &tmp8); + + pci_bus_write_config_word(&bus, devfn, PCI_MEMORY_BASE, mem_base); + pci_bus_write_config_word(&bus, devfn, PCI_MEMORY_LIMIT, mem_limit); + pci_bus_write_config_word(&bus, devfn, PCI_IO_BASE_UPPER16, io_base); + pci_bus_write_config_word(&bus, devfn, PCI_IO_LIMIT_UPPER16, io_limit); if (!cnspci->linked) return; /* Set Device Max_Read_Request_Size to 128 byte */ - pcie_bus_config = PCIE_BUS_PEER2PEER; - + bus.number = 1; /* directly connected PCIe device */ + devfn = PCI_DEVFN(0, 0); + pos = pci_bus_find_capability(&bus, devfn, PCI_CAP_ID_EXP); + pci_bus_read_config_word(&bus, devfn, pos + PCI_EXP_DEVCTL, &dc); + if (dc & PCI_EXP_DEVCTL_READRQ) { + dc &= ~PCI_EXP_DEVCTL_READRQ; + pci_bus_write_config_word(&bus, devfn, pos + PCI_EXP_DEVCTL, dc); + pci_bus_read_config_word(&bus, devfn, pos + PCI_EXP_DEVCTL, &dc); + if (dc & PCI_EXP_DEVCTL_READRQ) + pr_warn("PCIe: Unable to set device Max_Read_Request_Size\n"); + else + pr_info("PCIe: Max_Read_Request_Size set to 128 bytes\n"); + } /* Disable PCIe0 Interrupt Mask INTA to INTD */ - __raw_writel(~0x3FFF, MISC_PCIE_INT_MASK(cnspci->port)); + __raw_writel(~0x3FFF, MISC_PCIE_INT_MASK(port)); } static int cns3xxx_pcie_abort_handler(unsigned long addr, unsigned int fsr,
Re: [PATCH] HID: uhid: Fixes a bug with userspace bluetooth stacks, which causes hangs during certain operations
On Tue, May 31, 2016 at 09:10:36PM +, Colenbrander, Roelof wrote: > Hi, > > The patch in this thread is part of input work I'm doing with my team > and will hopefully be the first of many, but we are new to the > linux-input project. We shared this patch first in April and again > about 2 weeks ago in May, but we haven't received any feedback so far. It's been the middle of the merge window for the past 2 weeks, where no new code is usually reviewed or handled at all. Give the maintainers a bit of time to catch up. thanks, greg k-h
Re: [glibc] preadv/pwritev question
On 31 May 2016 17:00, Chris Metcalf wrote: > On 5/31/2016 4:04 PM, Yury Norov wrote: > > In path a63c7fa18a (Add sysdeps/unix/sysv/linux/generic/.) you add > > this: > > +++ b/sysdeps/unix/sysv/linux/generic/wordsize-32/preadv.c > > > > [...] > > > > +static ssize_t > > +do_preadv (int fd, const struct iovec *vector, int count, off_t > > offset) > > +{ > > + assert (sizeof (offset) == 4); > > + return INLINE_SYSCALL (preadv, __ALIGNMENT_COUNT (5, 6), fd, > > + vector, count, __ALIGNMENT_ARG > > + __LONG_LONG_PAIR (offset >> 31, offset)); > > +} > > + > > > > And this is the code that is picked up if I choose wordsize-32 for my > > AARCH64/ILP32. So I have questions. > > > > 1. What is the assert for? We agreed that all new ABIs will be 64-bit > > off_t only. > > > > I fixed it internally like this: > > +#ifndef __OFF_T_MATCHES_OFF64_T > > assert (sizeof (offset) == 4); > > +#endif > > > > There is a bunch of similar assertions in glibc. > > > > 2. This one looks weird: > > __LONG_LONG_PAIR (offset >> 31, offset)) > > Why 31-bit offset? And why you don't mask 2nd argument? > > Later in your patch I see this: > > +static ssize_t > > +do_preadv64 (int fd, const struct iovec *vector, int count, off64_t > > offset) > > > > +{ > > + return INLINE_SYSCALL (preadv, __ALIGNMENT_COUNT (5, 6), fd, > > + vector, count, __ALIGNMENT_ARG > > + __LONG_LONG_PAIR ((off_t) (offset >> 32), > > + (off_t) (offset & 0x))); > > +} > > > > And it looks correct to me. If 1st version is correct as well, I think > > it should be commented. > > I did this work before x32 came out, so I tried to model it more closely on > the existing x86 compat API. I agree that a 64-bit off_t model seems > reasonable; > however, the code does exactly what it does to match x86, namely preadv() > takes > a 32-bit offset, and preadv64() take a 64-bit offset. The assert() in preadv > to force > sizeof to be 4 is exactly why in that routine we use (offset >> 31, offset). > Since > we know offset fits in 32 bits, all we need to do is properly sign-extend it > into > 64 bits in the high register of the pair, which is what (offset >> 31) does - > you end > up with only 0 or -1, thus sign-extending the 32-bit signed off_t. Then in > preadv64() we actually need to break apart the 64-bit offset into a high 32 > bits > and a low 32 bits, which is what (offset >> 32, offset & 0x) does. > > For a 64-bit off_t you will want to not compile preadv.c at all, and instead > make > __libc_preadv() and friends be aliases of __libc_preadv64(). sounds like Adhemerval's pread/pwrite unify work should be extended to the preadv/pwritev funcs. it deals with the ilp32 case and uses the new SYSCALL_LL macro to deal with the ugly shifting/masking. check out these commits: https://sourceware.org/git/?p=glibc.git;a=commit;h=071af4769fcdfe2cd349157b01f27c9571478ace https://sourceware.org/git/?p=glibc.git;a=commit;h=77a4fbd53611720cd6ae845de560df5dd332b28e https://sourceware.org/git/?p=glibc.git;a=commit;h=eeddfa91cbb1a619af135c7a9ac14251ec094b7a -mike signature.asc Description: Digital signature
Re: [PATCH] Revert "ARM: cns3xxx: pci: avoid potential stack overflow"
[+cc Russell, linux-arm-kernel] On Tue, May 31, 2016 at 04:58:02PM -0500, Bjorn Helgaas wrote: > This reverts commit 498a92d42596a7a32c042319eb62a4c3d8081cf1. > > Krzysztof reported that this change broke Cavium CNS3xxx, ARMv6 (Laguna > GW-2388) because the MRRS setting is never written to the hardware. Krzysztof, can you test this and see whether it fixes the problem for you? > Signed-off-by: Bjorn Helgaas > CC: Arnd Bergmann > CC: Krzysztof Hałasa > --- > arch/arm/mach-cns3xxx/pcie.c | 71 > -- > 1 file changed, 41 insertions(+), 30 deletions(-) > > diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c > index 318394e..c622c30 100644 > --- a/arch/arm/mach-cns3xxx/pcie.c > +++ b/arch/arm/mach-cns3xxx/pcie.c > @@ -65,9 +65,8 @@ static void __iomem *cns3xxx_pci_map_bus(struct pci_bus > *bus, > > /* >* The CNS PCI bridge doesn't fit into the PCI hierarchy, though > - * we still want to access it. > - * We place the host bridge on bus 0, and the directly connected > - * device on bus 1, slot 0. > + * we still want to access it. For this to work, we must place > + * the first device on the same bus as the CNS PCI bridge. >*/ > if (busno == 0) { /* internal PCIe bus, host bridge device */ > if (devfn == 0) /* device# and function# are ignored by hw */ > @@ -212,46 +211,58 @@ static void __init cns3xxx_pcie_check_link(struct > cns3xxx_pcie *cnspci) > } > } > > -static void cns3xxx_write_config(struct cns3xxx_pcie *cnspci, > - int where, int size, u32 val) > -{ > - void __iomem *base = cnspci->host_regs + (where & 0xffc); > - u32 v; > - u32 mask = (0x1ull << (size * 8)) - 1; > - int shift = (where % 4) * 8; > - > - v = readl_relaxed(base); > - > - v &= ~(mask << shift); > - v |= (val & mask) << shift; > - > - writel_relaxed(v, base); > - readl_relaxed(base); > -} > - > static void __init cns3xxx_pcie_hw_init(struct cns3xxx_pcie *cnspci) > { > + int port = cnspci->port; > + struct pci_sys_data sd = { > + .private_data = cnspci, > + }; > + struct pci_bus bus = { > + .number = 0, > + .ops = &cns3xxx_pcie_ops, > + .sysdata = &sd, > + }; > u16 mem_base = cnspci->res_mem.start >> 16; > u16 mem_limit = cnspci->res_mem.end >> 16; > u16 io_base = cnspci->res_io.start >> 16; > u16 io_limit = cnspci->res_io.end>> 16; > + u32 devfn = 0; > + u8 tmp8; > + u16 pos; > + u16 dc; > + > + pci_bus_write_config_byte(&bus, devfn, PCI_PRIMARY_BUS, 0); > + pci_bus_write_config_byte(&bus, devfn, PCI_SECONDARY_BUS, 1); > + pci_bus_write_config_byte(&bus, devfn, PCI_SUBORDINATE_BUS, 1); > > - cns3xxx_write_config(cnspci, PCI_PRIMARY_BUS, 1, 0); > - cns3xxx_write_config(cnspci, PCI_SECONDARY_BUS, 1, 1); > - cns3xxx_write_config(cnspci, PCI_SUBORDINATE_BUS, 1, 1); > - cns3xxx_write_config(cnspci, PCI_MEMORY_BASE, 2, mem_base); > - cns3xxx_write_config(cnspci, PCI_MEMORY_LIMIT, 2, mem_limit); > - cns3xxx_write_config(cnspci, PCI_IO_BASE_UPPER16, 2, io_base); > - cns3xxx_write_config(cnspci, PCI_IO_LIMIT_UPPER16, 2, io_limit); > + pci_bus_read_config_byte(&bus, devfn, PCI_PRIMARY_BUS, &tmp8); > + pci_bus_read_config_byte(&bus, devfn, PCI_SECONDARY_BUS, &tmp8); > + pci_bus_read_config_byte(&bus, devfn, PCI_SUBORDINATE_BUS, &tmp8); > + > + pci_bus_write_config_word(&bus, devfn, PCI_MEMORY_BASE, mem_base); > + pci_bus_write_config_word(&bus, devfn, PCI_MEMORY_LIMIT, mem_limit); > + pci_bus_write_config_word(&bus, devfn, PCI_IO_BASE_UPPER16, io_base); > + pci_bus_write_config_word(&bus, devfn, PCI_IO_LIMIT_UPPER16, io_limit); > > if (!cnspci->linked) > return; > > /* Set Device Max_Read_Request_Size to 128 byte */ > - pcie_bus_config = PCIE_BUS_PEER2PEER; > - > + bus.number = 1; /* directly connected PCIe device */ > + devfn = PCI_DEVFN(0, 0); > + pos = pci_bus_find_capability(&bus, devfn, PCI_CAP_ID_EXP); > + pci_bus_read_config_word(&bus, devfn, pos + PCI_EXP_DEVCTL, &dc); > + if (dc & PCI_EXP_DEVCTL_READRQ) { > + dc &= ~PCI_EXP_DEVCTL_READRQ; > + pci_bus_write_config_word(&bus, devfn, pos + PCI_EXP_DEVCTL, > dc); > + pci_bus_read_config_word(&bus, devfn, pos + PCI_EXP_DEVCTL, > &dc); > + if (dc & PCI_EXP_DEVCTL_READRQ) > + pr_warn("PCIe: Unable to set device > Max_Read_Request_Size\n"); > + else > + pr_info("PCIe: Max_Read_Request_Size set to 128 > bytes\n"); > + } > /* Disable PCIe0 Interrupt Mask INTA to INTD */ > - __raw_writel(~0x3FFF, MISC_PCIE_INT_MASK(cnspci->port)); > + __raw_writel(~0x3FFF, MISC_PCIE_INT_MASK(port)); > } > > static int cns3xxx_pcie_abo
Re: [PATCH] lib/uuid.c: eliminate uuid_[bl]e_index arrays
> Functions with sized array arguments are generally undesired. > > Linus once wrote: (http://comments.gmane.org/gmane.linux.kernel/2031400) > > array arguments in C don't >actually exist. Sadly, compilers accept it for various bad historical >reasons, and silently turn it into just a pointer argument. There are >arguments for them, but they are from weak minds. > > Perhaps this would be better using simple pointers and without the __ > > static int __uuid_to_bin(const char *uuid, u8 *b, const u8 *si) I haven't looked up the full original discussion to see if this is a point on which I disagree with Linus, but I find it useful for documentation: this is not just a pointer to "some" bytes, this is a pointer to [LENGTH] bytes. It's a reminder to the caller that they'd better pass in a buffer of the required size. Obviosuly, it makes no actual difference to the compiler. C99 actually has a way to say this explicitly to the compiler, but the syntax is ugly: static int __uuid_to_bin(const char uuid[static 36], __u8 b[static 16], const u8 si[static 16]) (This includes the effect of __attribute__((nonnull)).) Further discussion at https://hamberg.no/erlend/posts/2013-02-18-static-array-indices.html https://stackoverflow.com/questions/3430315/what-is-the-purpose-of-static-keyword-in-array-parameter-of-function-like-char (FWIW, another two style points which I disagre with Linus about are that I don't mind "sizeof variable" without parens, and that I don't mind using a bare "0" for a null pointer. More substantially, I like "bool" a lot more than Linus does.)
Re: [PATCH 2/2] powercap/rapl: add support for denverton
On 05/31/2016 01:41 PM, Jacob Pan wrote: > --- a/drivers/powercap/intel_rapl.c > +++ b/drivers/powercap/intel_rapl.c > @@ -1137,6 +1137,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = > { > RAPL_CPU(0x57, rapl_defaults_hsw_server),/* Knights Landing */ > RAPL_CPU(0x8E, rapl_defaults_core),/* Kabylake */ > RAPL_CPU(0x9E, rapl_defaults_core),/* Kabylake */ > + RAPL_CPU(0x5F, rapl_defaults_core),/* Denverton micro server */ > {} > }; Not to derail this individual patch... but do we really want to continue open-coding CPU model/family combos all over arch/x86? For instance, arch/x86/events/intel/core.c has: > case 142: /* 14nm Kabylake Mobile */ > case 158: /* 14nm Kabylake Desktop */ > case 78: /* 14nm Skylake Mobile */ > case 94: /* 14nm Skylake Desktop */ > case 85: /* 14nm Skylake Server */ Which duplicates the two Kabylake family numbers from the RAPL_CPU() context above (just in decimal instead of hex). Should we just start sticking these things in a header like: #define X86_INTEL_FAMILY_KABYLAKE1 0x8E #define X86_INTEL_FAMILY_KABYLAKE2 0x9E #define X86_INTEL_FAMILY_DENVERTON 0x5F So we have this: RAPL_CPU(X86_INTEL_FAMILY_DENVERTON, rapl_defaults_core), instead of having to explain our magic number in a comment.
Re: [PATCH 6/6] mm, oom: fortify task_will_free_mem
On 05/31, Michal Hocko wrote: > > On Mon 30-05-16 19:35:05, Oleg Nesterov wrote: > > > > Well, let me suggest this again. I think it should do > > > > > > if (SIGNAL_GROUP_COREDUMP) > > return false; > > > > if (SIGNAL_GROUP_EXIT) > > return true; > > > > if (thread_group_empty() && PF_EXITING) > > return true; > > > > return false; > > > > we do not need fatal_signal_pending(), in this case SIGNAL_GROUP_EXIT should > > be set (ignoring some bugs with sub-namespaces which we need to fix anyway). > > OK, so we shouldn't care about race when the fatal_signal is set on the > task until it reaches do_group_exit? if fatal_signal() is true then (ignoring exec and coredump) SIGNAL_GROUP_EXIT is already set (again, ignoring the bugs with sub-namespace inits). At the same time, SIGKILL can be already dequeued when the task exits, so fatal_signal_pending() can be "false negative". > > And. I think this needs smp_rmb() at the end of the loop (assuming we have > > the > > process_shares_mm() check here). We need it to ensure that we read p->mm > > before > > we read next_task(), to avoid the race with exit() + clone(CLONE_VM). > > Why don't we need the same barrier in oom_kill_process? Because it calls do_send_sig_info() which takes ->siglock and copy_process() takes the same lock. Not a barrier, but acts the same way. > Which barrier it > would pair with? With the barrier implied by list_add_tail_rcu(&p->tasks, &init_task.tasks). > Anyway I think this would deserve it's own patch. > Barriers are always tricky and it is better to have them in a small > patch with a full explanation. OK, agreed. I am not sure I can read the new patch correctly, it depends on the previous changes... but afaics it looks good. Cosmetic/subjective nit, feel free to ignore, > +bool task_will_free_mem(struct task_struct *task) > +{ > + struct mm_struct *mm = NULL; unnecessary initialization ;) > + struct task_struct *p; > + bool ret; > + > + /* > + * If the process has passed exit_mm we have to skip it because > + * we have lost a link to other tasks sharing this mm, we do not > + * have anything to reap and the task might then get stuck waiting > + * for parent as zombie and we do not want it to hold TIF_MEMDIE > + */ > + p = find_lock_task_mm(task); > + if (!p) > + return false; > + > + if (!__task_will_free_mem(p)) { > + task_unlock(p); > + return false; > + } We can call the 1st __task_will_free_mem(p) before find_lock_task_mm(). In the likely case (I think) it should return false. And since __task_will_free_mem() has no other callers perhaps it should go into oom_kill.c too. Oleg.
Re: Regression in IO resource allocation
On Tue, May 31, 2016 at 11:42 PM, Roland Dreier wrote: > On Tue, May 31, 2016 at 2:11 PM, Rafael J. Wysocki wrote: >> Can you please try the appended patch (untested)? > > Thanks for the quick reply. Patch looks OK on my system... it boots > (which is very good :) and I see > > system 00:01: [io 0x0400-0x047f] has been reserved > > however I don't see the "ACPI CPU throttle" region reserved in > /proc/ioports... haven't debugged why acpi_processor_get_throttling() > isn't getting called or what is happening yet. > > Will dig a bit deeper and let you know. It may not be called at all if _PTC is used on that system, for example.
Re: Regression in IO resource allocation
On Wed, Jun 1, 2016 at 12:31 AM, Rafael J. Wysocki wrote: > On Tue, May 31, 2016 at 11:42 PM, Roland Dreier > wrote: >> On Tue, May 31, 2016 at 2:11 PM, Rafael J. Wysocki >> wrote: >>> Can you please try the appended patch (untested)? >> >> Thanks for the quick reply. Patch looks OK on my system... it boots >> (which is very good :) and I see >> >> system 00:01: [io 0x0400-0x047f] has been reserved >> >> however I don't see the "ACPI CPU throttle" region reserved in >> /proc/ioports... haven't debugged why acpi_processor_get_throttling() >> isn't getting called or what is happening yet. >> >> Will dig a bit deeper and let you know. > > It may not be called at all if _PTC is used on that system, for example. I mean acpi_processor_get_throttling_fadt(), of course. :-)
Re: [PATCH] qed: fix qed_fill_link() error handling
On Tuesday, May 31, 2016 2:20:46 PM CEST David Miller wrote: > From: Yuval Mintz > Date: Mon, 30 May 2016 16:24:07 + > > >> +if (IS_ENABLED(CONFIG_QED_SRIOV) && !IS_PF(hwfn->cdev)) { > >> +qed_vf_get_link_params(hwfn, params); > >> +qed_vf_get_link_state(hwfn, link); > >> +qed_vf_get_link_caps(hwfn, link_caps); > >> + > >> +return 0; > >> +} > > > > The IS_ENABLED here seems a bit wasteful to me - we have empty > > implementation > > under qed_vf.h just for this case [I.e., that SRIOV isn't enabled for qed]. > > If all we're trying achieve is removing these gcc warnings, I think we can > > simply > > memset the structs in the currently-empty qed_vf_get_link_* functions. Adding a memset() to those functions would add a bit of overhead in code size because that ends up being unused in practice without a way for the compiler to know, I added the IS_ENABLED() check to reduce the object code size here by also eliminating the check for IS_PF(). > I think both solutions are equally valid/elegant. > > Arnd? I think we can just remove the IS_ENABLED() check there and define the IS_PF() macro conditionally to become 'true' if CONFIG_QED_SRIOV is not set, like some other drivers do diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 287f61c20c19..756176525cf9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1110,7 +1110,7 @@ static int qed_get_link_data(struct qed_hwfn *hwfn, { void *p; - if (IS_ENABLED(CONFIG_QED_SRIOV) && !IS_PF(hwfn->cdev)) { + if (!IS_PF(hwfn->cdev)) { qed_vf_get_link_params(hwfn, params); qed_vf_get_link_state(hwfn, link); qed_vf_get_link_caps(hwfn, link_caps); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index c8667c65e685..c90b2b6ad969 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -12,11 +12,13 @@ #include "qed_vf.h" #define QED_VF_ARRAY_LENGTH (3) +#ifdef CONFIG_QED_SRIOV #define IS_VF(cdev) ((cdev)->b_is_vf) #define IS_PF(cdev) (!((cdev)->b_is_vf)) -#ifdef CONFIG_QED_SRIOV #define IS_PF_SRIOV(p_hwfn) (!!((p_hwfn)->cdev->p_iov_info)) #else +#define IS_VF(cdev) (0) +#define IS_PF(cdev) (1) #define IS_PF_SRIOV(p_hwfn) (0) #endif #define IS_PF_SRIOV_ALLOC(p_hwfn) (!!((p_hwfn)->pf_iov_info)) I don't see why that isn't already the case actually. If this is ok, I'll send an updated patch. For the PF case, we still need to fix the qed_mcp_get_link_params() failure case, so the rest of my patch is needed anyway, regardless of how we address the warning. Arnd
Re: [PATCH v4 0/5] /dev/random - a new approach
I'll be a while going through this. I was thinking about our earlier discussion where I was hammering on the point that compressing entropy too early is a mistake, and just now realized that I should have given you credit for my recent 4.7-rc1 patch 2a18da7a. The hash function ("good, fast AND cheap!") introduced there exploits that point: using a larger hash state (and postponing compression to the final size) dramatically reduces the requirements on the hash mixing function. I wasn't conscious of it at the time, but I just now realized that explaining it clarified the point in my mind, which led to applying the principle in other situations. So thank you!
Re: [RFC PATCH 1/2] rcu: sysctl: Panic on RCU Stall
On 05/31/2016 04:23 PM, Josh Triplett wrote: Hi Josh, > Sorry, realized something else a moment after sending: I don't think > this will build if you use the tiny RCU implementation. That > implementation *does* support tracing, and if you enable tracing, > you'll have CONFIG_RCU_STALL_COMMON=y, but you won't build tree.c where > the variable definition lives. So, the sysctl code will reference a > variable that doesn't exist. Good catch! [root@f23 linux]# egrep '(TINY_RCU|RCU_TRACE)' .config CONFIG_TINY_RCU=y # CONFIG_TREE_RCU_TRACE is not set CONFIG_RCU_TRACE=y [root@f23 linux]# make ... kernel/built-in.o:(.data+0x4688): undefined reference to `sysctl_panic_on_rcu_stall' Makefile:937: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 How about this change in the condition: -#ifdef CONFIG_RCU_STALL_COMMON +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) It worked fine here, but I may be missing something, so I better check with the experts :-). Thanks for reviewing the patch set! -- Daniel
Re: [PATCH 1/6] proc, oom: drop bogus task_lock and mm check
On 05/31, Michal Hocko wrote: > > Oleg has pointed out that can simplify both oom_adj_write and > oom_score_adj_write even further and drop the sighand lock. The only > purpose of the lock was to protect p->signal from going away but this > will not happen since ea6d290ca34c ("signals: make task_struct->signal > immutable/refcountable"). Sorry for confusion, I meant oom_adj_read() and oom_score_adj_read(). As for oom_adj_write/oom_score_adj_write we can remove it too, but then we need to ensure (say, using cmpxchg) that unpriviliged user can not not decrease signal->oom_score_adj_min if its oom_score_adj_write() races with someone else (say, admin) which tries to increase the same oom_score_adj_min. If you think this is not a problem - I am fine with this change. But please also update oom_adj_read/oom_score_adj_read ;) Oleg.
Re: script relative shebang
Hi Boris, Boris Rybalkin writes: > I would like to know if any changes to parsing '#!' script header line > are accepted in particular having ability to run interpreter from > relative to the script path? > > Something like: > > #!{dirname}/python/bin/python > > Where {dirname} is a special keyword replaced with dirname of a script. Just for the record, this can already be done without any help from the kernel: Assuming the following demonstration directory layout /subdir/catself /relshebang where catself.sh is your "interpreter": #!/bin/sh tail -n +2 $1 and relshebang is your script file invoking the toy interpreter from its shebang as follows: #!/usr/bin/gawk {exit system("/bin/sh -c 'exec \"$(dirname \"$0\")\"/subdir/catself \"$0\"' " FILENAME);} Hello world. You don't necessarily need to use gawk here, anything being able to do system() and taking some code snippet from its first argument will certainly work. If this is too ugly, you could also write your own wrapper a la /usr/bin/env and install that at some central location. Best, Nicolai
Re: [PATCH v6v3 02/12] mm: migrate: support non-lru movable page migration
On Tue, May 31, 2016 at 09:52:48AM +0200, Vlastimil Babka wrote: > On 05/31/2016 02:01 AM, Minchan Kim wrote: > >Per Vlastimi's review comment. > > > >Thanks for the detail review, Vlastimi! > >If you have another concern, feel free to say. > > I don't for now :) > > [...] > > >Cc: Rik van Riel > >Cc: Vlastimil Babka > >Cc: Joonsoo Kim > >Cc: Mel Gorman > >Cc: Hugh Dickins > >Cc: Rafael Aquini > >Cc: virtualizat...@lists.linux-foundation.org > >Cc: Jonathan Corbet > >Cc: John Einar Reitan > >Cc: dri-de...@lists.freedesktop.org > >Cc: Sergey Senozhatsky > >Signed-off-by: Gioh Kim > >Signed-off-by: Minchan Kim > > Acked-by: Vlastimil Babka Thanks for the review, Vlastimil!
[PATCH 2/2] ASoC: cs53l30: Check return value of regcache_sync()
Regcache_sync() might fail. So this patch adds a return value Check for it. Signed-off-by: Nicolin Chen --- sound/soc/codecs/cs53l30.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index 9aff449..ac90dd7 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -1055,7 +1055,11 @@ static int cs53l30_runtime_resume(struct device *dev) gpiod_set_value_cansleep(cs53l30->reset_gpio, 1); regcache_cache_only(cs53l30->regmap, false); - regcache_sync(cs53l30->regmap); + ret = regcache_sync(cs53l30->regmap); + if (ret) { + dev_err(dev, "failed to synchronize regcache: %d\n", ret); + return ret; + } return 0; } -- 2.1.4
[PATCH 1/2] ASoC: cs53l30: Rename the volume controls for preamplifier
Volume controls should end with 'Volume', so this patch renames them for ADC preamplifier. Signed-off-by: Nicolin Chen --- sound/soc/codecs/cs53l30.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index 714e579..9aff449 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -331,10 +331,10 @@ static const struct snd_kcontrol_new cs53l30_snd_controls[] = { SOC_SINGLE_TLV("ADC2 NG Boost Volume", CS53L30_ADC2_NG_CTL, CS53L30_ADCx_NG_BOOST_SHIFT, 1, 0, adc_ng_boost_tlv), - SOC_DOUBLE_R_TLV("ADC1 Pre Amp Gain", CS53L30_ADC1A_AFE_CTL, + SOC_DOUBLE_R_TLV("ADC1 Preamplifier Volume", CS53L30_ADC1A_AFE_CTL, CS53L30_ADC1B_AFE_CTL, CS53L30_ADCxy_PREAMP_SHIFT, 2, 0, pga_preamp_tlv), - SOC_DOUBLE_R_TLV("ADC2 Pre Amp Gain", CS53L30_ADC2A_AFE_CTL, + SOC_DOUBLE_R_TLV("ADC2 Preamplifier Volume", CS53L30_ADC2A_AFE_CTL, CS53L30_ADC2B_AFE_CTL, CS53L30_ADCxy_PREAMP_SHIFT, 2, 0, pga_preamp_tlv), -- 2.1.4
Re: [PATCH] timekeeping: Fix 1ns/tick drift with GENERIC_TIME_VSYSCALL_OLD
On Tue, May 31, 2016 at 6:06 AM, Thomas Graziadei wrote: > From: Thomas Graziadei > > The user notices the problem in a raw and real time drift, calling > clock_gettime with CLOCK_REALTIME / CLOCK_MONOTONIC_RAW on a system > with no ntp correction taking place (no ntpd or ptp stuff running). Hmm.. Curious. Was it actually drifting, or was it just oscillating/ringing near the RAW clock's value? > The problem is, that old_vsyscall_fixup adds an extra 1ns even though > xtime_nsec is already held in full nsecs and the remainder in this > case is 0. Do the rounding up buisness only if needed. The patch looks ok. But I'm curious what architecture you were seeing this on (ia64, powerpc?), as it would be much nicer to have those architectures migrate off of the old low-res vsyscall calculation and use the newer method with sub-ns precision, instead of trying to further fix up the deprecated method. I had submitted a patch to convert ia64 awhile back, but I don't recall getting much feedback. thanks -john
Re: [RFC PATCH] livepatch: allow removal of a disabled patch
On Tue, 3 May 2016, Miroslav Benes wrote: > > > Currently we do not allow patch module to unload since there is no > > > method to determine if a task is still running in the patched code. > > > > > > The consistency model gives us the way because when the patching > > > finishes we know that all tasks were marked as safe to call a new > > > patched function. Thus every new call to the function calls the new > > > patched code and at the same time no task can be somewhere in the old > > > code, because it had to leave that code to be marked as safe. > > > > > > We can safely let the patch module go after that. > > > > I found this a little confusing because it talks about patching, whereas > > we really want to remove the patch module after _unpatching_ it. > > You're right. I'll rephrase that. Now that it's been settled that this way (completion) is the way to go, could you please incorporate the feedback (and persumably also add Acks from Josh and Jessica) and send me v2? Thanks, -- Jiri Kosina SUSE Labs
[PATCH v7 05/12] zsmalloc: use bit_spin_lock
Use kernel standard bit spin-lock instead of custom mess. Even, it has a bug which doesn't disable preemption. The reason we don't have any problem is that we have used it during preemption disable section by class->lock spinlock. So no need to go to stable. Reviewed-by: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/zsmalloc.c | 10 +++--- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 79295c73dc9f..39f29aedd5d6 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -868,21 +868,17 @@ static unsigned long obj_idx_to_offset(struct page *page, static inline int trypin_tag(unsigned long handle) { - unsigned long *ptr = (unsigned long *)handle; - - return !test_and_set_bit_lock(HANDLE_PIN_BIT, ptr); + return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); } static void pin_tag(unsigned long handle) { - while (!trypin_tag(handle)); + bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); } static void unpin_tag(unsigned long handle) { - unsigned long *ptr = (unsigned long *)handle; - - clear_bit_unlock(HANDLE_PIN_BIT, ptr); + bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); } static void reset_page(struct page *page) -- 1.9.1
[PATCH v7 11/12] zsmalloc: page migration support
This patch introduces run-time migration feature for zspage. For migration, VM uses page.lru field so it would be better to not use page.next field which is unified with page.lru for own purpose. For that, firstly, we can get first object offset of the page via runtime calculation instead of using page.index so we can use page.index as link for page chaining instead of page.next. In case of huge object, it stores handle to page.index instead of next link of page chaining because huge object doesn't need to next link for page chaining. So get_next_page need to identify huge object to return NULL. For it, this patch uses PG_owner_priv_1 flag of the page flag. For migration, it supports three functions * zs_page_isolate It isolates a zspage which includes a subpage VM want to migrate from class so anyone cannot allocate new object from the zspage. We could try to isolate a zspage by the number of subpage so subsequent isolation trial of other subpage of the zpsage shouldn't fail. For that, we introduce zspage.isolated count. With that, zs_page_isolate can know whether zspage is already isolated or not for migration so if it is isolated for migration, subsequent isolation trial can be successful without trying further isolation. * zs_page_migrate First of all, it holds write-side zspage->lock to prevent migrate other subpage in zspage. Then, lock all objects in the page VM want to migrate. The reason we should lock all objects in the page is due to race between zs_map_object and zs_page_migrate. zs_map_object zs_page_migrate pin_tag(handle) obj = handle_to_obj(handle) obj_to_location(obj, &page, &obj_idx); write_lock(&zspage->lock) if (!trypin_tag(handle)) goto unpin_object zspage = get_zspage(page); read_lock(&zspage->lock); If zs_page_migrate doesn't do trypin_tag, zs_map_object's page can be stale by migration so it goes crash. If it locks all of objects successfully, it copies content from old page to new one, finally, create new zspage chain with new page. And if it's last isolated subpage in the zspage, put the zspage back to class. * zs_page_putback It returns isolated zspage to right fullness_group list if it fails to migrate a page. If it find a zspage is ZS_EMPTY, it queues zspage freeing to workqueue. See below about async zspage freeing. This patch introduces asynchronous zspage free. The reason to need it is we need page_lock to clear PG_movable but unfortunately, zs_free path should be atomic so the apporach is try to grab page_lock. If it got page_lock of all of pages successfully, it can free zspage immediately. Otherwise, it queues free request and free zspage via workqueue in process context. If zs_free finds the zspage is isolated when it try to free zspage, it delays the freeing until zs_page_putback finds it so it will free free the zspage finally. In this patch, we expand fullness_list from ZS_EMPTY to ZS_FULL. First of all, it will use ZS_EMPTY list for delay freeing. And with adding ZS_FULL list, it makes to identify whether zspage is isolated or not via list_empty(&zspage->list) test. Cc: Sergey Senozhatsky Signed-off-by: Minchan Kim --- include/uapi/linux/magic.h | 1 + mm/zsmalloc.c | 793 ++--- 2 files changed, 672 insertions(+), 122 deletions(-) diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index d829ce63529d..e398beac67b8 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -81,5 +81,6 @@ /* Since UDF 2.01 is ISO 13346 based... */ #define UDF_SUPER_MAGIC0x15013346 #define BALLOON_KVM_MAGIC 0x13661366 +#define ZSMALLOC_MAGIC 0x58295829 #endif /* __LINUX_MAGIC_H__ */ diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index c6fb543cfb98..a80100db16d6 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -17,14 +17,14 @@ * * Usage of struct page fields: * page->private: points to zspage - * page->index: offset of the first object starting in this page. - * For the first page, this is always 0, so we use this field - * to store handle for huge object. - * page->next: links together all component pages of a zspage + * page->freelist(index): links together all component pages of a zspage + * For the huge page, this is always 0, so we use this field + * to store handle. * * Usage of struct page flags: * PG_private: identifies the first component page * PG_private2: identifies the last component page + * PG_owner_priv_1: indentifies the huge component page * */ @@ -49,6 +49,11 @@ #include #include #include +#include +#include +#include + +#define ZSPAGE_MAGIC 0x58 /* * This must be power of 2 and greater than of equal to sizeof(link_free). @@ -136,25 +141,23 @@ * We do not maintain
[PATCH v7 04/12] zsmalloc: keep max_object in size_class
Every zspage in a size_class has same number of max objects so we could move it to a size_class. Reviewed-by: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/zsmalloc.c | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b6d4f258cb53..79295c73dc9f 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -32,8 +32,6 @@ * page->freelist: points to the first free object in zspage. * Free objects are linked together using in-place * metadata. - * page->objects: maximum number of objects we can store in this - * zspage (class->zspage_order * PAGE_SIZE / class->size) * page->lru: links together first pages of various zspages. * Basically forming list of zspages in a fullness group. * page->mapping: class index and fullness group of the zspage @@ -213,6 +211,7 @@ struct size_class { * of ZS_ALIGN. */ int size; + int objs_per_zspage; unsigned int index; struct zs_size_stat stats; @@ -631,21 +630,22 @@ static inline void zs_pool_stat_destroy(struct zs_pool *pool) * the pool (not yet implemented). This function returns fullness * status of the given page. */ -static enum fullness_group get_fullness_group(struct page *first_page) +static enum fullness_group get_fullness_group(struct size_class *class, + struct page *first_page) { - int inuse, max_objects; + int inuse, objs_per_zspage; enum fullness_group fg; VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); inuse = first_page->inuse; - max_objects = first_page->objects; + objs_per_zspage = class->objs_per_zspage; if (inuse == 0) fg = ZS_EMPTY; - else if (inuse == max_objects) + else if (inuse == objs_per_zspage) fg = ZS_FULL; - else if (inuse <= 3 * max_objects / fullness_threshold_frac) + else if (inuse <= 3 * objs_per_zspage / fullness_threshold_frac) fg = ZS_ALMOST_EMPTY; else fg = ZS_ALMOST_FULL; @@ -732,7 +732,7 @@ static enum fullness_group fix_fullness_group(struct size_class *class, enum fullness_group currfg, newfg; get_zspage_mapping(first_page, &class_idx, &currfg); - newfg = get_fullness_group(first_page); + newfg = get_fullness_group(class, first_page); if (newfg == currfg) goto out; @@ -1012,9 +1012,6 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) init_zspage(class, first_page); first_page->freelist = location_to_obj(first_page, 0); - /* Maximum number of objects we can store in this zspage */ - first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; - error = 0; /* Success */ cleanup: @@ -1242,11 +1239,11 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) return true; } -static bool zspage_full(struct page *first_page) +static bool zspage_full(struct size_class *class, struct page *first_page) { VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); - return first_page->inuse == first_page->objects; + return first_page->inuse == class->objs_per_zspage; } unsigned long zs_get_total_pages(struct zs_pool *pool) @@ -1632,7 +1629,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, } /* Stop if there is no more space */ - if (zspage_full(d_page)) { + if (zspage_full(class, d_page)) { unpin_tag(handle); ret = -ENOMEM; break; @@ -1691,7 +1688,7 @@ static enum fullness_group putback_zspage(struct zs_pool *pool, { enum fullness_group fullness; - fullness = get_fullness_group(first_page); + fullness = get_fullness_group(class, first_page); insert_zspage(class, fullness, first_page); set_zspage_mapping(first_page, class->index, fullness); @@ -1943,8 +1940,9 @@ struct zs_pool *zs_create_pool(const char *name) class->size = size; class->index = i; class->pages_per_zspage = pages_per_zspage; - if (pages_per_zspage == 1 && - get_maxobj_per_zspage(size, pages_per_zspage) == 1) + class->objs_per_zspage = class->pages_per_zspage * + PAGE_SIZE / class->size; + if (pages_per_zspage == 1 && class->objs_per_zspage == 1) class->huge = true; spin_lock_init(&class->lock); pool->size_class[i] = class; -- 1.9.1
[PATCH v7 08/12] zsmalloc: introduce zspage structure
We have squeezed meta data of zspage into first page's descriptor. So, to get meta data from subpage, we should get first page first of all. But it makes trouble to implment page migration feature of zsmalloc because any place where to get first page from subpage can be raced with first page migration. IOW, first page it got could be stale. For preventing it, I have tried several approahces but it made code complicated so finally, I concluded to separate metadata from first page. Of course, it consumes more memory. IOW, 16bytes per zspage on 32bit at the moment. It means we lost 1% at *worst case*(40B/4096B) which is not bad I think at the cost of maintenance. Cc: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/compaction.c | 1 - mm/zsmalloc.c | 531 ++-- 2 files changed, 242 insertions(+), 290 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index b7bfdf94b545..d1d2063b4fd9 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 07485a2e5b96..c6d2cbe0f19f 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -16,26 +16,11 @@ * struct page(s) to form a zspage. * * Usage of struct page fields: - * page->private: points to the first component (0-order) page - * page->index (union with page->freelist): offset of the first object - * starting in this page. For the first page, this is - * always 0, so we use this field (aka freelist) to point - * to the first free object in zspage. - * page->lru: links together all component pages (except the first page) - * of a zspage - * - * For _first_ page only: - * - * page->private: refers to the component page after the first page - * If the page is first_page for huge object, it stores handle. - * Look at size_class->huge. - * page->freelist: points to the first free object in zspage. - * Free objects are linked together using in-place - * metadata. - * page->lru: links together first pages of various zspages. - * Basically forming list of zspages in a fullness group. - * page->mapping: class index and fullness group of the zspage - * page->inuse: the number of objects that are used in this zspage + * page->private: points to zspage + * page->index: offset of the first object starting in this page. + * For the first page, this is always 0, so we use this field + * to store handle for huge object. + * page->next: links together all component pages of a zspage * * Usage of struct page flags: * PG_private: identifies the first component page @@ -147,7 +132,7 @@ * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN * (reason above) */ -#define ZS_SIZE_CLASS_DELTA(PAGE_SIZE >> 8) +#define ZS_SIZE_CLASS_DELTA(PAGE_SIZE >> CLASS_BITS) /* * We do not maintain any list for completely empty or full pages @@ -155,8 +140,6 @@ enum fullness_group { ZS_ALMOST_FULL, ZS_ALMOST_EMPTY, - _ZS_NR_FULLNESS_GROUPS, - ZS_EMPTY, ZS_FULL }; @@ -205,7 +188,7 @@ static const int fullness_threshold_frac = 4; struct size_class { spinlock_t lock; - struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; + struct list_head fullness_list[2]; /* * Size of objects stored in this class. Must be multiple * of ZS_ALIGN. @@ -224,7 +207,7 @@ struct size_class { /* * Placed within free objects to form a singly linked list. - * For every zspage, first_page->freelist gives head of this list. + * For every zspage, zspage->freeobj gives head of this list. * * This must be power of 2 and less than or equal to ZS_ALIGN */ @@ -247,6 +230,7 @@ struct zs_pool { struct size_class **size_class; struct kmem_cache *handle_cachep; + struct kmem_cache *zspage_cachep; atomic_long_t pages_allocated; @@ -268,14 +252,19 @@ struct zs_pool { * A zspage's class index and fullness group * are encoded in its (first)page->mapping */ -#define FULLNESS_BITS 4 -#define CLASS_BITS 28 +#define FULLNESS_BITS 2 +#define CLASS_BITS 8 -#define FULLNESS_SHIFT 0 -#define CLASS_SHIFT(FULLNESS_SHIFT + FULLNESS_BITS) - -#define FULLNESS_MASK ((1UL << FULLNESS_BITS) - 1) -#define CLASS_MASK ((1UL << CLASS_BITS) - 1) +struct zspage { + struct { + unsigned int fullness:FULLNESS_BITS; + unsigned int class:CLASS_BITS; + }; + unsigned int inuse; + void *freeobj; + struct page *first_page; + struct list_head list; /* fullness list */ +}; struct mapping_area { #ifdef CONFIG_PGTABLE_MAPPING @@ -287,29 +276,51 @@ struct mapping_area { enum zs_mapmode vm_mm; /* mapping mode */ }; -st
[PATCH v7 07/12] zsmalloc: factor page chain functionality out
For page migration, we need to create page chain of zspage dynamically so this patch factors it out from alloc_zspage. Reviewed-by: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/zsmalloc.c | 59 +++ 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5da80961ff3e..07485a2e5b96 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -960,7 +960,8 @@ static void init_zspage(struct size_class *class, struct page *first_page) unsigned long off = 0; struct page *page = first_page; - VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); + first_page->freelist = NULL; + set_zspage_inuse(first_page, 0); while (page) { struct page *next_page; @@ -996,15 +997,16 @@ static void init_zspage(struct size_class *class, struct page *first_page) page = next_page; off %= PAGE_SIZE; } + + set_freeobj(first_page, (unsigned long)location_to_obj(first_page, 0)); } -/* - * Allocate a zspage for the given size class - */ -static struct page *alloc_zspage(struct size_class *class, gfp_t flags) +static void create_page_chain(struct page *pages[], int nr_pages) { - int i, error; - struct page *first_page = NULL, *uninitialized_var(prev_page); + int i; + struct page *page; + struct page *prev_page = NULL; + struct page *first_page = NULL; /* * Allocate individual pages and link them together as: @@ -1017,20 +1019,14 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) * (i.e. no other sub-page has this flag set) and PG_private_2 to * identify the last page. */ - error = -ENOMEM; - for (i = 0; i < class->pages_per_zspage; i++) { - struct page *page; - - page = alloc_page(flags); - if (!page) - goto cleanup; + for (i = 0; i < nr_pages; i++) { + page = pages[i]; INIT_LIST_HEAD(&page->lru); - if (i == 0) { /* first page */ + if (i == 0) { SetPagePrivate(page); set_page_private(page, 0); first_page = page; - set_zspage_inuse(first_page, 0); } if (i == 1) set_page_private(first_page, (unsigned long)page); @@ -1038,22 +1034,37 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) set_page_private(page, (unsigned long)first_page); if (i >= 2) list_add(&page->lru, &prev_page->lru); - if (i == class->pages_per_zspage - 1) /* last page */ + if (i == nr_pages - 1) SetPagePrivate2(page); prev_page = page; } +} - init_zspage(class, first_page); +/* + * Allocate a zspage for the given size class + */ +static struct page *alloc_zspage(struct size_class *class, gfp_t flags) +{ + int i; + struct page *first_page = NULL; + struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE]; - set_freeobj(first_page, (unsigned long)location_to_obj(first_page, 0)); - error = 0; /* Success */ + for (i = 0; i < class->pages_per_zspage; i++) { + struct page *page; -cleanup: - if (unlikely(error) && first_page) { - free_zspage(first_page); - first_page = NULL; + page = alloc_page(flags); + if (!page) { + while (--i >= 0) + __free_page(pages[i]); + return NULL; + } + pages[i] = page; } + create_page_chain(pages, class->pages_per_zspage); + first_page = pages[0]; + init_zspage(class, first_page); + return first_page; } -- 1.9.1
[PATCH v7 09/12] zsmalloc: separate free_zspage from putback_zspage
Currently, putback_zspage does free zspage under class->lock if fullness become ZS_EMPTY but it makes trouble to implement locking scheme for new zspage migration. So, this patch is to separate free_zspage from putback_zspage and free zspage out of class->lock which is preparation for zspage migration. Reviewed-by: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/zsmalloc.c | 27 +++ 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index c6d2cbe0f19f..dd3708611f65 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1687,14 +1687,12 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source) /* * putback_zspage - add @zspage into right class's fullness list - * @pool: target pool * @class: destination class * @zspage: target page * * Return @zspage's fullness_group */ -static enum fullness_group putback_zspage(struct zs_pool *pool, - struct size_class *class, +static enum fullness_group putback_zspage(struct size_class *class, struct zspage *zspage) { enum fullness_group fullness; @@ -1703,15 +1701,6 @@ static enum fullness_group putback_zspage(struct zs_pool *pool, insert_zspage(class, zspage, fullness); set_zspage_mapping(zspage, class->index, fullness); - if (fullness == ZS_EMPTY) { - zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( - class->size, class->pages_per_zspage)); - atomic_long_sub(class->pages_per_zspage, - &pool->pages_allocated); - - free_zspage(pool, zspage); - } - return fullness; } @@ -1760,23 +1749,29 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class) if (!migrate_zspage(pool, class, &cc)) break; - putback_zspage(pool, class, dst_zspage); + putback_zspage(class, dst_zspage); } /* Stop if we couldn't find slot */ if (dst_zspage == NULL) break; - putback_zspage(pool, class, dst_zspage); - if (putback_zspage(pool, class, src_zspage) == ZS_EMPTY) + putback_zspage(class, dst_zspage); + if (putback_zspage(class, src_zspage) == ZS_EMPTY) { + zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( + class->size, class->pages_per_zspage)); + atomic_long_sub(class->pages_per_zspage, + &pool->pages_allocated); + free_zspage(pool, src_zspage); pool->stats.pages_compacted += class->pages_per_zspage; + } spin_unlock(&class->lock); cond_resched(); spin_lock(&class->lock); } if (src_zspage) - putback_zspage(pool, class, src_zspage); + putback_zspage(class, src_zspage); spin_unlock(&class->lock); } -- 1.9.1
[PATCH v7 01/12] mm: use put_page to free page instead of putback_lru_page
Procedure of page migration is as follows: First of all, it should isolate a page from LRU and try to migrate the page. If it is successful, it releases the page for freeing. Otherwise, it should put the page back to LRU list. For LRU pages, we have used putback_lru_page for both freeing and putback to LRU list. It's okay because put_page is aware of LRU list so if it releases last refcount of the page, it removes the page from LRU list. However, It makes unnecessary operations (e.g., lru_cache_add, pagevec and flags operations. It would be not significant but no worth to do) and harder to support new non-lru page migration because put_page isn't aware of non-lru page's data structure. To solve the problem, we can add new hook in put_page with PageMovable flags check but it can increase overhead in hot path and needs new locking scheme to stabilize the flag check with put_page. So, this patch cleans it up to divide two semantic(ie, put and putback). If migration is successful, use put_page instead of putback_lru_page and use putback_lru_page only on failure. That makes code more readable and doesn't add overhead in put_page. Comment from Vlastimil "Yeah, and compaction (perhaps also other migration users) has to drain the lru pvec... Getting rid of this stuff is worth even by itself." Cc: Rik van Riel Cc: Mel Gorman Cc: Hugh Dickins Cc: Naoya Horiguchi Acked-by: Vlastimil Babka Signed-off-by: Minchan Kim --- mm/migrate.c | 64 +--- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9baf41c877ff..2666f28b5236 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -913,6 +913,19 @@ static int __unmap_and_move(struct page *page, struct page *newpage, put_anon_vma(anon_vma); unlock_page(page); out: + /* +* If migration is successful, decrease refcount of the newpage +* which will not free the page because new page owner increased +* refcounter. As well, if it is LRU page, add the page to LRU +* list in here. +*/ + if (rc == MIGRATEPAGE_SUCCESS) { + if (unlikely(__is_movable_balloon_page(newpage))) + put_page(newpage); + else + putback_lru_page(newpage); + } + return rc; } @@ -946,6 +959,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, if (page_count(page) == 1) { /* page was freed from under us. So we are done. */ + ClearPageActive(page); + ClearPageUnevictable(page); + if (put_new_page) + put_new_page(newpage, private); + else + put_page(newpage); goto out; } @@ -958,10 +977,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, } rc = __unmap_and_move(page, newpage, force, mode); - if (rc == MIGRATEPAGE_SUCCESS) { - put_new_page = NULL; + if (rc == MIGRATEPAGE_SUCCESS) set_page_owner_migrate_reason(newpage, reason); - } out: if (rc != -EAGAIN) { @@ -974,34 +991,33 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - /* Soft-offlined page shouldn't go through lru cache list */ - if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) { + } + + /* +* If migration is successful, releases reference grabbed during +* isolation. Otherwise, restore the page to right list unless +* we want to retry. +*/ + if (rc == MIGRATEPAGE_SUCCESS) { + put_page(page); + if (reason == MR_MEMORY_FAILURE) { /* -* With this release, we free successfully migrated -* page and set PG_HWPoison on just freed page -* intentionally. Although it's rather weird, it's how -* HWPoison flag works at the moment. +* Set PG_HWPoison on just freed page +* intentionally. Although it's rather weird, +* it's how HWPoison flag works at the moment. */ - put_page(page); if (!test_set_page_hwpoison(page)) num_poisoned_pages_inc(); - } else + } + } else { + if (rc != -EAGAIN) putback_lru_page(page); + if (put_new_page) + put_new_page(newpage, private); + else + put_page(newpage); } - /* -*
[PATCH v7 03/12] mm: balloon: use general non-lru movable page feature
Now, VM has a feature to migrate non-lru movable pages so balloon doesn't need custom migration hooks in migrate.c and compaction.c. Instead, this patch implements page->mapping->a_ops->{isolate|migrate|putback} functions. With that, we could remove hooks for ballooning in general migration functions and make balloon compaction simple. Cc: virtualizat...@lists.linux-foundation.org Cc: Rafael Aquini Cc: Konstantin Khlebnikov Acked-by: Vlastimil Babka Signed-off-by: Gioh Kim Signed-off-by: Minchan Kim --- drivers/virtio/virtio_balloon.c| 54 +++--- include/linux/balloon_compaction.h | 53 +++-- include/uapi/linux/magic.h | 1 + mm/balloon_compaction.c| 94 +++--- mm/compaction.c| 7 --- mm/migrate.c | 19 +--- mm/vmscan.c| 2 +- 7 files changed, 85 insertions(+), 145 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 476c0e3a7150..88d5609375de 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Balloon device works in 4K page units. So each page is pointed to by @@ -45,6 +46,10 @@ static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; module_param(oom_pages, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); +#ifdef CONFIG_BALLOON_COMPACTION +static struct vfsmount *balloon_mnt; +#endif + struct virtio_balloon { struct virtio_device *vdev; struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; @@ -488,8 +493,26 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, put_page(page); /* balloon reference */ - return MIGRATEPAGE_SUCCESS; + return 0; } + +static struct dentry *balloon_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + static const struct dentry_operations ops = { + .d_dname = simple_dname, + }; + + return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops, + BALLOON_KVM_MAGIC); +} + +static struct file_system_type balloon_fs = { + .name = "balloon-kvm", + .mount = balloon_mount, + .kill_sb= kill_anon_super, +}; + #endif /* CONFIG_BALLOON_COMPACTION */ static int virtballoon_probe(struct virtio_device *vdev) @@ -519,9 +542,6 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vdev = vdev; balloon_devinfo_init(&vb->vb_dev_info); -#ifdef CONFIG_BALLOON_COMPACTION - vb->vb_dev_info.migratepage = virtballoon_migratepage; -#endif err = init_vqs(vb); if (err) @@ -531,13 +551,33 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; err = register_oom_notifier(&vb->nb); if (err < 0) - goto out_oom_notify; + goto out_del_vqs; + +#ifdef CONFIG_BALLOON_COMPACTION + balloon_mnt = kern_mount(&balloon_fs); + if (IS_ERR(balloon_mnt)) { + err = PTR_ERR(balloon_mnt); + unregister_oom_notifier(&vb->nb); + goto out_del_vqs; + } + + vb->vb_dev_info.migratepage = virtballoon_migratepage; + vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + if (IS_ERR(vb->vb_dev_info.inode)) { + err = PTR_ERR(vb->vb_dev_info.inode); + kern_unmount(balloon_mnt); + unregister_oom_notifier(&vb->nb); + vb->vb_dev_info.inode = NULL; + goto out_del_vqs; + } + vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; +#endif virtio_device_ready(vdev); return 0; -out_oom_notify: +out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -571,6 +611,8 @@ static void virtballoon_remove(struct virtio_device *vdev) cancel_work_sync(&vb->update_balloon_stats_work); remove_common(vb); + if (vb->vb_dev_info.inode) + iput(vb->vb_dev_info.inode); kfree(vb); } diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 9b0a15d06a4f..c0c430d06a9b 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -45,9 +45,10 @@ #define _LINUX_BALLOON_COMPACTION_H #include #include -#include +#include #include #include +#include /* * Balloon device information descriptor. @@ -62,6 +63,7 @@ struct balloon_dev_info { struct list_head pages; /* Pages enqueued & handled to Host */ int (*migratepage)(struct balloon_dev_info *, struct page *newpage, struct page *page, enum migrate_mode mode); + struct inode *inode; }; extern struct page *bal
[PATCH v7 12/12] zram: use __GFP_MOVABLE for memory allocation
Zsmalloc is ready for page migration so zram can use __GFP_MOVABLE from now on. I did test to see how it helps to make higher order pages. Test scenario is as follows. KVM guest, 1G memory, ext4 formated zram block device, for i in `seq 1 8`; do dd if=/dev/vda1 of=mnt/test$i.txt bs=128M count=1 & done wait `pidof dd` for i in `seq 1 2 8`; do rm -rf mnt/test$i.txt done fstrim -v mnt echo "init" cat /proc/buddyinfo echo "compaction" echo 1 > /proc/sys/vm/compact_memory cat /proc/buddyinfo old: init Node 0, zone DMA208120 51 41 11 0 0 0 0 0 0 Node 0, zoneDMA32 16380 13777 9184 3805789 54 3 0 0 0 0 compaction Node 0, zone DMA132 82 40 39 16 2 1 0 0 0 0 Node 0, zoneDMA32 5219 5526 4969 3455 1831677139 15 0 0 0 new: init Node 0, zone DMA379115 97 19 2 0 0 0 0 0 0 Node 0, zoneDMA32 18891 16774 10862 3947637 21 0 0 0 0 0 compaction 1 Node 0, zone DMA214 66 87 29 10 3 0 0 0 0 0 Node 0, zoneDMA32 1612 3139 3154 2469 1745990384 94 7 0 0 As you can see, compaction made so many high-order pages. Yay! Reviewed-by: Sergey Senozhatsky Signed-off-by: Minchan Kim --- drivers/block/zram/zram_drv.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8fcad8b761f1..ccf1bddd09ca 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -732,7 +732,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, handle = zs_malloc(meta->mem_pool, clen, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | - __GFP_HIGHMEM); + __GFP_HIGHMEM | + __GFP_MOVABLE); if (!handle) { zcomp_strm_release(zram->comp, zstrm); zstrm = NULL; @@ -740,7 +741,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, atomic64_inc(&zram->stats.writestall); handle = zs_malloc(meta->mem_pool, clen, - GFP_NOIO | __GFP_HIGHMEM); + GFP_NOIO | __GFP_HIGHMEM | + __GFP_MOVABLE); if (handle) goto compress_again; -- 1.9.1
[PATCH v7 06/12] zsmalloc: use accessor
Upcoming patch will change how to encode zspage meta so for easy review, this patch wraps code to access metadata as accessor. Reviewed-by: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/zsmalloc.c | 82 +++ 1 file changed, 60 insertions(+), 22 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 39f29aedd5d6..5da80961ff3e 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -268,10 +268,14 @@ struct zs_pool { * A zspage's class index and fullness group * are encoded in its (first)page->mapping */ -#define CLASS_IDX_BITS 28 #define FULLNESS_BITS 4 -#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1) -#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1) +#define CLASS_BITS 28 + +#define FULLNESS_SHIFT 0 +#define CLASS_SHIFT(FULLNESS_SHIFT + FULLNESS_BITS) + +#define FULLNESS_MASK ((1UL << FULLNESS_BITS) - 1) +#define CLASS_MASK ((1UL << CLASS_BITS) - 1) struct mapping_area { #ifdef CONFIG_PGTABLE_MAPPING @@ -418,6 +422,41 @@ static int is_last_page(struct page *page) return PagePrivate2(page); } +static inline int get_zspage_inuse(struct page *first_page) +{ + return first_page->inuse; +} + +static inline void set_zspage_inuse(struct page *first_page, int val) +{ + first_page->inuse = val; +} + +static inline void mod_zspage_inuse(struct page *first_page, int val) +{ + first_page->inuse += val; +} + +static inline int get_first_obj_offset(struct page *page) +{ + return page->index; +} + +static inline void set_first_obj_offset(struct page *page, int offset) +{ + page->index = offset; +} + +static inline unsigned long get_freeobj(struct page *first_page) +{ + return (unsigned long)first_page->freelist; +} + +static inline void set_freeobj(struct page *first_page, unsigned long obj) +{ + first_page->freelist = (void *)obj; +} + static void get_zspage_mapping(struct page *first_page, unsigned int *class_idx, enum fullness_group *fullness) @@ -426,8 +465,8 @@ static void get_zspage_mapping(struct page *first_page, VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); m = (unsigned long)first_page->mapping; - *fullness = m & FULLNESS_MASK; - *class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK; + *fullness = (m >> FULLNESS_SHIFT) & FULLNESS_MASK; + *class_idx = (m >> CLASS_SHIFT) & CLASS_MASK; } static void set_zspage_mapping(struct page *first_page, @@ -437,8 +476,7 @@ static void set_zspage_mapping(struct page *first_page, unsigned long m; VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); - m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) | - (fullness & FULLNESS_MASK); + m = (class_idx << CLASS_SHIFT) | (fullness << FULLNESS_SHIFT); first_page->mapping = (struct address_space *)m; } @@ -638,7 +676,7 @@ static enum fullness_group get_fullness_group(struct size_class *class, VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); - inuse = first_page->inuse; + inuse = get_zspage_inuse(first_page); objs_per_zspage = class->objs_per_zspage; if (inuse == 0) @@ -684,7 +722,7 @@ static void insert_zspage(struct size_class *class, * empty/full. Put pages with higher ->inuse first. */ list_add_tail(&first_page->lru, &(*head)->lru); - if (first_page->inuse >= (*head)->inuse) + if (get_zspage_inuse(first_page) >= get_zspage_inuse(*head)) *head = first_page; } @@ -861,7 +899,7 @@ static unsigned long obj_idx_to_offset(struct page *page, unsigned long off = 0; if (!is_first_page(page)) - off = page->index; + off = get_first_obj_offset(page); return off + obj_idx * class_size; } @@ -896,7 +934,7 @@ static void free_zspage(struct page *first_page) struct page *nextp, *tmp, *head_extra; VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); - VM_BUG_ON_PAGE(first_page->inuse, first_page); + VM_BUG_ON_PAGE(get_zspage_inuse(first_page), first_page); head_extra = (struct page *)page_private(first_page); @@ -937,7 +975,7 @@ static void init_zspage(struct size_class *class, struct page *first_page) * head of corresponding zspage's freelist. */ if (page != first_page) - page->index = off; + set_first_obj_offset(page, off); vaddr = kmap_atomic(page); link = (struct link_free *)vaddr + off / sizeof(*link); @@ -992,7 +1030,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) SetPagePrivate(page); set_page_private(page, 0); first_page = page; - first_page->inuse = 0; +
[PATCH v7 10/12] zsmalloc: use freeobj for index
Zsmalloc stores first free object's position into freeobj in each zspage. If we change it with index from first_page instead of position, it makes page migration simple because we don't need to correct other entries for linked list if a page is migrated out. Cc: Sergey Senozhatsky Signed-off-by: Minchan Kim --- mm/zsmalloc.c | 139 ++ 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index dd3708611f65..c6fb543cfb98 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -71,9 +71,7 @@ * Object location (, ) is encoded as * as single (unsigned long) handle value. * - * Note that object index is relative to system - * page it is stored in, so for each sub-page belonging - * to a zspage, obj_idx starts with 0. + * Note that object index starts from 0. * * This is made more complicated by various memory models and PAE. */ @@ -214,10 +212,10 @@ struct size_class { struct link_free { union { /* -* Position of next free chunk (encodes ) +* Free object index; * It's valid for non-allocated object */ - void *next; + unsigned long next; /* * Handle of allocated object. */ @@ -261,7 +259,7 @@ struct zspage { unsigned int class:CLASS_BITS; }; unsigned int inuse; - void *freeobj; + unsigned int freeobj; struct page *first_page; struct list_head list; /* fullness list */ }; @@ -459,14 +457,14 @@ static inline void set_first_obj_offset(struct page *page, int offset) page->index = offset; } -static inline unsigned long get_freeobj(struct zspage *zspage) +static inline unsigned int get_freeobj(struct zspage *zspage) { - return (unsigned long)zspage->freeobj; + return zspage->freeobj; } -static inline void set_freeobj(struct zspage *zspage, unsigned long obj) +static inline void set_freeobj(struct zspage *zspage, unsigned int obj) { - zspage->freeobj = (void *)obj; + zspage->freeobj = obj; } static void get_zspage_mapping(struct zspage *zspage, @@ -810,6 +808,10 @@ static int get_pages_per_zspage(int class_size) return max_usedpc_order; } +static struct page *get_first_page(struct zspage *zspage) +{ + return zspage->first_page; +} static struct zspage *get_zspage(struct page *page) { @@ -821,37 +823,33 @@ static struct page *get_next_page(struct page *page) return page->next; } -/* - * Encode as a single handle value. - * We use the least bit of handle for tagging. +/** + * obj_to_location - get (, ) from encoded object value + * @page: page object resides in zspage + * @obj_idx: object index */ -static void *location_to_obj(struct page *page, unsigned long obj_idx) +static void obj_to_location(unsigned long obj, struct page **page, + unsigned int *obj_idx) { - unsigned long obj; + obj >>= OBJ_TAG_BITS; + *page = pfn_to_page(obj >> OBJ_INDEX_BITS); + *obj_idx = (obj & OBJ_INDEX_MASK); +} - if (!page) { - VM_BUG_ON(obj_idx); - return NULL; - } +/** + * location_to_obj - get obj value encoded from (, ) + * @page: page object resides in zspage + * @obj_idx: object index + */ +static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) +{ + unsigned long obj; obj = page_to_pfn(page) << OBJ_INDEX_BITS; - obj |= ((obj_idx) & OBJ_INDEX_MASK); + obj |= obj_idx & OBJ_INDEX_MASK; obj <<= OBJ_TAG_BITS; - return (void *)obj; -} - -/* - * Decode pair from the given object handle. We adjust the - * decoded obj_idx back to its original value since it was adjusted in - * location_to_obj(). - */ -static void obj_to_location(unsigned long obj, struct page **page, - unsigned long *obj_idx) -{ - obj >>= OBJ_TAG_BITS; - *page = pfn_to_page(obj >> OBJ_INDEX_BITS); - *obj_idx = (obj & OBJ_INDEX_MASK); + return obj; } static unsigned long handle_to_obj(unsigned long handle) @@ -869,16 +867,6 @@ static unsigned long obj_to_head(struct size_class *class, struct page *page, return *(unsigned long *)obj; } -static unsigned long obj_idx_to_offset(struct page *page, - unsigned long obj_idx, int class_size) -{ - unsigned long off; - - off = get_first_obj_offset(page); - - return off + obj_idx * class_size; -} - static inline int trypin_tag(unsigned long handle) { return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); @@ -922,13 +910,13 @@ static void free_zspage(struct zs_pool *pool, struct zspage *zspage) /* Initialize a newly allocated zspage */ static void init_zspage(struct size_class *class, struct zspage *zspage) { + uns
[PATCH v7 02/12] mm: migrate: support non-lru movable page migration
We have allowed migration for only LRU pages until now and it was enough to make high-order pages. But recently, embedded system(e.g., webOS, android) uses lots of non-movable pages(e.g., zram, GPU memory) so we have seen several reports about troubles of small high-order allocation. For fixing the problem, there were several efforts (e,g,. enhance compaction algorithm, SLUB fallback to 0-order page, reserved memory, vmalloc and so on) but if there are lots of non-movable pages in system, their solutions are void in the long run. So, this patch is to support facility to change non-movable pages with movable. For the feature, this patch introduces functions related to migration to address_space_operations as well as some page flags. If a driver want to make own pages movable, it should define three functions which are function pointers of struct address_space_operations. 1. bool (*isolate_page) (struct page *page, isolate_mode_t mode); What VM expects on isolate_page function of driver is to return *true* if driver isolates page successfully. On returing true, VM marks the page as PG_isolated so concurrent isolation in several CPUs skip the page for isolation. If a driver cannot isolate the page, it should return *false*. Once page is successfully isolated, VM uses page.lru fields so driver shouldn't expect to preserve values in that fields. 2. int (*migratepage) (struct address_space *mapping, struct page *newpage, struct page *oldpage, enum migrate_mode); After isolation, VM calls migratepage of driver with isolated page. The function of migratepage is to move content of the old page to new page and set up fields of struct page newpage. Keep in mind that you should indicate to the VM the oldpage is no longer movable via __ClearPageMovable() under page_lock if you migrated the oldpage successfully and returns 0. If driver cannot migrate the page at the moment, driver can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time because VM interprets -EAGAIN as "temporal migration failure". On returning any error except -EAGAIN, VM will give up the page migration without retrying in this time. Driver shouldn't touch page.lru field VM using in the functions. 3. void (*putback_page)(struct page *); If migration fails on isolated page, VM should return the isolated page to the driver so VM calls driver's putback_page with migration failed page. In this function, driver should put the isolated page back to the own data structure. 4. non-lru movable page flags There are two page flags for supporting non-lru movable page. * PG_movable Driver should use the below function to make page movable under page_lock. void __SetPageMovable(struct page *page, struct address_space *mapping) It needs argument of address_space for registering migration family functions which will be called by VM. Exactly speaking, PG_movable is not a real flag of struct page. Rather than, VM reuses page->mapping's lower bits to represent it. #define PAGE_MAPPING_MOVABLE 0x2 page->mapping = page->mapping | PAGE_MAPPING_MOVABLE; so driver shouldn't access page->mapping directly. Instead, driver should use page_mapping which mask off the low two bits of page->mapping so it can get right struct address_space. For testing of non-lru movable page, VM supports __PageMovable function. However, it doesn't guarantee to identify non-lru movable page because page->mapping field is unified with other variables in struct page. As well, if driver releases the page after isolation by VM, page->mapping doesn't have stable value although it has PAGE_MAPPING_MOVABLE (Look at __ClearPageMovable). But __PageMovable is cheap to catch whether page is LRU or non-lru movable once the page has been isolated. Because LRU pages never can have PAGE_MAPPING_MOVABLE in page->mapping. It is also good for just peeking to test non-lru movable pages before more expensive checking with lock_page in pfn scanning to select victim. For guaranteeing non-lru movable page, VM provides PageMovable function. Unlike __PageMovable, PageMovable functions validates page->mapping and mapping->a_ops->isolate_page under lock_page. The lock_page prevents sudden destroying of page->mapping. Driver using __SetPageMovable should clear the flag via __ClearMovablePage under page_lock before the releasing the page. * PG_isolated To prevent concurrent isolation among several CPUs, VM marks isolated page as PG_isolated under lock_page. So if a CPU encounters PG_isolated non-lru movable page, it can skip it. Driver doesn't need to manipulate the flag because VM will set/clear it automatically. Keep in mind that if driver sees PG_isolated page, it means the page have been isolated by VM so it shouldn't touch page.lru field. PG_isolated is alias with PG_reclaim flag so driver shouldn't use the flag for own purpose. Cc: Rik van Riel Cc: Joonsoo Kim Cc: Mel Gorman Cc: Hugh Dickins Cc: Rafael Aquini Cc: virtualizat...@lists.l
[PATCH v7 00/12] Support non-lru page migration
Recently, I got many reports about perfermance degradation in embedded system(Android mobile phone, webOS TV and so on) and easy fork fail. The problem was fragmentation caused by zram and GPU driver mainly. With memory pressure, their pages were spread out all of pageblock and it cannot be migrated with current compaction algorithm which supports only LRU pages. In the end, compaction cannot work well so reclaimer shrinks all of working set pages. It made system very slow and even to fail to fork easily which requires order-[2 or 3] allocations. Other pain point is that they cannot use CMA memory space so when OOM kill happens, I can see many free pages in CMA area, which is not memory efficient. In our product which has big CMA memory, it reclaims zones too exccessively to allocate GPU and zram page although there are lots of free space in CMA so system becomes very slow easily. To solve these problem, this patch tries to add facility to migrate non-lru pages via introducing new functions and page flags to help migration. struct address_space_operations { .. .. bool (*isolate_page)(struct page *, isolate_mode_t); void (*putback_page)(struct page *); .. } new page flags PG_movable PG_isolated For details, please read description in "mm: migrate: support non-lru movable page migration". Originally, Gioh Kim had tried to support this feature but he moved so I took over the work. I took many code from his work and changed a little bit and Konstantin Khlebnikov helped Gioh a lot so he should deserve to have many credit, too. And I should mention Chulmin who have tested this patchset heavily so I can find many bugs from him. :) Thanks, Gioh, Konstantin and Chulmin! This patchset consists of five parts. 1. clean up migration mm: use put_page to free page instead of putback_lru_page 2. add non-lru page migration feature mm: migrate: support non-lru movable page migration 3. rework KVM memory-ballooning mm: balloon: use general non-lru movable page feature 4. zsmalloc refactoring for preparing page migration zsmalloc: keep max_object in size_class zsmalloc: use bit_spin_lock zsmalloc: use accessor zsmalloc: factor page chain functionality out zsmalloc: introduce zspage structure zsmalloc: separate free_zspage from putback_zspage zsmalloc: use freeobj for index 5. zsmalloc page migration zsmalloc: page migration support zram: use __GFP_MOVABLE for memory allocation * From v6 * rebase on mmotm-2016-05-27-15-19 * clean up zsmalloc - Sergey * clean up non-lru page migration - Vlastimil * From v5 * rebase on next-20160520 * move utility functions to compaction.c and export - Sergey * zsmalloc dobule free fix - Sergey * add additional Reviewed-by for zsmalloc - Sergey * From v4 * rebase on mmotm-2016-05-05-17-19 * fix huge object migration - Chulmin * !CONFIG_COMPACTION support for zsmalloc * From v3 * rebase on mmotm-2016-04-06-20-40 * fix swap_info deadlock - Chulmin * race without page_lock - Vlastimil * no use page._mapcount for potential user-mapped page driver - Vlastimil * fix and enhance doc/description - Vlastimil * use page->mapping lower bits to represent PG_movable * make driver side's rule simple. * From v2 * rebase on mmotm-2016-03-29-15-54-16 * check PageMovable before lock_page - Joonsoo * check PageMovable before PageIsolated checking - Joonsoo * add more description about rule * From v1 * rebase on v4.5-mmotm-2016-03-17-15-04 * reordering patches to merge clean-up patches first * add Acked-by/Reviewed-by from Vlastimil and Sergey * use each own mount model instead of reusing anon_inode_fs - Al Viro * small changes - YiPing, Gioh Cc: Vlastimil Babka Cc: dri-de...@lists.freedesktop.org Cc: Hugh Dickins Cc: John Einar Reitan Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Konstantin Khlebnikov Cc: Mel Gorman Cc: Naoya Horiguchi Cc: Rafael Aquini Cc: Rik van Riel Cc: Sergey Senozhatsky Cc: virtualizat...@lists.linux-foundation.org Cc: Gioh Kim Cc: Chan Gyun Jeong Cc: Sangseok Lee Cc: Kyeongdon Kim Cc: Chulmin Kim Minchan Kim (12): mm: use put_page to free page instead of putback_lru_page mm: migrate: support non-lru movable page migration mm: balloon: use general non-lru movable page feature zsmalloc: keep max_object in size_class zsmalloc: use bit_spin_lock zsmalloc: use accessor zsmalloc: factor page chain functionality out zsmalloc: introduce zspage structure zsmalloc: separate free_zspage from putback_zspage zsmalloc: use freeobj for index zsmalloc: page migration support zram: use __GFP_MOVABLE for memory allocation Documentation/filesystems/Locking |4 + Documentation/filesystems/vfs.txt | 11 + Documentation/vm/page_migration| 107 ++- drivers/block/zram/zram_drv.c |6 +- drivers/virtio/virtio_balloon.c| 54 +- include/linux/balloon_compaction.h | 53 +- include/linux/compaction.h
Re: [PATCH v2 2/8] zram: switch to crypto compress API
On Tue, May 31, 2016 at 09:20:11PM +0900, Sergey Senozhatsky wrote: > We don't have an idle zstreams list anymore and our write path > now works absolutely differently, preventing preemption during > compression. This removes possibilities of read paths preempting > writes at wrong places (which could badly affect the performance > of both paths) and at the same time opens the door for a move > from custom LZO/LZ4 compression backends implementation to a more > generic one, using crypto compress API. > > Joonsoo Kim [1] attempted to do this a while ago, but faced with > the need of introducing a new crypto API interface. The root cause > was the fact that crypto API compression algorithms require a > compression stream structure (in zram terminology) for both > compression and decompression ops, while in reality only several > of compression algorithms really need it. This resulted in a > concept of context-less crypto API compression backends [2]. Both > write and read paths, though, would have been executed with the > preemption enabled, which in the worst case could have resulted > in a decreased worst-case performance, e.g. consider the > following case: > > CPU0 > > zram_write() > spin_lock() > take the last idle stream > spin_unlock() > > << preempted >> > > zram_read() > spin_lock() > no idle streams > spin_unlock() > schedule() > > resuming zram_write compression() > > but it took me some time to realize that, and it took even longer > to evolve zram and to make it ready for crypto API. The key turned > out to be -- drop the idle streams list entirely. Without the idle > streams list we are free to use compression algorithms that require > compression stream for decompression (read), because streams are > now placed in per-cpu data and each write path has to disable > preemption for compression op, almost completely eliminating the > aforementioned case (technically, we still have a small chance, > because write path has a fast and a slow paths and the slow path > is executed with the preemption enabled; but the frequency of > failed fast path is too low). > > TEST > > > - 4 CPUs, x86_64 system > - 3G zram, lzo > - fio tests: read, randread, write, randwrite, rw, randrw > > test script [3] command: > ZRAM_SIZE=3G LOG_SUFFIX= FIO_LOOPS=5 ./zram-fio-test.sh > >BASE PATCHED > jobs1 > READ: 2527.2MB/s 2482.7MB/s > READ: 2102.7MB/s 2045.0MB/s > WRITE: 1284.3MB/s 1324.3MB/s > WRITE: 1080.7MB/s 1101.9MB/s > READ: 430125KB/s 437498KB/s > WRITE: 430538KB/s 437919KB/s > READ: 399593KB/s 403987KB/s > WRITE: 399910KB/s 404308KB/s > jobs2 > READ: 8133.5MB/s 7854.8MB/s > READ: 7086.6MB/s 6912.8MB/s > WRITE: 3177.2MB/s 3298.3MB/s > WRITE: 2810.2MB/s 2871.4MB/s > READ: 1017.6MB/s 1023.4MB/s > WRITE: 1018.2MB/s 1023.1MB/s > READ: 977836KB/s 984205KB/s > WRITE: 979435KB/s 985814KB/s > jobs3 > READ: 13557MB/s 13391MB/s > READ: 11876MB/s 11752MB/s > WRITE: 4641.5MB/s 4682.1MB/s > WRITE: 4164.9MB/s 4179.3MB/s > READ: 1453.8MB/s 1455.1MB/s > WRITE: 1455.1MB/s 1458.2MB/s > READ: 1387.7MB/s 1395.7MB/s > WRITE: 1386.1MB/s 1394.9MB/s > jobs4 > READ: 20271MB/s 20078MB/s > READ: 18033MB/s 17928MB/s > WRITE: 6176.8MB/s 6180.5MB/s > WRITE: 5686.3MB/s 5705.3MB/s > READ: 2009.4MB/s 2006.7MB/s > WRITE: 2007.5MB/s 2004.9MB/s > READ: 1929.7MB/s 1935.6MB/s > WRITE: 1926.8MB/s 1932.6MB/s > jobs5 > READ: 18823MB/s 19024MB/s > READ: 18968MB/s 19071MB/s > WRITE: 6191.6MB/s 6372.1MB/s > WRITE: 5818.7MB/s 5787.1MB/s > READ: 2011.7MB/s 1981.3MB/s > WRITE: 2011.4MB/s 1980.1MB/s > READ: 1949.3MB/s 1935.7MB/s > WRITE: 1940.4MB/s 1926.1MB/s > jobs6 > READ: 21870MB/s 21715MB/s > READ: 19957MB/s 19879MB/s > WRITE: 6528.4MB/s 6537.6MB/s > WRITE: 6098.9MB/s 6073.6MB/s > READ: 2048.6MB/s 2049.9MB/s > WRITE: 2041.7MB/s 2042.9MB/s > READ: 2013.4MB/s 1990.4MB/s > WRITE: 2009.4MB/s 1986.5MB/s > jobs7 > READ: 21359MB/s 21124MB/s > READ: 19746MB/s 19293MB/s > WRITE: 6660.4MB/s 6518.8MB/s > WRITE: 6211.6MB/s 6193.1MB/s > READ: 2089.7MB/s 2080.6MB/s > WRITE: 2085.8MB/s 2076.5MB/s > READ: 2041.2MB/s 2052.5MB/s > WRITE:
Re: [PATCH v2 2/8] zram: switch to crypto compress API
On Tue, May 31, 2016 at 09:20:11PM +0900, Sergey Senozhatsky wrote: trivial: One thing I got missed in review. > -int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, > - const unsigned char *src, size_t *dst_len); > +int zcomp_compress(struct zcomp_strm *zstrm, > + const unsigned char *src, unsigned int *dst_len); unsigned int for dst_len > > -int zcomp_decompress(struct zcomp *comp, const unsigned char *src, > +int zcomp_decompress(struct zcomp_strm *zstrm, > + const unsigned char *src, > size_t src_len, unsigned char *dst); > size_t src_len?
Re: [PATCH v2 3/8] zram: align zcomp interface to crypto comp API
On Tue, May 31, 2016 at 09:20:12PM +0900, Sergey Senozhatsky wrote: > A cosmetic change: > update zcomp interface to be more aligned with the crypto API. > > Signed-off-by: Sergey Senozhatsky > Cc: Minchan Kim > Cc: Joonsoo Kim Acked-by: Minchan Kim Aha, you changed src_len in this patchset. :)
Re: [PATCH 4/4] pwm: add ChromeOS EC PWM driver
Hi Gwendal, Thanks for the review. On Sat, May 28, 2016 at 10:02:33PM -0700, Gwendal Grignou wrote: > On Fri, May 27, 2016 at 6:39 PM, Brian Norris > wrote: > > Use the new ChromeOS EC EC_CMD_PWM_{GET,SET}_DUTY commands to control > > one or more PWMs attached to the Embedded Controller. Because the EC > > allows us to modify the duty cycle (as a percentage, where U16_MAX is > > 100%) but not the period, we assign the period a fixed value of > > EC_PWM_MAX_DUTY and reject all attempts to change it. > > > > Signed-off-by: Brian Norris > > --- > > > + */ > > +struct cros_ec_pwm_device { > > + struct device *dev; > > + struct cros_ec_device *ec; > > + struct pwm_chip chip; > > +}; > > + > > +static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct > > pwm_chip *c) > > +{ > > + return container_of(c, struct cros_ec_pwm_device, chip); > > +} > > + > > +static int cros_ec_pwm_set_duty(struct cros_ec_pwm_device *ec_pwm, > > + struct pwm_device *pwm, > > + uint16_t duty) > Given you seprated the pwm stuff from the EC stuff and focusing on > sending a EC command here, the first parameter should be of > cros_ec_device* instead of cros_ec_pwm_device*. Good idea, done. I'll also change the 'pwm_device' arg into just a u8 index, since that's all we care about at this level of abstraction. > > +{ > > + struct cros_ec_device *ec = ec_pwm->ec; > > + struct ec_params_pwm_set_duty *params; > > + struct cros_ec_command *msg; > > + int ret; > > + > > + msg = kzalloc(sizeof(*msg) + sizeof(*params), GFP_KERNEL); > Use an ad-hoc data structure on the stack, so you will always be able > to send the command to the EC. Sure, can do. I guess an anonymous struct will do well here. > > + if (!msg) > > + return -ENOMEM; > > + params = (void *)&msg->data[0]; > > + > > + msg->version = 0; > > + msg->command = EC_CMD_PWM_SET_DUTY; > > + msg->insize = 0; > > + msg->outsize = sizeof(*params); > > + > > + params->duty = duty; > > + params->pwm_type = EC_PWM_TYPE_GENERIC; > > + params->index = pwm->hwpwm; > > + > > + ret = cros_ec_cmd_xfer_status(ec, msg); > > + kfree(msg); > > + return ret; > > +} > > + > > +static int cros_ec_pwm_get_duty(struct cros_ec_pwm_device *ec_pwm, > > + struct pwm_device *pwm) > Idem. Sure. > > +{ > > + struct cros_ec_device *ec = ec_pwm->ec; > > + struct ec_params_pwm_get_duty *params; > > + struct ec_response_pwm_get_duty *resp; > > + struct cros_ec_command *msg; > > + int ret; > > + > > + msg = kzalloc(sizeof(*msg) + max(sizeof(*params), sizeof(*resp)), > Idem. Will do. Here, I guess an anonymous struct containing a union of ec_{params,response}_pwm_get_duty will do it. > > + GFP_KERNEL); > > + if (!msg) > > + return -ENOMEM; > > + params = (void *)&msg->data[0]; > > + resp = (void *)&msg->data[0]; > > + > > + msg->version = 0; > > + msg->command = EC_CMD_PWM_GET_DUTY; > > + msg->insize = sizeof(*params); > > + msg->outsize = sizeof(*resp); > > + > > + params->pwm_type = EC_PWM_TYPE_GENERIC; > > + params->index = pwm->hwpwm; > > + > > + ret = cros_ec_cmd_xfer_status(ec, msg); > > + if (ret < 0) > > + goto out; > > + > > + ret = resp->duty; > > + > > +out: > > + kfree(msg); > > + return ret; > > +} > > + > > +static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, > > +struct pwm_state *state) > > +{ > > + struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); > > + > > + /* The EC won't let us change the period */ > > + if (state->period != EC_PWM_MAX_DUTY) > > + return -EINVAL; > > + > > + return cros_ec_pwm_set_duty(ec_pwm, pwm, state->duty_cycle); > I would use ec_pwm->ec here. Sure. > > +} > > + > > +static void cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device > > *pwm, > > + struct pwm_state *state) > > +{ > > + struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip); > > + int ret; > > + > > + ret = cros_ec_pwm_get_duty(ec_pwm, pwm); > > + if (ret < 0) { > > + dev_err(chip->dev, "error getting initial duty: %d\n", ret); > > + return; > > + } > > + > > + state->enabled = (ret > 0); > > + state->period = EC_PWM_MAX_DUTY; > > + state->duty_cycle = ret; > > +} > > + > > +static struct pwm_device * > > +cros_ec_pwm_xlate(struct pwm_chip *pc, const struct of_phandle_args *args) > > +{ > > + struct pwm_device *pwm; > > + > > + if (args->args[0] >= pc->npwm) > > + return ERR_PTR(-EINVAL); > > + > > + pwm = pwm_request_from_chip(pc, args->arg
Re: zone_reclaimable() leads to livelock in __alloc_pages_slowpath()
On 05/31, Michal Hocko wrote: > > On Sun 29-05-16 23:25:40, Oleg Nesterov wrote: > > > > This single change in get_scan_count() under for_each_evictable_lru() loop > > > > - size = lruvec_lru_size(lruvec, lru); > > + size = zone_page_state_snapshot(lruvec_zone(lruvec), > > NR_LRU_BASE + lru); > > > > fixes the problem too. > > > > Without this change shrink*() continues to scan the LRU_ACTIVE_FILE list > > while it is empty. LRU_INACTIVE_FILE is not empty (just a few pages) but > > we do not even try to scan it, lruvec_lru_size() returns zero. > > OK, you seem to be really seeing a different issue than me. quite possibly, but > My debugging > patch was showing when nothing was really isolated from the LRU lists > (both for shrink_{in}active_list. in my debugging session too. LRU_ACTIVE_FILE was empty, so there is nothing to isolate even if shrink_active_list() is (wrongly called) with nr_to_scan != 0. LRU_INACTIVE_FILE is not empty but it is not scanned because nr_to_scan == 0. But I am afraid I misunderstood you, and you meant something else. > > Then later we recheck zone_reclaimable() and it notices the INACTIVE_FILE > > counter because it uses the _snapshot variant, this leads to livelock. > > > > I guess this doesn't really matter, but in my particular case these > > ACTIVE/INACTIVE counters were screwed by the recent putback_inactive_pages() > > logic. The pages we "leak" in INACTIVE list were recently moved from ACTIVE > > to INACTIVE list, and this updated only the per-cpu ->vm_stat_diff[] > > counters, > > so the "non snapshot" lruvec_lru_size() in get_scan_count() sees the "old" > > numbers. > > Hmm. I am not really sure we can use the _snapshot version in lruvec_lru_size. Yes, yes, I understand, > But I am thinking whether we should simply revert 0db2cb8da89d ("mm, > vmscan: make zone_reclaimable_pages more precise") in 4.6 stable tree. > Does that help as well? I'll test this tomorrow, but even if it helps I am not sure... Yes, this way zone_reclaimable() and get_scan_count() will see the same numbers, but how this can help to make zone_reclaimable() == F at the end? Again, suppose that (say) ACTIVE list is empty but zone->vm_stat != 0 because there is something in per-cpu counter (so that _snapshot == 0). This means that we sill continue to try to scan this list for no reason. But Michal, let me repeat that I do not understand this code, so I can be easily wrong. Oleg.
[PATCH] Staging: comedi: das16.c: Added a blank line fixed a comment, coding style issue
From: PedroNieto Fixed a coding style issue. Signed-off-by: Pedro Nieto --- drivers/staging/comedi/drivers/das16.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/staging/comedi/drivers/das16.c b/drivers/staging/comedi/drivers/das16.c index fd8e0b7..ef345dc 100644 --- a/drivers/staging/comedi/drivers/das16.c +++ b/drivers/staging/comedi/drivers/das16.c @@ -198,6 +198,7 @@ enum { das16_pg_1601, das16_pg_1602, }; + static const int *const das16_gainlists[] = { NULL, das16jr_gainlist, @@ -428,8 +429,10 @@ static const struct das16_board das16_boards[] = { }, }; -/* Period for timer interrupt in jiffies. It's a function - * to deal with possibility of dynamic HZ patches */ +/* + * Period for timer interrupt in jiffies. It's a function + * to deal with possibility of dynamic HZ patches + */ static inline int timer_period(void) { return HZ / 20; -- 2.1.4
Re: [PATCH v2 4/8] zram: use crypto api to check alg availability
On Tue, May 31, 2016 at 09:20:13PM +0900, Sergey Senozhatsky wrote: > There is no way to get a string with all the crypto comp > algorithms supported by the crypto comp engine, so we need > to maintain our own backends list. At the same time we > additionally need to use crypto_has_comp() to make sure > that the user has requested a compression algorithm that is > recognized by the crypto comp engine. Relying on /proc/crypto > is not an options here, because it does not show not-yet-inserted > compression modules. > > Example: > > modprobe zram > cat /proc/crypto | grep -i lz4 > modprobe lz4 > cat /proc/crypto | grep -i lz4 > name : lz4 > driver : lz4-generic > module : lz4 > > So the user can't tell exactly if the lz4 is really supported > from /proc/crypto output, unless someone or something has loaded > it. > > This patch also adds crypto_has_comp() to zcomp_available_show(). > We store all the compression algorithms names in zcomp's `backends' > array, regardless the CONFIG_CRYPTO_FOO configuration, but show > only those that are also supported by crypto engine. This helps > user to know the exact list of compression algorithms that can be > used. So, if we do 'cat /sys/block/zram0/comp_algorithm", every crypto modules in the backend array are loaded in memory and not unloaded until admin executes rmmod? Right? > > Example: > module lz4 is not loaded yet, but is supported by the crypto > engine. /proc/crypto has no information on this module, while > zram's `comp_algorithm' lists it: > > cat /proc/crypto | grep -i lz4 > > cat /sys/block/zram0/comp_algorithm > [lzo] lz4 deflate lz4hc 842 > > We also now fully rely on crypto_has_comp() when configure a new > device. The existing `backends' array is kept for user's convenience > only -- there is no way to list all of the compression algorithms > supported by crypto -- and is not guaranteed to contain every > compression module name supported by the kernel. Switch to > crypto_has_comp() has an advantage of permitting the usage of > out-of-tree crypto compression modules (implementing S/W or H/W > compression). If user load out-of-tree crypto compression module, what's status of comp_algorithm? #> insmod foo_crypto.ko #> echo foo > /sys/block/zram0/comp_algorithm #> cat /sys/block/zram0/comp_algorithm lzo lz4 [foo] ? > > Signed-off-by: Sergey Senozhatsky > Cc: Minchan Kim > Cc: Joonsoo Kim > --- > Documentation/blockdev/zram.txt | 11 > drivers/block/zram/zcomp.c | 58 > - > drivers/block/zram/zram_drv.c | 16 +++- > drivers/block/zram/zram_drv.h | 5 ++-- > 4 files changed, 57 insertions(+), 33 deletions(-) > > diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt > index 13100fb..7c05357 100644 > --- a/Documentation/blockdev/zram.txt > +++ b/Documentation/blockdev/zram.txt > @@ -83,6 +83,17 @@ pre-created. Default: 1. > #select lzo compression algorithm > echo lzo > /sys/block/zram0/comp_algorithm > > + For the time being, the `comp_algorithm' content does not necessarily > + show every compression algorithm supported by the kernel. We keep this > + list primarily to simplify device configuration and one can configure > + a new device with a compression algorithm that is not listed in > + `comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API > + and, if some of the algorithms were built as modules, it's impossible > + to list all of them using, for instance, /proc/crypto or any other > + method. This, however, has an advantage of permitting the usage of > + custom crypto compression modules (implementing S/W or H/W > + compression). > + > 4) Set Disksize > Set disk size by writing the value to sysfs node 'disksize'. > The value can be either in bytes or you can use mem suffixes. > diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c > index f357268..2381ca9 100644 > --- a/drivers/block/zram/zcomp.c > +++ b/drivers/block/zram/zcomp.c > @@ -26,17 +26,6 @@ static const char * const backends[] = { > NULL > }; > > -static const char *find_backend(const char *compress) > -{ > - int i = 0; > - while (backends[i]) { > - if (sysfs_streq(compress, backends[i])) > - break; > - i++; > - } > - return backends[i]; > -} > - > static void zcomp_strm_free(struct zcomp_strm *zstrm) > { > if (!IS_ERR_OR_NULL(zstrm->tfm)) > @@ -68,30 +57,53 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp > *comp, gfp_t flags) > return zstrm; > } > > +bool zcomp_available_algorithm(const char *comp) > +{ > + /* > + * Crypto does not ignore a trailing new line symbol, > + * so make sure you don't supply a string containing > + * one. > + * This also means that we keep `backends' array for > + * zcomp_available_show() only an
Re: Enhancing semantics with memremap() - aliasing with memremap()
On Tue, 2016-05-31 at 19:27 +0200, Luis R. Rodriguez wrote: > On Tue, May 31, 2016 at 07:25:14PM +0200, Luis R. Rodriguez wrote: > > > > On Tue, May 31, 2016 at 09:58:28AM -0700, Christoph Hellwig wrote: > > > > > > On Tue, May 24, 2016 at 04:36:42PM -0700, Luis R. Rodriguez wrote: > > > > > > > > Is it a good time for that now? I would hope identifying proper > > > > aliasing uses for memremap() might be a bit easier now than for > > > > ioremap() given its not used as widely. It may be an easier target > > > > to also write some grammar rules for it as well. > > > > > > So you want an explicit opt-in flag to allow aliasing? Sounds fine to > > > me. > > > > Yup! Can the default then safely already be no-aliasing then? > > Or if aliasing is truly not needed as often a different API, this > maybe useful later if we pick up again module namespace stuff. I agree that we should be able to change memremap() to fail an aliasing request since it's a relatively new interface. ioremap() needs to start from adding a warning message. Thanks, -Toshi
Re: [PATCH v2 5/8] zram: cosmetic: cleanup documentation
On Tue, May 31, 2016 at 09:20:14PM +0900, Sergey Senozhatsky wrote: > zram documentation is a mix of different > styles: spaces, tabs, tabs + spaces, etc. > > clean it up. > > Signed-off-by: Sergey Senozhatsky > Cc: Minchan Kim > Cc: Joonsoo Kim > Cc: Jonathan Corbet Acked-by: Minchan Kim
linux-next: manual merge of the arm tree with Linus' tree
Hi Russell, Today's linux-next merge of the arm tree got a conflict in: drivers/gpu/drm/rockchip/rockchip_drm_drv.c between commit: 2d90d477430d ("drm/rockchip: support non-iommu buffer path") from Linus' tree and commit: ac09e446377a ("drm: convert DT component matching to component_match_add_release()") from the arm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc drivers/gpu/drm/rockchip/rockchip_drm_drv.c index a409d1f703cb,8168810053b3.. --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@@ -507,18 -502,9 +512,20 @@@ static int rockchip_drm_platform_probe( continue; } + iommu = of_parse_phandle(port->parent, "iommus", 0); + if (!iommu || !of_device_is_available(iommu->parent)) { + dev_dbg(dev, "no iommu attached for %s, using non-iommu buffers\n", + port->parent->full_name); + /* + * if there is a crtc not support iommu, force set all + * crtc use non-iommu buffer. + */ + is_support_iommu = false; + } + - component_match_add(dev, &match, compare_of, port->parent); + of_node_get(port->parent); + component_match_add_release(dev, &match, release_of, + compare_of, port->parent); of_node_put(port); }
Re: iommu/iova: introduce per-cpu caching to iova allocation
On Fri, May 27, 2016 at 09:50:07PM +, Linux Kernel wrote: > Web: > https://git.kernel.org/torvalds/c/9257b4a206fc0229dd5f84b78e4d1ebf3f91d270 > Commit: 9257b4a206fc0229dd5f84b78e4d1ebf3f91d270 > Parent: 2aac630429d986a43ac59525a4cff47a624dc58e > Refname:refs/heads/master > Author: Omer Peleg > AuthorDate: Wed Apr 20 11:34:11 2016 +0300 > Committer: David Woodhouse > CommitDate: Wed Apr 20 15:42:24 2016 -0400 > > iommu/iova: introduce per-cpu caching to iova allocation Since this commit, I'm seeing various traces from alloc_iova_fast spew this.. [ 11.403098] BUG: using smp_processor_id() in preemptible [] code: systemd-udevd/331 [ 11.403101] caller is debug_smp_processor_id+0x17/0x20 [ 11.403103] CPU: 1 PID: 331 Comm: systemd-udevd Not tainted 4.7.0-rc1-think+ #5 [ 11.403106] 8804fc7d8040 21902647 8804fef6b578 a83b7467 [ 11.403107] 0001 a8c310f3 8804fef6b5a8 a83d6d2a [ 11.403108] 8804fe75e398 0001 000f [ 11.403108] Call Trace: [ 11.403111] [] dump_stack+0x68/0xa1 [ 11.403112] [] check_preemption_disabled+0xda/0xe0 [ 11.403113] [] debug_smp_processor_id+0x17/0x20 [ 11.403115] [] alloc_iova_fast+0x11b/0x250 [ 11.403117] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 11.403119] [] intel_alloc_iova+0x86/0xe0 [ 11.403120] [] __intel_map_single+0x98/0x190 [ 11.403121] [] intel_map_page+0x39/0x40 [ 11.403124] [] usb_hcd_map_urb_for_dma+0x4ab/0x5b0 [ 11.403125] [] ? __intel_map_single+0x190/0x190 [ 11.403126] [] usb_hcd_submit_urb+0x36d/0xb40 [ 11.403128] [] ? add_lock_to_list.isra.29.constprop.46+0x77/0xb0 [ 11.403129] [] ? __lock_acquire+0xc43/0x1200 [ 11.403131] [] usb_submit_urb+0x2f4/0x550 [ 11.403133] [] usb_serial_generic_submit_read_urb+0x4c/0xc0 [ 11.403134] [] ? trace_hardirqs_on_caller+0xed/0x1b0 [ 11.403135] [] usb_serial_generic_submit_read_urbs+0x1f/0x80 [ 11.403137] [] ? _raw_spin_unlock_irqrestore+0x42/0x70 [ 11.403137] [] usb_serial_generic_open+0x4e/0x60 [ 11.403139] [] usb_console_setup+0x1ff/0x3c0 [ 11.403140] [] ? _raw_spin_unlock+0x31/0x50 [ 11.403141] [] register_console+0x29a/0x380 [ 11.403142] [] usb_serial_console_init+0x22/0x40 [ 11.403143] [] usb_serial_probe+0x1175/0x11c0 [ 11.403144] [] ? debug_smp_processor_id+0x17/0x20 [ 11.403145] [] ? get_lock_stats+0x19/0x50 [ 11.403146] [] ? debug_smp_processor_id+0x17/0x20 [ 11.403147] [] ? get_lock_stats+0x19/0x50 [ 11.403148] [] ? _raw_spin_unlock_irqrestore+0x57/0x70 [ 11.403149] [] ? trace_hardirqs_on+0xd/0x10 [ 11.403150] [] ? _raw_spin_unlock_irqrestore+0x42/0x70 [ 11.403151] [] usb_probe_interface+0x122/0x2e0 [ 11.403153] [] driver_probe_device+0x245/0x450 [ 11.403154] [] __driver_attach+0xd5/0x100 [ 11.403155] [] ? driver_probe_device+0x450/0x450 [ 11.403156] [] bus_for_each_dev+0x73/0xc0 [ 11.403157] [] driver_attach+0x1e/0x20 [ 11.403158] [] usb_serial_register_drivers+0x266/0x4d0 [ 11.403159] [] ? 0xc0301000 [ 11.403161] [] usb_serial_module_init+0x1e/0x1000 [usb_debug] [ 11.403163] [] do_one_initcall+0xf7/0x180 [ 11.403164] [] ? rcu_read_lock_sched_held+0x6c/0x80 [ 11.403165] [] ? kmem_cache_alloc_trace+0x2c3/0x360 [ 11.403167] [] ? do_init_module+0x27/0x1da [ 11.403169] [] do_init_module+0x5f/0x1da [ 11.403171] [] load_module+0x21ee/0x27d0 [ 11.403172] [] ? disable_ro_nx+0x50/0x50 [ 11.403174] [] ? show_coresize+0x30/0x30 [ 11.403175] [] SYSC_finit_module+0xe6/0x120 [ 11.403177] [] SyS_finit_module+0xe/0x10 [ 11.403178] [] do_syscall_64+0x61/0x170 [ 11.403179] [] entry_SYSCALL64_slow_path+0x25/0x25 [ 11.403269] BUG: using smp_processor_id() in preemptible [] code: systemd-udevd/331 [ 11.403270] caller is debug_smp_processor_id+0x17/0x20 [ 11.403272] CPU: 1 PID: 331 Comm: systemd-udevd Not tainted 4.7.0-rc1-think+ #5 [ 11.403273] 8804fc7d8040 21902647 8804fef6b578 a83b7467 [ 11.403274] 0001 a8c310f3 8804fef6b5a8 a83d6d2a [ 11.403276] 8804fe75e398 0001 000f [ 11.403276] Call Trace: [ 11.403277] [] dump_stack+0x68/0xa1 [ 11.403278] [] check_preemption_disabled+0xda/0xe0 [ 11.403279] [] debug_smp_processor_id+0x17/0x20 [ 11.403280] [] alloc_iova_fast+0x11b/0x250 [ 11.403281] [] ? mark_held_locks+0x76/0xa0 [ 11.403282] [] intel_alloc_iova+0x86/0xe0 [ 11.403283] [] __intel_map_single+0x98/0x190 [ 11.403284] [] intel_map_page+0x39/0x40 [ 11.403286] [] usb_hcd_map_urb_for_dma+0x4ab/0x5b0 [ 11.403287] [] ? __intel_map_single+0x190/0x190 [ 11.403288] [] usb_hcd_submit_urb+0x36d/0xb40 [ 11.403289] [] ? add_lock_to_list.isra.29.constprop.46+0x77/0xb0 [ 11.403290] [] ? __lock_acquire+0xc43/0x1200 [ 11.403292] [] usb_submit_urb+0x2f4/0
Re: [PATCH v2 6/8] zram: delete custom lzo/lz4
On Tue, May 31, 2016 at 09:20:15PM +0900, Sergey Senozhatsky wrote: > Remove lzo/lz4 backends, we use crypto API now. > > Signed-off-by: Sergey Senozhatsky > Cc: Minchan Kim > Cc: Joonsoo Kim Acked-by: Minchan Kim
Re: [PATCH] Documentation: Fix some grammar mistakes in sync_file.txt
2016-05-14 Javier Martinez Canillas : > There are two sentences in the Sync File documentation where the > english is a little off. This patch is an attempt to fix these. > > Signed-off-by: Javier Martinez Canillas > > --- > > Documentation/sync_file.txt | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt > index eaf8297dbca2..e8e2ebafe5fa 100644 > --- a/Documentation/sync_file.txt > +++ b/Documentation/sync_file.txt > @@ -6,8 +6,8 @@ > > This document serves as a guide for device drivers writers on what the > sync_file API is, and how drivers can support it. Sync file is the carrier of > -the fences(struct fence) that needs to synchronized between drivers or across > -process boundaries. > +the fences(struct fence) that are needed to synchronize between drivers or > +across process boundaries. > > The sync_file API is meant to be used to send and receive fence information > to/from userspace. It enables userspace to do explicit fencing, where instead > @@ -32,7 +32,7 @@ in-fences and out-fences > Sync files can go either to or from userspace. When a sync_file is sent from > the driver to userspace we call the fences it contains 'out-fences'. They are > related to a buffer that the driver is processing or is going to process, so > -the driver an create out-fence to be able to notify, through fence_signal(), > +the driver creates an out-fence to be able to notify, through fence_signal(), > when it has finished using (or processing) that buffer. Out-fences are fences > that the driver creates. Thanks, Javier! Reviewed-by: Gustavo Padovan Gustavo
Re: [PATCH v2 7/8] zram: add more compression algorithms
On Tue, May 31, 2016 at 09:20:16PM +0900, Sergey Senozhatsky wrote: > Add "deflate", "lz4hc", "842" algorithms to the list of > known compression backends. The real availability of those > algorithms, however, depends on the corresponding > CONFIG_CRYPTO_FOO config options. > > Signed-off-by: Sergey Senozhatsky > Cc: Minchan Kim > Cc: Joonsoo Kim Acked-by: Minchan Kim
[PATCH 0/1] shiftfs: uid/gid shifting filesystem
[This patch is updated for the new VFS APIs in 4.7-rc1; it's also been updated as Serge has been hammering on it] My use case for this is that I run a lot of unprivileged architectural emulation containers on my system using user namespaces. Details here: http://blog.hansenpartnership.com/unprivileged-build-containers/ They're mostly for building non-x86 stuff (like aarch64 and arm secure boot and mips images). For builds, I have all the environments in my home directory with downshifted uids; however, sometimes I need to use them to administer real images that run on systems, meaning the uids are the usual privileged ones not the downshifted ones. The only current choice I have is to start the emulation as root so the uid/gids match. The reason for this filesystem is to use my standard unprivileged containers to maintain these images. The way I do this is crack the image with a loop and then shift the uids before bringing up the container. I usually loop mount into /var/tmp/images/, so it's owned by real root there: jarvis:~ # ls -l /var/tmp/images/mips|head -4 total 0 drwxr-xr-x 1 root root 8192 May 12 08:33 bin drwxr-xr-x 1 root root6 May 12 08:33 boot drwxr-xr-x 1 root root 167 May 12 08:33 dev And I usually run my build containers with a uid_map of 0 10 1000 1000 1000 1 65534 101000 1 (maps 0-999 shifted, then shifts nobody to 1000 and keeps my uid [1000] fixed so I can mount my home directory into the namespace) and something similar with gid_map. So I shift mount the mips image with mount -t shiftfs -o uidmap=0:10:1000,uidmap=65534:101000:1,gidmap=0:10:100,gidmap=1 01:100101:899,gidmap=65533:101000:2 /var/tmp/images/mips /home/jejb/containers/mips and I now see it as jejb@jarvis:~> ls -l containers/mips|head -4 total 0 drwxr-xr-x 1 10 10 8192 May 12 08:33 bin/ drwxr-xr-x 1 10 106 May 12 08:33 boot/ drwxr-xr-x 1 10 10 167 May 12 08:33 dev/ Like my usual unprivileged build roots and I can now use an unprivileged container to enter and administer the image. It seems like a lot of container systems need to do something similar when they try and provide unprivileged access to standard images. Right at the moment, the security mechanism only allows root in the host to use this, but it's not impossible to come up with a scheme for marking trees that can safely be shift mounted by unprivileged user namespaces. James --- fs/Kconfig | 8 + fs/Makefile| 1 + fs/shiftfs.c | 877 + include/uapi/linux/magic.h | 2 + 4 files changed, 888 insertions(+)
[PATCH] LSM: Fix for security_inode_getsecurity and -EOPNOTSUPP
Subject: [PATCH] LSM: Fix for security_inode_getsecurity and -EOPNOTSUPP Serge Hallyn pointed out that the current implementation of security_inode_getsecurity() works if there is only one hook provided for it, but will fail if there is more than one and the attribute requested isn't supplied by the first module. This isn't a problem today, since only SELinux and Smack provide this hook and there is (currently) no way to enable both of those modules at the same time. Serge, however, wants to introduce a capability attribute and an inode_getsecurity hook in the capability security module to handle it. This addresses that upcoming problem, will be required for "extreme stacking" and is just a better implementation. Signed-off-by: Casey Schaufler --- security/security.c | 29 + 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/security/security.c b/security/security.c index 3644b03..5a749ed 100644 --- a/security/security.c +++ b/security/security.c @@ -699,18 +699,39 @@ int security_inode_killpriv(struct dentry *dentry) int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc) { + struct security_hook_list *hp; + int rc; + if (unlikely(IS_PRIVATE(inode))) return -EOPNOTSUPP; - return call_int_hook(inode_getsecurity, -EOPNOTSUPP, inode, name, - buffer, alloc); + /* +* Only one module will provide an attribute with a given name. +*/ + list_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) { + rc = hp->hook.inode_getsecurity(inode, name, buffer, alloc); + if (rc != -EOPNOTSUPP) + return rc; + } + return -EOPNOTSUPP; } int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) { + struct security_hook_list *hp; + int rc; + if (unlikely(IS_PRIVATE(inode))) return -EOPNOTSUPP; - return call_int_hook(inode_setsecurity, -EOPNOTSUPP, inode, name, - value, size, flags); + /* +* Only one module will provide an attribute with a given name. +*/ + list_for_each_entry(hp, &security_hook_heads.inode_setsecurity, list) { + rc = hp->hook.inode_setsecurity(inode, name, value, size, + flags); + if (rc != -EOPNOTSUPP) + return rc; + } + return -EOPNOTSUPP; } int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size)
[PATCH 1/1] shiftfs: uid/gid shifting bind mount
This allows any subtree to be uid/gid shifted and bound elsewhere. It does this by operating simlarly to overlayfs. Its primary use is for shifting the underlying uids of filesystems used to support unpriviliged (uid shifted) containers. The usual use case here is that the container is operating with an uid shifted unprivileged root but sometimes needs to make use of or work with a filesystem image that has root at real uid 0. Signed-off-by: James Bottomley --- v2: fixed up locking and addressed viro's comments use negative dentries on the underlying cached in d_fsdata to remove the extra lookup_one_len() calls Add show_options/statfs callbacks Add proper Kconfig plumbing v3: fix RCU lookup and IMA related BUG add more locking ext4 needs d_path which does lookups, so shift perm in open/release fix read and write inode accounting implement d_real to fix ext4 permission problems v4: adjust lookups for new dcache rules fix xattr lookups for new prototypes and indirect via vfs_ operations --- diff --git a/fs/Kconfig b/fs/Kconfig index b8fcb41..ced94c7 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -95,6 +95,14 @@ source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" source "fs/overlayfs/Kconfig" +config SHIFT_FS + tristate "UID/GID shifting overlay filesystem for containers" + help + This filesystem can overlay any mounted filesystem and shift + the uid/gid the files appear at. The idea is that + unprivileged containers can use this to mount root volumes + using this technique. + menu "Caches" source "fs/fscache/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 85b6e13..ff9890e 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -128,3 +128,4 @@ obj-y += exofs/ # Multiple modules obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS)+= efivarfs/ +obj-$(CONFIG_SHIFT_FS) += shiftfs.o diff --git a/fs/shiftfs.c b/fs/shiftfs.c new file mode 100644 index 000..318966d --- /dev/null +++ b/fs/shiftfs.c @@ -0,0 +1,877 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct shiftfs_super_info { + struct vfsmount *mnt; + struct uid_gid_map uid_map, gid_map; +}; + +static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode, + struct dentry *dentry); + +enum { + OPT_UIDMAP, + OPT_GIDMAP, + OPT_LAST, +}; + +/* global filesystem options */ +static const match_table_t tokens = { + { OPT_UIDMAP, "uidmap=%u:%u:%u" }, + { OPT_GIDMAP, "gidmap=%u:%u:%u" }, + { OPT_LAST, NULL } +}; + +/* + * code stolen from user_namespace.c ... except that these functions + * return the same id back if unmapped ... should probably have a + * library? + */ +static u32 map_id_down(struct uid_gid_map *map, u32 id) +{ + unsigned idx, extents; + u32 first, last; + + /* Find the matching extent */ + extents = map->nr_extents; + smp_rmb(); + for (idx = 0; idx < extents; idx++) { + first = map->extent[idx].first; + last = first + map->extent[idx].count - 1; + if (id >= first && id <= last) + break; + } + /* Map the id or note failure */ + if (idx < extents) + id = (id - first) + map->extent[idx].lower_first; + + return id; +} + +static u32 map_id_up(struct uid_gid_map *map, u32 id) +{ + unsigned idx, extents; + u32 first, last; + + /* Find the matching extent */ + extents = map->nr_extents; + smp_rmb(); + for (idx = 0; idx < extents; idx++) { + first = map->extent[idx].lower_first; + last = first + map->extent[idx].count - 1; + if (id >= first && id <= last) + break; + } + /* Map the id or note failure */ + if (idx < extents) + id = (id - first) + map->extent[idx].first; + + return id; +} + +static bool mappings_overlap(struct uid_gid_map *new_map, +struct uid_gid_extent *extent) +{ + u32 upper_first, lower_first, upper_last, lower_last; + unsigned idx; + + upper_first = extent->first; + lower_first = extent->lower_first; + upper_last = upper_first + extent->count - 1; + lower_last = lower_first + extent->count - 1; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + u32 prev_upper_first, prev_lower_first; + u32 prev_upper_last, prev_lower_last; + struct uid_gid_extent *prev; + + prev = &new_map->extent[idx]; + + prev_upper_first = prev->first; + prev_lower_first = prev->lower_first; + prev_upper_last = prev_upper_firs
Re: [PATCH 1/6] phy: Add USB Type-C PHY driver for rk3399
Hi Doug Thanks for your review, I will modified them in next version(v1) and with Guenter Roeck's comments in: https://chromium-review.googlesource.com/#/c/348154/ On 06/01/2016 05:35 AM, Doug Anderson wrote: Chris, On Thu, May 26, 2016 at 11:02 PM, Chris Zhong wrote: Add a PHY provider driver for the rk3399 SoC Type-c PHY. The USB Type-C PHY is designed to support the USB3 and DP applications. The PHY basically has two main components: USB3 and DisplyPort. USB3 operates in SuperSpeed mode and the DP can operate at RBR, HBR and HBR2 data rates. Signed-off-by: Chris Zhong --- drivers/phy/Kconfig | 7 + drivers/phy/Makefile | 1 + drivers/phy/phy-rockchip-typec.c | 823 +++ 3 files changed, 831 insertions(+) This is a bit of a superficial review. Hopefully we can find someone to do something more thorough.I create mode 100644 drivers/phy/phy-rockchip-typec.c diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index 26566db..dc388a3d 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -351,6 +351,13 @@ config PHY_ROCKCHIP_DP help Enable this to support the Rockchip Display Port PHY. +config PHY_ROCKCHIP_TYPEC + tristate "Rockchip TYPEC PHY Driver" + depends on ARCH_ROCKCHIP && OF + select GENERIC_PHY + help + Enable this to support the Rockchip USB TYPEC PHY. + config PHY_ST_SPEAR1310_MIPHY tristate "ST SPEAR1310-MIPHY driver" select GENERIC_PHY diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index 24596a9..91fa413 100644 --- a/drivers/phy/Makefile +++ b/drivers/phy/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_PHY_QCOM_APQ8064_SATA) += phy-qcom-apq8064-sata.o obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o obj-$(CONFIG_PHY_ROCKCHIP_EMMC) += phy-rockchip-emmc.o obj-$(CONFIG_PHY_ROCKCHIP_DP) += phy-rockchip-dp.o +obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA)+= phy-qcom-ipq806x-sata.o obj-$(CONFIG_PHY_ST_SPEAR1310_MIPHY) += phy-spear1310-miphy.o obj-$(CONFIG_PHY_ST_SPEAR1340_MIPHY) += phy-spear1340-miphy.o diff --git a/drivers/phy/phy-rockchip-typec.c b/drivers/phy/phy-rockchip-typec.c new file mode 100644 index 000..6609cfb --- /dev/null +++ b/drivers/phy/phy-rockchip-typec.c @@ -0,0 +1,823 @@ +/* + * Rockchip usb3 PHY driver + * + * Copyright (C) 2016 Kever Yang + *Chris Zhong + * Copyright (C) 2016 ROCKCHIP, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include p +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ADDR_ADJ 2 +#define CMN_SSM_BANDGAP(0x21 << ADDR_ADJ) +#define CMN_SSM_BIAS (0x22 << ADDR_ADJ) +#define CMN_PLLSM0_PLLEN (0x29 << ADDR_ADJ) +#define CMN_PLLSM0_PLLPRE (0x2a << ADDR_ADJ) +#define CMN_PLLSM0_PLLVREF (0x2b << ADDR_ADJ) +#define CMN_PLLSM0_PLLLOCK (0x2c << ADDR_ADJ) +#define CMN_PLLSM1_PLLEN (0x31 << ADDR_ADJ) +#define CMN_PLLSM1_PLLPRE (0x32 << ADDR_ADJ) +#define CMN_PLLSM1_PLLVREF (0x33 << ADDR_ADJ) +#define CMN_PLLSM1_PLLLOCK (0x34 << ADDR_ADJ) +#define CMN_PLLSM1_USER_DEF_CTRL (0x37 << ADDR_ADJ) +#define CMN_ICAL_OVRD (0xc1 << ADDR_ADJ) +#define CMN_PLL0_VCOCAL_OVRD (0x83 << ADDR_ADJ) +#define CMN_PLL0_VCOCAL_INIT (0x84 << ADDR_ADJ) +#define CMN_PLL0_VCOCAL_ITER (0x85 << ADDR_ADJ) +#define CMN_PLL0_LOCK_REFCNT_START (0x90 << ADDR_ADJ) +#define CMN_PLL0_LOCK_PLLCNT_START (0x92 << ADDR_ADJ) +#define CMN_PLL0_LOCK_PLLCNT_THR (0x93 << ADDR_ADJ) +#define CMN_PLL0_INTDIV(0x94 << ADDR_ADJ) +#define CMN_PLL0_FRACDIV (0x95 << ADDR_ADJ) +#define CMN_PLL0_HIGH_THR (0x96 << ADDR_ADJ) +#define CMN_PLL0_DSM_DIAG (0x97 << ADDR_ADJ) +#define CMN_PLL0_SS_CTRL1 (0x98 << ADDR_ADJ) +#define CMN_PLL0_SS_CTRL2 (0x99 << ADDR_ADJ) +#define CMN_PLL1_VCOCAL_START (0xa1 << ADDR_ADJ) +#define CMN_PLL1_VCOCAL_OVRD (0xa3 << ADDR_ADJ) +#define CMN_PLL1_VCOCAL_INIT (0xa4 << ADDR_ADJ) +#define CMN_PLL1_VCOCAL_ITER (0xa5 << ADDR_ADJ) +#define CMN_PLL1_LOCK_REFCNT_START (0xb0 << ADDR_ADJ) +#define CMN_PLL1_LOCK
[PATCH v1] mm: thp: check pmd_trans_unstable() after split_huge_pmd()
split_huge_pmd() doesn't guarantee that the pmd is normal pmd pointing to pte entries, which can be checked with pmd_trans_unstable(). Some callers of split_huge_pmd() don't have the check, so let's add it. Signed-off-by: Naoya Horiguchi --- mm/gup.c | 2 ++ mm/mempolicy.c | 2 ++ mm/mprotect.c | 2 +- mm/mremap.c| 3 +-- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git v4.6-mmotm-2016-05-27-15-19/mm/gup.c v4.6-mmotm-2016-05-27-15-19_patched/mm/gup.c index c057784..dee142e 100644 --- v4.6-mmotm-2016-05-27-15-19/mm/gup.c +++ v4.6-mmotm-2016-05-27-15-19_patched/mm/gup.c @@ -279,6 +279,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma, spin_unlock(ptl); ret = 0; split_huge_pmd(vma, pmd, address); + if (pmd_trans_unstable(pmd)) + ret = -EBUSY; } else { get_page(page); spin_unlock(ptl); diff --git v4.6-mmotm-2016-05-27-15-19/mm/mempolicy.c v4.6-mmotm-2016-05-27-15-19_patched/mm/mempolicy.c index 297d685..fe90e50 100644 --- v4.6-mmotm-2016-05-27-15-19/mm/mempolicy.c +++ v4.6-mmotm-2016-05-27-15-19_patched/mm/mempolicy.c @@ -512,6 +512,8 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, } } + if (pmd_trans_unstable(pmd)) + return 0; retry: pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { diff --git v4.6-mmotm-2016-05-27-15-19/mm/mprotect.c v4.6-mmotm-2016-05-27-15-19_patched/mm/mprotect.c index 5019a1e..a4830f0 100644 --- v4.6-mmotm-2016-05-27-15-19/mm/mprotect.c +++ v4.6-mmotm-2016-05-27-15-19_patched/mm/mprotect.c @@ -163,7 +163,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) { split_huge_pmd(vma, pmd, addr); - if (pmd_none(*pmd)) + if (pmd_trans_unstable(pmd)) continue; } else { int nr_ptes = change_huge_pmd(vma, pmd, addr, diff --git v4.6-mmotm-2016-05-27-15-19/mm/mremap.c v4.6-mmotm-2016-05-27-15-19_patched/mm/mremap.c index 1f157ad..da22ad2 100644 --- v4.6-mmotm-2016-05-27-15-19/mm/mremap.c +++ v4.6-mmotm-2016-05-27-15-19_patched/mm/mremap.c @@ -210,9 +210,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, } } split_huge_pmd(vma, old_pmd, old_addr); - if (pmd_none(*old_pmd)) + if (pmd_trans_unstable(old_pmd)) continue; - VM_BUG_ON(pmd_trans_huge(*old_pmd)); } if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr)) break; -- 2.7.0
Re: [PATCH v2 8/8] zram: drop gfp_t from zcomp_strm_alloc()
On Tue, May 31, 2016 at 09:20:17PM +0900, Sergey Senozhatsky wrote: > We now allocate streams from CPU_UP hot-plug path, there are > no context-dependent stream allocations anymore and we can > schedule from zcomp_strm_alloc(). Use GFP_KERNEL directly and > drop a gfp_t parameter. > > Signed-off-by: Sergey Senozhatsky > Cc: Minchan Kim > Cc: Joonsoo Kim Acked-by: Minchan Kim
Re: [PATCH 2/6] Documentation: bindings: add dt doc for Rockchip USB Type-C PHY
On 06/01/2016 03:57 AM, Doug Anderson wrote: Chris, On Fri, May 27, 2016 at 1:46 AM, Chris Zhong wrote: Hi Heiko On 05/27/2016 04:29 PM, Heiko Stuebner wrote: Hi Chris, Am Freitag, 27. Mai 2016, 14:02:15 schrieb Chris Zhong: This patch adds a binding that describes the Rockchip USB Type-C PHY for rk3399. Signed-off-by: Chris Zhong --- .../devicetree/bindings/phy/phy-rockchip-typec.txt | 55 ++ 1 file changed, 55 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt new file mode 100644 index 000..402f667 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt @@ -0,0 +1,55 @@ +ROCKCHIP type-c PHY + +Required properties: + - compatible: should be "rockchip,rk3399-typec-phy" + - reg : Address and length of the usb phy control register set + - rockchip,grf : phandle to the syscon managing the "general + register files" + - clocks : phandle + clock specifier for the phy clocks + - clock-names: string, clock name, must be "tcpdcore", "tcpdphy_ref"; + - resets : a list of phandle + reset specifier pairs + - reset-names : string reset name, must be: +"tcphy_rst", "tcphy_pipe_rst", "uphy_tcphy_rst" In other contexts I believe Heiko has requested that a suffix like "_rst" not be present in the names of reset signals. We already know that this is a list of reset names so the "_rst" is redundant. Yes, "_rst" is redundant.I will remove it next version. + - #phy-cells: Must be 0. See ./phy-bindings.txt for details. + - rockchip,usb3phy*: phy registers embed in grf + +Example: + tcphy0: phy@ff7c { + compatible = "rockchip,rk3399-typec-phy"; + reg = <0x0 0xff7c 0x0 0x4>; + #phy-cells = <0>; + rockchip,grf = <&grf>; + clocks = <&cru SCLK_UPHY0_TCPDCORE>, +<&cru SCLK_UPHY0_TCPDPHY_REF>; + clock-names = "tcpdcore", "tcpdphy_ref"; + resets = <&cru SRST_UPHY0>, +<&cru SRST_UPHY0_PIPE_L00>, +<&cru SRST_P_UPHY0_TCPHY>; + reset-names = "tcphy_rst", "tcphy_pipe_rst", "uphy_tcphy_rst"; + rockchip,usb3phy_con0 = <0x0e580 0 16>; + rockchip,usb3phy_con1 = <0x0e584 0 16>; + rockchip,usb3phy_con2 = <0x0e588 0 16>; + rockchip,usb3phy_status0 = <0x0e5c0 0 13>; + rockchip,usb3phy_status1 = <0x0e5c4 0 12>; please embedded this register data in the driver instead (not in the devicetree), matched against the compatible value. See Frank's usb2phy driver for reference if needed. Okay, I will move them to driver file next version, Thanks. Just making sure: I saw a RESEND of your original version get posted, but nothing that addresses Heiko's comments, right? Also: note that bindings should be sent in the patch _before_ the code. So instead of: [1] phy: Add USB Type-C PHY driver for rk3399 [2] Documentation: bindings: add dt doc for Rockchip USB Type-C PHY [3] drm/rockchip: vop: add cdn DP support for rk3399 [4] Documentation: bindings: add dt documentation for cdn DP controller You should have: [1] Documentation: bindings: add dt doc for Rockchip USB Type-C PHY [2] phy: Add USB Type-C PHY driver for rk3399 [3] Documentation: bindings: add dt documentation for cdn DP controller [4] drm/rockchip: vop: add cdn DP support for rk3399 The first patch is lack of a header file, so I resend the patches, and did not change anything, I will do them in V1 version. And I will change the sequence of patches, Thanks for your comments. -Doug
Re: [PATCH v2 0/8] zram: switch to crypto api
On (05/31/16 12:07), Andrew Morton wrote: > > test-fio-zram-842 > > 197.907655282 seconds time elapsed > > 201.623142884 seconds time elapsed > > 226.854291345 seconds time elapsed > > test-fio-zram-DEFLATE > > 253.259516155 seconds time elapsed > > 258.148563401 seconds time elapsed > > 290.251909365 seconds time elapsed > > test-fio-zram-LZ4 > > 27.022598717 seconds time elapsed > > 29.580522717 seconds time elapsed > > 33.293463430 seconds time elapsed > > test-fio-zram-LZ4HC > > 56.393954615 seconds time elapsed > > 74.904659747 seconds time elapsed > > 101.940998564 seconds time elapsed > > test-fio-zram-LZO > > 28.155948075 seconds time elapsed > > 30.390036330 seconds time elapsed > > 34.455773159 seconds time elapsed > > I'm having trouble understanding the benchmark results. What is being > compared to what and which was faster and how much? Hello, 'benchmarking' was probably a bit too strong word to use here. basically, I performed fio test with the increasing number of parallel jobs (max to 3) on a 3G zram device, using `static' data and the following crypto comp algorithms: 842, deflate, lz4, lz4hc, lzo the output was: - test running time (which can tell us what algorithms performs faster) and - zram mm_stat (which tells the compressed memory size, max used memory, etc). it's just for information. for example, LZ4HC has twice the running time of LZO, but the compressed memory size is: 23592960 vs 34603008 bytes. -ss
Re: [LKP] [lkp] [dcache_{readdir, dir_lseek}() users] 4e82901cd6: reaim.jobs_per_min -49.1% regression
Al Viro writes: > On Tue, May 31, 2016 at 04:15:15PM +0800, kernel test robot wrote: >> >> >> FYI, we noticed reaim.jobs_per_min -49.1% regression due to commit: >> >> commit 4e82901cd6d1af21ae232ae835c36d8230c809e8 >> ("dcache_{readdir,dir_lseek}() users: switch to ->iterate_shared") >> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master >> >> in testcase: reaim >> on test machine: lkp-hsx04: 144 threads Brickland Haswell-EX with 512G memory >> with following parameters: >> cpufreq_governor=performance/iterations=4/nr_task=1600%/test=fserver > > [snip] > > Is there any way to get the profiles? Sorry, our perf-profile support is broken after upgrading perf-profile recently. We will restore it ASAP and send back to you the perf profile results. Best Regards, Huang, Ying
Re: [PATCH v2 5/5] arm64/numa: avoid inconsistent information to be printed
On 2016/5/31 19:27, Leizhen (ThunderTown) wrote: > > > On 2016/5/31 17:07, Matthias Brugger wrote: >> >> >> On 28/05/16 11:22, Zhen Lei wrote: >>> numa_init(of_numa_init) may returned error because of numa configuration >>> error. So "No NUMA configuration found" is inaccurate. In fact, specific >>> configuration error information should be immediately printed by the >>> testing branch. >>> >>> Signed-off-by: Zhen Lei >>> --- >> >> Which kernel version is this patch based on? > > Base on > mainline(git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git), I > git pulled about 3-5 days ago, the last commit-id is dc03c0f. > > And thess patches base on https://lkml.org/lkml/2016/5/24/679 series(acpi > numa) as David Daney's requirement. > >> >> Regards, >> Matthias >> >>> arch/arm64/mm/numa.c | 6 +++--- >>> drivers/of/of_numa.c | 7 +++ >>> 2 files changed, 6 insertions(+), 7 deletions(-) >>> >>> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c >>> index 2601660..1b9622c 100644 >>> --- a/arch/arm64/mm/numa.c >>> +++ b/arch/arm64/mm/numa.c >>> @@ -338,8 +338,10 @@ static int __init numa_init(int (*init_func)(void)) >>> if (ret < 0) >>> return ret; >>> >>> -if (nodes_empty(numa_nodes_parsed)) >>> +if (nodes_empty(numa_nodes_parsed)) { >>> +pr_info("No NUMA configuration found\n"); >>> return -EINVAL; >>> +} >>> >>> ret = numa_register_nodes(); >>> if (ret < 0) >>> @@ -370,8 +372,6 @@ static int __init dummy_numa_init(void) >>> >>> if (numa_off) >>> pr_info("NUMA disabled\n"); /* Forced off on command line. */ >>> -else >>> -pr_info("No NUMA configuration found\n"); >>> pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", >>> 0LLU, PFN_PHYS(max_pfn) - 1); >>> >>> diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c >>> index fb62307..3157130 100644 >>> --- a/drivers/of/of_numa.c >>> +++ b/drivers/of/of_numa.c >>> @@ -63,7 +63,7 @@ static int __init of_numa_parse_memory_nodes(void) >>> struct device_node *np = NULL; >>> struct resource rsrc; >>> u32 nid; >>> -int i, r = 0; >>> +int i, r; >>> >>> for_each_node_by_type(np, "memory") { >>> r = of_property_read_u32(np, "numa-node-id", &nid); >>> @@ -81,12 +81,11 @@ static int __init of_numa_parse_memory_nodes(void) >>> if (!i || r) { >>> of_node_put(np); >>> pr_err("NUMA: bad property in memory node\n"); >>> -r = r ? : -EINVAL; >>> -break; >>> +return r ? : -EINVAL; >>> } >>> } >>> >>> -return r; >>> +return 0; >>> } >>> >> >> Well this is fixing changes you introduced in this patch-set. Any reason >> this is not part of patch 2? > > Because they fixed two different problems. Hi, Matthias I thougth it again on my way home yesterday. Yeah, you're right, move this part to patch 2, will make these two patches looks more well. I put it here before, because for "No numa configuration" case, it originally returns error code, so that it can not walk to "if (nodes_empty(numa_nodes_parsed))". ret = init_func(); if (ret < 0) return ret; -if (nodes_empty(numa_nodes_parsed)) +if (nodes_empty(numa_nodes_parsed)) { +pr_info("No NUMA configuration found\n"); return -EINVAL; +} Regards, Zhen Lei > >> >>> static int __init of_numa_parse_distance_map_v1(struct device_node *map) >>> -- >>> 2.5.0 >>> >>> >>> >>> ___ >>> linux-arm-kernel mailing list >>> linux-arm-ker...@lists.infradead.org >>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >>> >> >> . >>
Re: [PATCH] attribute_container: Fix typo
On 31/05/2016 23:26, Greg KH wrote: On Tue, May 31, 2016 at 04:17:15PM +0800, Xiubo Li wrote: Signed-off-by: Xiubo Li I can't take patches without any changelog text, sorry. My mistake, I will send the v2 one. Thanks.
Re: [PATCH 00/10] Documentation/Sphinx
On Mon, 30 May 2016 11:10:26 +0200 Daniel Vetter wrote: > I think next steps is to get this merged into docs-next, with a stable > tag, so that I can pull it into drm-misc. So, I want to take another look at this, which probably will need another day or two before it can happen. First impression, though, is that this is great, so I'm expecting that I'll be applying it. Not sure about the stable tag, though? It doesn't really seem like stable material? jon
Re: [PATCH v2 4/8] zram: use crypto api to check alg availability
Hello Minchan, On (06/01/16 09:03), Minchan Kim wrote: [..] > So, if we do 'cat /sys/block/zram0/comp_algorithm", every crypto modules > in the backend array are loaded in memory and not unloaded until admin > executes rmmod? Right? yes, I think so. [..] > If user load out-of-tree crypto compression module, what's status of > comp_algorithm? > > #> insmod foo_crypto.ko > #> echo foo > /sys/block/zram0/comp_algorithm > #> cat /sys/block/zram0/comp_algorithm > lzo lz4 [foo] > ? yes, "lzo lz4 [out-of-tree-module-name]". -ss
Re: [PATCH 3/4] doc: dt: pwm: add binding for ChromeOS EC PWM
Hi Gwendal, On Sat, May 28, 2016 at 10:00:45PM -0700, Gwendal Grignou wrote: (Top posting?) > Instead of using device tree, assuming you have firmware control, > another way could be to add a firmware feature: I do have firmware control, but I don't think that will be too necessary actually. > for instance, there is one EC_FEATURE_PWM_FAN, the fan PWM, one for > the keyboard lightning as well. (see num ec_feature_code) > By adding one more, you let cros_ec_dev load the platform driver for > you, it works even if the machine does not use device tree. I think we can actually get this without doing the EC_FEATURE_* thing (which notably is not in upstream, BTW), nor by requiring a separate node with the "google,cros-ec-pwm" property, but instead by running a sample EC_CMD_PWM_GET_DUTY command on indeces [0, 255], stopping at the first INVAL_PARAM failure (if we stop at 0, then we have no PWM API at all). But that still leaves the problem of mapping PWMs to consumer devices. The phandle translation is very helpful for our DT-based systems, but there isn't a really nice equivalent for non-DT ones. I see struct pwm_lookup, which looks like it could do some of what we want, but we'd still either need to encode a ton of board-specific information in the kernel, or else start exposing PWMs via the non-EC_PWM_TYPE_GENERIC methods (see the new enum ec_pwm_type, where we can see EC_PWM_TYPE_KB_LIGHT and EC_PWM_TYPE_DISPLAY_LIGHT). Anyway, along this line, perhaps it makes sense to: (a) drop the "google,cros-ec-pwm" property (via the probe method I described above) (b) drop the separate node for "google,cros-ec-pwm", since the presence of this feature can be detected by the same methods as in (a) leaving the only DT binding change to be to: (c) add an optional #pwm-cells property to the cros-ec node (Documentation/devicetree/bindings/mfd/cros-ec.txt) so that we can still utilize the nice PWM of_xlate stuff (and its corresponding pwms = <...> property for consumer devices) This would set us up for a minimal reliance on device tree (we can try to expose EC_PWM_TYPE_KB_LIGHT or EC_PWM_TYPE_DISPLAY_LIGHT via the pwm_lookup infrastructure, once we need to support a non-DT system), without losing much of its benefits (we can still do index-based / phandle lookups with DT). The remaining question is: where should this minimal PWM driver go, then? We would want to make calls to it from the cros_ec MFD/platform driver, so... drivers/platform/chrome/cros_ec_dev.c? Or more likely a modularized drivers/platform/chrome/cros_ec_pwm.c, where cros_ec_dev.c can make a few calls to it? Brian > Gwendal. > > On Fri, May 27, 2016 at 6:39 PM, Brian Norris > wrote: > > The ChromeOS Embedded Controller can support controlling its attached > > PWMs via its host-command interface. The number of supported PWMs varies > > on a per-board basis, so we define a "google,max-pwms" property to > > handle this. And because the EC only allows specifying the duty cycle > > and not the period, we don't specify the period via pwm-cells, and > > instead have only support 1 cell -- to specify the index. > > > > Signed-off-by: Brian Norris > > --- > > .../devicetree/bindings/pwm/google,cros-ec-pwm.txt | 25 > > ++ > > 1 file changed, 25 insertions(+) > > create mode 100644 > > Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt > > > > diff --git a/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt > > b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt > > new file mode 100644 > > index ..f1c9540fc23f > > --- /dev/null > > +++ b/Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.txt > > @@ -0,0 +1,25 @@ > > +* PWM controlled by ChromeOS EC > > + > > +Google's ChromeOS EC PWM is a simple PWM attached to the Embedded > > Controller > > +(EC) and controlled via a host-command interface. > > + > > +An EC PWM node should be only found as a sub-node of the EC node (see > > +Documentation/devicetree/bindings/mfd/cros-ec.txt). > > + > > +Required properties: > > +- compatible: Must contain "google,cros-ec-pwm" > > +- #pwm-cells: Should be 1. The cell specifies the PWM index. > > +- google,max-pwms: Specifies the number of PWMs supported by the EC. > > + > > +Example: > > + cros-ec@0 { > > + compatible = "google,cros-ec-spi"; > > + > > + ... > > + > > + cros_ec_pwm: ec-pwm { > > + compatible = "google,cros-ec-pwm"; > > + #pwm-cells = <1>; > > + google,max-pwms = <4>; > > + }; > > + }; > > -- > > 2.8.0.rc3.226.g39d4020 > >
Re: [PATCH v2 3/8] zram: align zcomp interface to crypto comp API
On (06/01/16 08:48), Minchan Kim wrote: > On Tue, May 31, 2016 at 09:20:12PM +0900, Sergey Senozhatsky wrote: > > A cosmetic change: > > update zcomp interface to be more aligned with the crypto API. > > > > Signed-off-by: Sergey Senozhatsky > > Cc: Minchan Kim > > Cc: Joonsoo Kim > > Acked-by: Minchan Kim > > Aha, you changed src_len in this patchset. :) oh, thanks. hmm... I didn't want to add `cosmetic noise' to the 0002, but probably 0003 better be part of 0002. the patch is not so big so it won't complicate 0002 a lot. I'll ask Andrew to squash, or will squash on my side and resend the whole series. -ss
Re: [PATCH v2 2/8] zram: switch to crypto compress API
On (06/01/16 08:44), Minchan Kim wrote: > > > trivial: > > One thing I got missed in review. > > > -int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, > > - const unsigned char *src, size_t *dst_len); > > +int zcomp_compress(struct zcomp_strm *zstrm, > > + const unsigned char *src, unsigned int *dst_len); > > unsigned int for dst_len > > > > > -int zcomp_decompress(struct zcomp *comp, const unsigned char *src, > > +int zcomp_decompress(struct zcomp_strm *zstrm, > > + const unsigned char *src, > > size_t src_len, unsigned char *dst); > > > > size_t src_len? thanks for spotting it! -ss
RE: [PATCH v2 3/3] ACPI / button: Send "open" state after boot/resume
Hi, > From: Benjamin Tissoires [mailto:benjamin.tissoi...@gmail.com] > Subject: Re: [PATCH v2 3/3] ACPI / button: Send "open" state after > boot/resume > > Hi Lv, > > On Tue, May 31, 2016 at 4:55 AM, Zheng, Lv wrote: > > Hi, > > > >> From: Benjamin Tissoires [mailto:benjamin.tissoi...@gmail.com] > >> Subject: Re: [PATCH v2 3/3] ACPI / button: Send "open" state after > >> boot/resume > >> > >> On Fri, May 27, 2016 at 9:16 AM, Lv Zheng > wrote: > [snipped > ]>> As Valdis replied on 0/3, I don't think this is a good solution (even > >> temporary). Linux should not assume the current state of a input > >> device, and sending unconditionally 1 here is wrong. If the device is > >> on a docking station, you will wake up the wrong monitor and screw > the > >> user session (and this will be a regression). > > [Lv Zheng] > > We are doing the test to see how this behaves on several different > platforms. > > > >> > >> How about we simply send the current LID state stored in the ACPI? > >> something like calling acpi_lid_send_state() directly? > > [Lv Zheng] > > This is what we are going to eliminate in [PATCH 01]. > > We have several real bugs related to sending a wrong state to the > userspace. > > Userspace will suspend right after resume because of the 'close' state. > > On the other hand, you are trying to remove 23de5d9ef2a4bbc4f733f, a > patch that has been around for 9 years and we only start seeing > devices where this logic is not working... > > I am not saying your approach is wrong, I am just saying that instead > of a plain revert, we should probably be more conservative and add a > quirk for those buggy machines. Ideally, we should try to understand > why there is such an issue that Windows doesn't have (the solution > might just be that given Windows doesn't care, we are screwed). > > BTW, on the Surface 3, there is a WMI > (f7cc25ec-d20b-404c-8903-0ed4359c18ae -> WQHE) which returns the > actual value of the LID, without using PNP0C0D at all. I have a > feeling that Windows might use it when it is in trouble or in an > unsure state. I couldn't find this WMI on the 2 other systems so that > may also be just a one shot for the Surface 3. [Lv Zheng] Thanks for the information. But it seems Surface 3 is not a good example for this. It is a runtime idle platform. And the root cause of the Surface 3 issue should be in the freeze code. After waking the system up via LID irq, the irq is dropped. But I guess it is risky to invoke irq handler right there, doing so could break may existing drivers. > > [snipped] > > [Lv Zheng] > > The understanding here is incorrect. > > We have 3 bogus devices. > > 1 of them is surface 3 which is a hardware reduced platform. > > The others are all traditional platforms. > > > > = > > The facts are: > > > > Both the platforms return cached lid state from _LID. > > The cached value will be updated by lid irq (via GPIO IRQ, GPE, or EC > event). > > AML tables will send lid notification in the irq handler. > > > > Some AML tables will update the cached value in _WAK (I'll describe why > it is necessary below). > > But updating the cached value in _WAK is not guaranteed by all AML > tables. > > > > For the 'close' state irq, all tables will send lid close notification. > > For the 'open' state irq, it seems there are tables never sending lid open > notification (sounds like Windows do not care about lid open). > > = > > > > Surface 3 is entirely a different case. > > It is a runtime idle system and hardware reduced. > > On that kind of system, lid open is handled by OS not by BIOS. > > Surface 3 is exactly the platform that doesn't send lid open notification. > > I guess the AML is intentionally written in this way to be compliant to > the traditional platforms. > > > > While on the traditional platforms: > > When lid is opened, BIOS handles the lid irq and wakes the system from > the FACS waking vector. > > So it is likely that there is no lid open irq after the system is resumed. > > BIOS may forget to update the cached lid value in the _WAK or some > other control methods that could be executed after resuming. > > Then if we send _LID result to the user space, the cached value could > apparently be 'close'. > > > > That explains why there is no "lid open" configuration in the "Windows > Device Manager". > > > >> > >> I propose as a workaround to enable a kthread that will monitor the > >> lid state and update the correct value to userspace (5 sec of polling > >> time should be enough given that systemd checks every 20 sec). > >> We should probably have this workaround only for a set of known > >> devices, as it might just be temporary for those until the actual > >> underlying problem is fixed (wrong DSDT in the Surface 3 case that > >> doesn't notify at all, issue in the EC for the Surface Pro 1 and the > >> Samsung N210). > > [Lv Zheng] > > That cannot help to solve the issue/gap. > > > > The problem is Linux userspace has a facility re-checking lid state wh
[PATCH v2] attribute_container: Fix typo
The 't' in "function" was missing, this patch fixes this typo: s/funcion/function/g Signed-off-by: Xiubo Li --- Changes for V2: - Add changelog text. drivers/base/attribute_container.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index 2ba4cac..95e3ef8 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -243,7 +243,7 @@ attribute_container_remove_device(struct device *dev, * @dev: The generic device to run the trigger for * @fn the function to execute for each classdev. * - * This funcion is for executing a trigger when you need to know both + * This function is for executing a trigger when you need to know both * the container and the classdev. If you only care about the * container, then use attribute_container_trigger() instead. */ -- 1.8.3.1
Re: [PATCH v2 2/4] Documentation: Add documentation for APM X-Gene SoC PMU DTS binding
Hi Mark, On Tue, May 31, 2016 at 10:17 AM, Tai Tri Nguyen wrote: > Hi Mark, > > On Tue, May 31, 2016 at 9:56 AM, Mark Rutland wrote: >> On Mon, May 02, 2016 at 02:46:05PM -0700, Tai Tri Nguyen wrote: >>> Hi Rob, >>> >>> On Mon, May 2, 2016 at 1:56 PM, Rob Herring wrote: >>> > On Wed, Apr 20, 2016 at 12:31:22PM +0100, Will Deacon wrote: >>> >> On Mon, Apr 18, 2016 at 01:04:53PM -0700, Tai Tri Nguyen wrote: >>> >> > >> +Required properties for MCB subnode: >>> >> > >> +- compatible : Shall be "apm,xgene-pmu-mcb". >>> >> > >> +- reg: First resource shall be the MCB PMU >>> >> > >> resource. >>> >> > >> +- index : Instance number of the MCB PMU. >>> >> > >> + >>> >> > >> +Required properties for MC subnode: >>> >> > >> +- compatible : Shall be "apm,xgene-pmu-mc". >>> >> > >> +- reg: First resource shall be the MC PMU >>> >> > >> resource. >>> >> > >> +- index : Instance number of the MC PMU. >>> >> > > >>> >> > > Don't use indexes. You probably need phandles to the nodes these are >>> >> > > related to. >>> >> > > >>> >> > > How many variations of child nodes do you expect to have? 2, 10, 50? >>> >> > > You >>> >> > > might want to just collapse all this down to a single node and put >>> >> > > this >>> >> > > information in the driver if it is fixed for each SoC and there's >>> >> > > only a >>> >> > > handful. >>> >> > > >>> >> > >>> >> > For each kind of PMU, for example memory controller PMU, I expect to >>> >> > have the number of instances up to 8. >>> >> > They are actually all independent PMU nodes and have their own CSR >>> >> > memory bases. >>> >> > The indexes are used for exposing the devices to perf user only. It >>> >> > doesn't have an impact on the programming model. >>> >> > Mark also had the same concern. >>> >> >>> >> Regardless, I'll need an ack from Rob or Mark before I can merge this. >>> > >>> > I still have a concern with this. Needing an index to expose to the user >>> > is generally not a valid reason. That's OS specific and therefore >>> > doesn't belong in DT. >>> > >>> > Rob >>> >>> I can use device name here. However, the perf event names will be >>> different between DT and ACPI which I want to avoid. >>> And the names don't look good at all. >>> Also, specifically for MC and MCB PMUs, the indexes are compared >>> against the active MC/MCB mask to find out whether they are populated >>> or not. >>> Without using the index property, I will also need a mapping function >>> of physical device addresses and their physical ids. >> >> What's wrong with using ${device}.{physical_address} as the PMU name? >> That would be unique and consistent regardless of the firmware, no >> mapping nor index property necessary. >> >> That's sufficient for any user already familiar with the topology, a >> familiarity you seem to be assuming regardless by not explicitly >> describing the topology in the DT. >> >> Thanks, >> Mark. > > Okay. I'll do fix it for the next patches. > > Thanks, > -- > Tai I'm facing a problem after removing the index for MCU and MC sub-nodes. The MCUs and MCs aren't always enabled depending on how DRAM DIMMs are installed on the system. I still need a way to associate the MCU with its indicator bit in the enable mask retrieved from CSR. For MC and MCB nodes only, can I introduce an "enable-mask" field? For example: " pmucmcb@7e71 { compatible = "apm,xgene-pmu-mcb"; reg = <0x0 0x7e71 0x0 0x1000>; enable-mask = <0x0001>; }; pmucmcb@7e73 { compatible = "apm,xgene-pmu-mcb"; reg = <0x0 0x7e73 0x0 0x1000>; enable-mask = <0x0002>; }; " Or can you please give a suggestion how I can fix it? Thanks, -- Tai
Re: [PATCH 1/3] mmc: fix mmc mode selection for HS-DDR and higher
On 05/29/2016 04:04 PM, Chen-Yu Tsai wrote: > When IS_ERR_VALUE was removed from the mmc core code, it was replaced > with a simple not-zero check. This does not work, as the value checked > is the return value for mmc_select_bus_width, which returns the set > bit width on success. This made eMMC modes higher than HS-DDR unusable. > > Fix this by checking for a positive return value instead. > > Fixes: 287980e49ffc ("remove lots of IS_ERR_VALUE abuses") > Cc: Arnd Bergmann > Signed-off-by: Chen-Yu Tsai Acked-by: Jaehoon Chung Best Regards, Jaehoon Chung > --- > drivers/mmc/core/mmc.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c > index c984321d1881..aafb73d080ca 100644 > --- a/drivers/mmc/core/mmc.c > +++ b/drivers/mmc/core/mmc.c > @@ -1276,7 +1276,7 @@ static int mmc_select_hs200(struct mmc_card *card) >* switch to HS200 mode if bus width is set successfully. >*/ > err = mmc_select_bus_width(card); > - if (!err) { > + if (err > 0) { > val = EXT_CSD_TIMING_HS200 | > card->drive_strength << EXT_CSD_DRV_STR_SHIFT; > err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, > @@ -1583,7 +1583,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, > } else if (mmc_card_hs(card)) { > /* Select the desired bus width optionally */ > err = mmc_select_bus_width(card); > - if (!err) { > + if (err > 0) { > err = mmc_select_hs_ddr(card); > if (err) > goto free_card; >
Re: + zram-switch-to-crypto-compress-api.patch added to -mm tree
On (05/31/16 12:10), a...@linux-foundation.org wrote: > The patch titled > Subject: zram: switch to crypto compress API > has been added to the -mm tree. Its filename is > zram-switch-to-crypto-compress-api.patch > > This patch should soon appear at > > http://ozlabs.org/~akpm/mmots/broken-out/zram-switch-to-crypto-compress-api.patch > and later at > > http://ozlabs.org/~akpm/mmotm/broken-out/zram-switch-to-crypto-compress-api.patch Andrew, I'd prefer this patch to also include changes from this one: http://ozlabs.org/~akpm/mmots/broken-out/zram-align-zcomp-interface-to-crypto-comp-api.patch (http://article.gmane.org/gmane.linux.kernel/2231316) (per Minchan). IOW, squash http://ozlabs.org/~akpm/mmots/broken-out/zram-switch-to-crypto-compress-api.patch and http://ozlabs.org/~akpm/mmots/broken-out/zram-align-zcomp-interface-to-crypto-comp-api.patch what's the best way to do it? should I resend the series? -ss
RE: [RFC PATCH v2] ACPICA / Hardware: Fix old register check in acpi_hw_get_access_bit_width()
Hi, > From: Boris Ostrovsky [mailto:boris.ostrov...@oracle.com] > Subject: Re: [RFC PATCH v2] ACPICA / Hardware: Fix old register check in > acpi_hw_get_access_bit_width() > > On 05/31/2016 10:36 AM, Mike Marshall wrote: > > Hi Lv... > > > > I was dead in the water before this patch, qemu-kvm would crash > > right away, now everything seems to work great again, thanks! From > > my perspective this fixes the c3bc26d problem. > > > > Acked-by: Mike Marshall > > > > -Mike > > > > On Tue, May 31, 2016 at 3:13 AM, Zheng, Lv > wrote: > >> Hi, Boris and Mike > >> > >> Please help to validate if this version can also fix your issues. > >> After enumerating the possible cases, I realized that the address check > might not be necessary. > >> But we need a max_bit_width check in this function to make it > prepared for a future usage in acpi_read()/acpi_write(). > >> Thanks in advance. > > You can add > Tested-by: Boris Ostrovsky [Lv Zheng] Great! > > although this still allows us to access bytes that we are not supposed to. > > You may be able to calculate access width as something like > >min (max_bit_width, >ACPI_ROUND_UP((ACPI_ROUND_DOWN(reg->bit_offset, 8) + > reg->bit_width), >8); [Lv Zheng] This looks reasonable to me. And I actually was considering to add such kind of code before. But since we have been working with ACPICA for so many years without supporting reg->bit_offset. We probably can make sure that there is no real platform setting reg->bit_offset to the values other than 0. It is likely that the real platforms that want to utilize "access size" are using it correctly, so we actually needn't do any optimization in acpi_hw_get_access_bit_size() except returning the default one. The code I put here (which looks like an optimization) is meant to create a workaround for the bug reported against acpi_tb_init_generic_address(). Thus IMO, we could do less aggressive approach and do further changes unless we can see real cases. Cheers -Lv > > -boris > > > >> Best regards > >> -Lv > >> > >>> From: Zheng, Lv > >>> Subject: [RFC PATCH v2] ACPICA / Hardware: Fix old register check in > >>> acpi_hw_get_access_bit_width() > >>> > >>> The address check in acpi_hw_get_access_bit_width() should be byte > >>> width > >>> based, not bit width based. This patch fixes this mistake. > >>> > >>> For those who want to review acpi_hw_access_bit_width(), here is the > >>> concerns and the design details of the function: > >>> > >>> It is supposed that the GAS Address field should be aligned to the byte > >>> width indicated by the GAS AccessSize field. Similarly, for the old non > >>> GAS register, it is supposed that its Address should be aligned to its > >>> Length. > >>> For the "AccessSize = 0 (meaning ANY)" case, we try to return the > >>> maximum > >>> instruction width (64 for MMIO or 32 for PIO) or the user expected > access > >>> bit width (64 for acpi_read()/acpi_write() or 32 for acpi_hw_read()/ > >>> acpi_hw_write()) for futher operation and it is supposed that the GAS > >>> Address field should always be aligned to the maximum expected > access > >>> bit > >>> width (otherwise it can't be ANY). > >>> > >>> The problem is in acpi_tb_init_generic_address(), where the non GAS > >>> register's Length is converted into the GAS BitWidth field, its Address > is > >>> converted into the GAS Address field, and the GAS AccessSize field is > left > >>> 0 but most of the register actually cannot be accessed using "ANY" > >>> accesses. > >>> > >>> As a conclusion, when AccessSize = 0 (ANY), the Address should either > be > >>> aligned to the BitWidth (wrong conversion) or aligned to 32 (PIO) or > 64 > >>> (MMIO). Since BitWidth for the wrong conversion is 8,16,32, the > Address > >>> of the real GAS should always be aligned to 8,16,32, the address > alignment > >>> check is not necessary. But we in fact could enhance the check for a > future > >>> case where max_bit_width could be 64 for a PIO access issued from > >>> acpi_read()/acpi_write(). > >>> > >>> Fixes: b314a172ee96 ("ACPICA: Hardware: Add optimized access bit > width > >>> support") > >>> Cc: Boris Ostrovsky > >>> Cc: Mike Marshall > >>> Suggested-by: Jan Beulich > >>> Signed-off-by: Lv Zheng > >>> --- > >>> drivers/acpi/acpica/hwregs.c | 16 +++- > >>> 1 file changed, 7 insertions(+), 9 deletions(-) > >>> > >>> diff --git a/drivers/acpi/acpica/hwregs.c > b/drivers/acpi/acpica/hwregs.c > >>> index 0f18dbc..0553c0b 100644 > >>> --- a/drivers/acpi/acpica/hwregs.c > >>> +++ b/drivers/acpi/acpica/hwregs.c > >>> @@ -86,24 +86,22 @@ acpi_hw_get_access_bit_width(struct > >>> acpi_generic_address *reg, u8 max_bit_width) > >>> u64 address; > >>> > >>> if (!reg->access_width) { > >>> + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { > >>> + max_bit_width = 32; > >>> + } > >>> /* > >>>* Detect old register descriptors w
Re: [PATCH 2/2] ASoC: cs53l30: Check return value of regcache_sync()
On 5/31/16, 6:06 PM, "Nicolin Chen" wrote: >Regcache_sync() might fail. So this patch adds a return value Check for >it. > >Signed-off-by: Nicolin Chen >--- > sound/soc/codecs/cs53l30.c | 6 +- > 1 file changed, 5 insertions(+), 1 deletion(-) > >diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c >index 9aff449..ac90dd7 100644 >--- a/sound/soc/codecs/cs53l30.c >+++ b/sound/soc/codecs/cs53l30.c >@@ -1055,7 +1055,11 @@ static int cs53l30_runtime_resume(struct device >*dev) > gpiod_set_value_cansleep(cs53l30->reset_gpio, 1); > > regcache_cache_only(cs53l30->regmap, false); >- regcache_sync(cs53l30->regmap); >+ ret = regcache_sync(cs53l30->regmap); >+ if (ret) { >+ dev_err(dev, "failed to synchronize regcache: %d\n", ret); >+ return ret; >+ } > > return 0; > } >-- >2.1.4 Acked-by: Paul Handrigan
Re: [PATCH 1/2] ASoC: cs53l30: Rename the volume controls for preamplifier
On 5/31/16, 6:06 PM, "Nicolin Chen" wrote: >Volume controls should end with 'Volume', so this patch renames them for >ADC preamplifier. > >Signed-off-by: Nicolin Chen >--- > sound/soc/codecs/cs53l30.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > >diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c >index 714e579..9aff449 100644 >--- a/sound/soc/codecs/cs53l30.c >+++ b/sound/soc/codecs/cs53l30.c >@@ -331,10 +331,10 @@ static const struct snd_kcontrol_new >cs53l30_snd_controls[] = { > SOC_SINGLE_TLV("ADC2 NG Boost Volume", CS53L30_ADC2_NG_CTL, > CS53L30_ADCx_NG_BOOST_SHIFT, 1, 0, adc_ng_boost_tlv), > >- SOC_DOUBLE_R_TLV("ADC1 Pre Amp Gain", CS53L30_ADC1A_AFE_CTL, >+ SOC_DOUBLE_R_TLV("ADC1 Preamplifier Volume", CS53L30_ADC1A_AFE_CTL, >CS53L30_ADC1B_AFE_CTL, CS53L30_ADCxy_PREAMP_SHIFT, >2, 0, pga_preamp_tlv), >- SOC_DOUBLE_R_TLV("ADC2 Pre Amp Gain", CS53L30_ADC2A_AFE_CTL, >+ SOC_DOUBLE_R_TLV("ADC2 Preamplifier Volume", CS53L30_ADC2A_AFE_CTL, >CS53L30_ADC2B_AFE_CTL, CS53L30_ADCxy_PREAMP_SHIFT, >2, 0, pga_preamp_tlv), > >-- >2.1.4 Acked-by: Paul Handrigan
Re: [PATCH v2 0/4] hw rng support for NSP SoC
On Tue, May 31, 2016 at 10:09:39AM -0700, Florian Fainelli wrote: > > FYI, ARM Device Tree patches usually go via ARM SoC pull requests, so it > is best if this is planned in advance. Can you make sure you document > that there could be a merge conflict in your pull request to Linus? Sure I can do that. Thanks, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
[PATCH] Input: raydium_i2c_ts - do not ignore EPROBE_DEFER from gpiod_get_optional
We should not be ignoring -EPROBE_DEFER reported by devm_gpiod_get_optional(), but report it as any other error to the upper layers. While we are at it simplify check for the presence of reset GPIO and instead of using IS_ERR_OR_NULL just use boolean. Also do not return -ENOMEM from suspend handler when the device in bootloader mode as that does not make sense and switch to -EBUSY instead. Reported-by: Guenter Roeck Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/raydium_i2c_ts.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 1ae65f6..f3076d9 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -930,7 +930,7 @@ static int raydium_i2c_power_on(struct raydium_data *ts) { int error; - if (IS_ERR_OR_NULL(ts->reset_gpio)) + if (!ts->reset_gpio) return 0; gpiod_set_value_cansleep(ts->reset_gpio, 1); @@ -967,7 +967,7 @@ static void raydium_i2c_power_off(void *_data) { struct raydium_data *ts = _data; - if (!IS_ERR_OR_NULL(ts->reset_gpio)) { + if (ts->reset_gpio) { gpiod_set_value_cansleep(ts->reset_gpio, 1); regulator_disable(ts->vccio); regulator_disable(ts->avdd); @@ -1018,11 +1018,10 @@ static int raydium_i2c_probe(struct i2c_client *client, GPIOD_OUT_LOW); if (IS_ERR(ts->reset_gpio)) { error = PTR_ERR(ts->reset_gpio); - if (error != -EPROBE_DEFER) { + if (error != -EPROBE_DEFER) dev_err(&client->dev, "failed to get reset gpio: %d\n", error); - return error; - } + return error; } error = raydium_i2c_power_on(ts); @@ -1138,7 +1137,7 @@ static int __maybe_unused raydium_i2c_suspend(struct device *dev) /* Sleep is not available in BLDR recovery mode */ if (ts->boot_mode != RAYDIUM_TS_MAIN) - return -ENOMEM; + return -EBUSY; disable_irq(client->irq); -- 2.8.0.rc3.226.g39d4020 -- Dmitry
RE: [RFC PATCH v2] ACPICA / Hardware: Fix old register check in acpi_hw_get_access_bit_width()
Hi, > From: Mike Marshall [mailto:hub...@omnibond.com] > Subject: Re: [RFC PATCH v2] ACPICA / Hardware: Fix old register check in > acpi_hw_get_access_bit_width() > > Hi Lv... > > I was dead in the water before this patch, qemu-kvm would crash > right away, now everything seems to work great again, thanks! From > my perspective this fixes the c3bc26d problem. [Lv Zheng] Great. The bisection result is c3bc26d, but the code is actually upstreamed in b314a172. c3bc26d is the first commit enabled the bug. :) > > Acked-by: Mike Marshall [Lv Zheng] Thanks for the test. Best regards -Lv > > -Mike > > On Tue, May 31, 2016 at 3:13 AM, Zheng, Lv wrote: > > Hi, Boris and Mike > > > > Please help to validate if this version can also fix your issues. > > After enumerating the possible cases, I realized that the address check > might not be necessary. > > But we need a max_bit_width check in this function to make it prepared > for a future usage in acpi_read()/acpi_write(). > > Thanks in advance. > > > > Best regards > > -Lv > > > >> From: Zheng, Lv > >> Subject: [RFC PATCH v2] ACPICA / Hardware: Fix old register check in > >> acpi_hw_get_access_bit_width() > >> > >> The address check in acpi_hw_get_access_bit_width() should be byte > >> width > >> based, not bit width based. This patch fixes this mistake. > >> > >> For those who want to review acpi_hw_access_bit_width(), here is the > >> concerns and the design details of the function: > >> > >> It is supposed that the GAS Address field should be aligned to the byte > >> width indicated by the GAS AccessSize field. Similarly, for the old non > >> GAS register, it is supposed that its Address should be aligned to its > >> Length. > >> For the "AccessSize = 0 (meaning ANY)" case, we try to return the > >> maximum > >> instruction width (64 for MMIO or 32 for PIO) or the user expected > access > >> bit width (64 for acpi_read()/acpi_write() or 32 for acpi_hw_read()/ > >> acpi_hw_write()) for futher operation and it is supposed that the GAS > >> Address field should always be aligned to the maximum expected > access > >> bit > >> width (otherwise it can't be ANY). > >> > >> The problem is in acpi_tb_init_generic_address(), where the non GAS > >> register's Length is converted into the GAS BitWidth field, its Address is > >> converted into the GAS Address field, and the GAS AccessSize field is > left > >> 0 but most of the register actually cannot be accessed using "ANY" > >> accesses. > >> > >> As a conclusion, when AccessSize = 0 (ANY), the Address should either > be > >> aligned to the BitWidth (wrong conversion) or aligned to 32 (PIO) or 64 > >> (MMIO). Since BitWidth for the wrong conversion is 8,16,32, the > Address > >> of the real GAS should always be aligned to 8,16,32, the address > alignment > >> check is not necessary. But we in fact could enhance the check for a > future > >> case where max_bit_width could be 64 for a PIO access issued from > >> acpi_read()/acpi_write(). > >> > >> Fixes: b314a172ee96 ("ACPICA: Hardware: Add optimized access bit > width > >> support") > >> Cc: Boris Ostrovsky > >> Cc: Mike Marshall > >> Suggested-by: Jan Beulich > >> Signed-off-by: Lv Zheng > >> --- > >> drivers/acpi/acpica/hwregs.c | 16 +++- > >> 1 file changed, 7 insertions(+), 9 deletions(-) > >> > >> diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c > >> index 0f18dbc..0553c0b 100644 > >> --- a/drivers/acpi/acpica/hwregs.c > >> +++ b/drivers/acpi/acpica/hwregs.c > >> @@ -86,24 +86,22 @@ acpi_hw_get_access_bit_width(struct > >> acpi_generic_address *reg, u8 max_bit_width) > >> u64 address; > >> > >> if (!reg->access_width) { > >> + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { > >> + max_bit_width = 32; > >> + } > >> /* > >>* Detect old register descriptors where only the bit_width > >> field > >>* makes senses. The target address is copied to handle > >> possible > >>* alignment issues. > >>*/ > >> ACPI_MOVE_64_TO_64(&address, ®->address); > >> - if (!reg->bit_offset && reg->bit_width && > >> + if (reg->bit_width < max_bit_width && > >> + !reg->bit_offset && reg->bit_width && > >> ACPI_IS_POWER_OF_TWO(reg->bit_width) && > >> - ACPI_IS_ALIGNED(reg->bit_width, 8) && > >> - ACPI_IS_ALIGNED(address, reg->bit_width)) { > >> + ACPI_IS_ALIGNED(reg->bit_width, 8)) { > >> return (reg->bit_width); > >> - } else { > >> - if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) > >> { > >> - return (32); > >> - } else { > >> - return (max_bit_width); > >> - } > >> } > >> + return (max_bit_width); > >> } else {
Re: ###$$$@#
- Original Message - Sent: Tue, 31 May 2016 21:02:28 -0400 (EDT) Subject: ###$$$@# BMW WINNING NOTIFICATION!!!
Re: shrink_active_list/try_to_release_page bug? (was Re: xfs trace in 4.4.2 / also in 4.3.3 WARNING fs/xfs/xfs_aops.c:1232 xfs_vm_releasepage)
On Tue, May 31, 2016 at 11:50:31AM +0200, Jan Kara wrote: > On Tue 31-05-16 10:07:24, Minchan Kim wrote: > > On Tue, May 31, 2016 at 08:36:57AM +1000, Dave Chinner wrote: > > > [adding lkml and linux-mm to the cc list] > > > > > > On Mon, May 30, 2016 at 09:23:48AM +0200, Stefan Priebe - Profihost AG > > > wrote: > > > > Hi Dave, > > > > Hi Brian, > > > > > > > > below are the results with a vanilla 4.4.11 kernel. > > > > > > Thanks for persisting with the testing, Stefan. > > > > > > > > > > > > > i've now used a vanilla 4.4.11 Kernel and the issue remains. After a > > > > fresh reboot it has happened again on the root FS for a debian apt file: > > > > > > > > XFS (md127p3): ino 0x41221d1 delalloc 1 unwritten 0 pgoff 0x0 size > > > > 0x12b990 > > > > [ cut here ] > > > > WARNING: CPU: 1 PID: 111 at fs/xfs/xfs_aops.c:1239 > > > > xfs_vm_releasepage+0x10f/0x140() > > > > Modules linked in: netconsole ipt_REJECT nf_reject_ipv4 xt_multiport > > > > iptable_filter ip_tables x_tables bonding coretemp 8021q garp fuse > > > > sb_edac edac_core i2c_i801 i40e(O) xhci_pci xhci_hcd shpchp vxlan > > > > ip6_udp_tunnel udp_tunnel ipmi_si ipmi_msghandler button btrfs xor > > > > raid6_pq dm_mod raid1 md_mod usbhid usb_storage ohci_hcd sg sd_mod > > > > ehci_pci ehci_hcd usbcore usb_common igb ahci i2c_algo_bit libahci > > > > i2c_core mpt3sas ptp pps_core raid_class scsi_transport_sas > > > > CPU: 1 PID: 111 Comm: kswapd0 Tainted: G O4.4.11 #1 > > > > Hardware name: Supermicro Super Server/X10SRH-CF, BIOS 1.0b 05/18/2015 > > > > 880c4dacfa88 a23c5b8f > > > > a2a51ab4 880c4dacfac8 a20837a7 880c4dacfae8 > > > > 0001 ea00010c3640 8802176b49d0 ea00010c3660 > > > > Call Trace: > > > > [] dump_stack+0x63/0x84 > > > > [] warn_slowpath_common+0x97/0xe0 > > > > [] warn_slowpath_null+0x1a/0x20 > > > > [] xfs_vm_releasepage+0x10f/0x140 > > > > [] ? page_mkclean_one+0xd0/0xd0 > > > > [] ? anon_vma_prepare+0x150/0x150 > > > > [] try_to_release_page+0x32/0x50 > > > > [] shrink_active_list+0x3ce/0x3e0 > > > > [] shrink_lruvec+0x687/0x7d0 > > > > [] shrink_zone+0xdc/0x2c0 > > > > [] kswapd+0x4f9/0x970 > > > > [] ? mem_cgroup_shrink_node_zone+0x1a0/0x1a0 > > > > [] kthread+0xc9/0xe0 > > > > [] ? kthread_stop+0x100/0x100 > > > > [] ret_from_fork+0x3f/0x70 > > > > [] ? kthread_stop+0x100/0x100 > > > > ---[ end trace c9d679f8ed4d7610 ]--- > > > > XFS (md127p3): ino 0x41221d1 delalloc 1 unwritten 0 pgoff 0x1000 size > > > > 0x12b990 > > > > XFS (md127p3): ino 0x41221d1 delalloc 1 unwritten 0 pgoff 0x2000 size > > > . > > > > > > Ok, I suspect this may be a VM bug. I've been looking at the 4.6 > > > code (so please try to reproduce on that kernel!) but it looks to me > > > like the only way we can get from shrink_active_list() direct to > > > try_to_release_page() is if we are over the maximum bufferhead > > > threshold (i.e buffer_heads_over_limit = true) and we are trying to > > > reclaim pages direct from the active list. > > > > > > Because we are called from kswapd()->balance_pgdat(), we have: > > > > > > struct scan_control sc = { > > > .gfp_mask = GFP_KERNEL, > > > .order = order, > > > .priority = DEF_PRIORITY, > > > .may_writepage = !laptop_mode, > > > .may_unmap = 1, > > > .may_swap = 1, > > > }; > > > > > > The key point here is reclaim is being run with .may_writepage = > > > true for default configuration kernels. when we get to > > > shrink_active_list(): > > > > > > if (!sc->may_writepage) > > > isolate_mode |= ISOLATE_CLEAN; > > > > > > But sc->may_writepage = true and this allows isolate_lru_pages() to > > > isolate dirty pages from the active list. Normally this isn't a > > > problem, because the isolated active list pages are rotated to the > > > inactive list, and nothing else happens to them. *Except when > > > buffer_heads_over_limit = true*. This special condition would > > > explain why I have never seen apt/dpkg cause this problem on any of > > > my (many) Debian systems that all use XFS > > > > > > In that case, shrink_active_list() runs: > > > > > > if (unlikely(buffer_heads_over_limit)) { > > > if (page_has_private(page) && trylock_page(page)) { > > > if (page_has_private(page)) > > > try_to_release_page(page, 0); > > > unlock_page(page); > > > } > > > } > > > > > > i.e. it locks the page, and if it has buffer heads it trys to get > > > the bufferheads freed from the page. > > > > > > But this is a dirty page, which means it may have delalloc or > > > unwritten state on it's buffers, both of which indicate that there > > > is dirty data in teh page that hasn't been written. XFS issues a > > > warning on this because neither shr
[PATCH 2/2] arm64: defconfig: Enable Cadence MACB/GEM support
This patch enables the cadence MACB/GEM support that is needed by lg1k SoCs. Signed-off-by: Chanho Min --- arch/arm64/configs/defconfig |1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index fd2d74d..4edbbac 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -120,6 +120,7 @@ CONFIG_TUN=y CONFIG_VIRTIO_NET=y CONFIG_AMD_XGBE=y CONFIG_NET_XGENE=y +CONFIG_MACB=y CONFIG_E1000E=y CONFIG_IGB=y CONFIG_IGBVF=y -- 1.7.9.5
[PATCH 1/2] arm64: dts: Add dts files for LG Electronics's lg1313 SoC
Add dtsi file to support lg1313 SoC which based on Cortex-A53. Also add dts file to support lg1312 reference board which based on lg1313 SoC. Signed-off-by: Chanho Min --- arch/arm64/boot/dts/lg/Makefile |1 + arch/arm64/boot/dts/lg/lg1313-ref.dts | 36 arch/arm64/boot/dts/lg/lg1313.dtsi| 351 + 3 files changed, 388 insertions(+) create mode 100644 arch/arm64/boot/dts/lg/lg1313-ref.dts create mode 100644 arch/arm64/boot/dts/lg/lg1313.dtsi diff --git a/arch/arm64/boot/dts/lg/Makefile b/arch/arm64/boot/dts/lg/Makefile index b0cc649..5c7b54c1 100644 --- a/arch/arm64/boot/dts/lg/Makefile +++ b/arch/arm64/boot/dts/lg/Makefile @@ -1,4 +1,5 @@ dtb-$(CONFIG_ARCH_LG1K) += lg1312-ref.dtb +dtb-$(CONFIG_ARCH_LG1K) += lg1313-ref.dtb always := $(dtb-y) subdir-y := $(dts-dirs) diff --git a/arch/arm64/boot/dts/lg/lg1313-ref.dts b/arch/arm64/boot/dts/lg/lg1313-ref.dts new file mode 100644 index 000..df0ece4 --- /dev/null +++ b/arch/arm64/boot/dts/lg/lg1313-ref.dts @@ -0,0 +1,36 @@ +/* + * dts file for lg1313 Reference Board. + * + * Copyright (C) 2016, LG Electronics + */ + +/dts-v1/; + +#include "lg1313.dtsi" + +/ { + #address-cells = <2>; + #size-cells = <1>; + + model = "LG Electronics, DTV SoC LG1313 Reference Board"; + compatible = "lge,lg1313-ref", "lge,lg1313"; + + aliases { + serial0 = &uart0; + serial1 = &uart1; + serial2 = &uart2; + }; + + memory { + device_type = "memory"; + reg = <0x0 0x 0x2000>; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; +}; + +&uart0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi new file mode 100644 index 000..e703e11 --- /dev/null +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -0,0 +1,351 @@ +/* + * dts file for lg1313 SoC + * + * Copyright (C) 2016, LG Electronics + */ + +#include +#include + +/ { + #address-cells = <2>; + #size-cells = <2>; + + compatible = "lge,lg1313"; + interrupt-parent = <&gic>; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x0>; + next-level-cache = <&L2_0>; + }; + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x1>; + enable-method = "psci"; + next-level-cache = <&L2_0>; + }; + cpu2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x2>; + enable-method = "psci"; + next-level-cache = <&L2_0>; + }; + cpu3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0x0 0x3>; + enable-method = "psci"; + next-level-cache = <&L2_0>; + }; + L2_0: l2-cache0 { + compatible = "cache"; + }; + }; + + psci { + compatible = "arm,psci-0.2", "arm,psci"; + method = "smc"; + cpu_suspend = <0x8401>; + cpu_off = <0x8402>; + cpu_on = <0x8403>; + }; + + gic: interrupt-controller@c0001000 { + #interrupt-cells = <3>; + compatible = "arm,gic-400"; + interrupt-controller; + reg = <0x0 0xc0001000 0x1000>, + <0x0 0xc0002000 0x2000>, + <0x0 0xc0004000 0x2000>, + <0x0 0xc0006000 0x2000>; + }; + + pmu { + compatible = "arm,cortex-a53-pmu"; + interrupts = , +, +, +; + interrupt-affinity = <&cpu0>, +<&cpu1>, +<&cpu2>, +<&cpu3>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = , +, +, +; + }; + + clk_bus: clk_bus { + #clock-cells = <0>; + + compatible = "fixed-clock"; + clock-frequency = <19800>; + clock-output-names = "BUSCLK"; + }; + + soc { +
Re: [PATCH v2 0/10] Add RK3399 eDP support and fix some bugs to analogix_dp driver.
Javier, Mark, Inki, Jingoo On 06/01/2016 04:01 AM, Javier Martinez Canillas wrote: Hello Yakir, On 05/27/2016 02:16 AM, Yakir Yang wrote: Hi Javier, On 05/26/2016 08:48 PM, Javier Martinez Canillas wrote: Hello Yakir, On 05/26/2016 05:34 AM, Yakir Yang wrote: Hi Javier, On 05/24/2016 01:01 PM, Yakir Yang wrote: Hi all, This series have been posted about one month, still no comments, help here :( This series works rightly on Rockchip platform, and most of them haven't touch the common analogix_dp driver (except for the hotplug fixed). So i guess Exynos platform should also happy with this changes. But not sure about that. So, is it possible that you could help to check this on Exynos Chromebook, if so i would be very grateful about that. Of course, I' ll test. Could you please provide me a branch that I can pull directly to avoid cherry-picking all the patches from the list? Ah, thanks a lot, I do have a tree https://github.com/yakir-Yang/linux/tree/fromlist/3399-edp I tested your branch on an Exynos5800 Peach Pi Chromebook and display is working correctly. So feel free to add for the whole series: Tested-by: Javier Martinez Canillas Hi Javier, Thank you very much, it's great to hear that, and make a big step forward. :-D Hi Mark, Inki, Jingoo Those patches have been tested on Exynos and Rockchip platform rightly, and This patches have been touched some rockchip_drm and analogix_dp core code, it would be great to get some reviewed/acked from you. And for now this patch-set have been tested on Rockchip and Exynos platform rightly, so could you help to share some further comments here ;) Thanks, - Yakir Best regards,
Re: [PATCH v2 05/10] drm/rockchip: analogix_dp: add rk3399 eDP support
On 2016年05月24日 13:02, Yakir Yang wrote: RK3399 and RK3288 shared the same eDP IP controller, only some light difference with VOP configure and GRF configure. Signed-off-by: Yakir Yang --- Changes in v2: - rebase with drm-next, fix some conflicts .../bindings/display/bridge/analogix_dp.txt| 1 + .../display/rockchip/analogix_dp-rockchip.txt | 2 +- drivers/gpu/drm/rockchip/analogix_dp-rockchip.c| 34 -- include/drm/bridge/analogix_dp.h | 1 + 4 files changed, 35 insertions(+), 3 deletions(-) Looks for me, So for the drm/rockchip side: Acked-by: Mark Yao Thanks. -- Mark Yao
Re: [PATCH 4/5] clocksource: rockchip: add support for rk3399 SoC
Hi Daniel: On 2016年05月31日 22:06, Daniel Lezcano wrote: >> >> @@ -46,15 +48,20 @@ static inline void __iomem *rk_base(struct >> clock_event_device *ce) >> return rk_timer(ce)->base; >> } >> >> +static inline void __iomem *rk_ctrl(struct clock_event_device *ce) >> +{ >> +return rk_timer(ce)->base + rk_timer(ce)->ctrl; > > You can do a small optimization by pre-computing 'ctrl' at init time, so > no need to do this addition each time. I understand what you mean, please see comment below. And even we use ctrl as pointer, we still will get addition LDR other then ADD. This is disassemble code before: 0: f9408021ldr x1, [x1,#256] 4: 5283mov w3, #0x0 8: 91004022add x2, x1, #0x10 c: b943str w3, [x2] This is disassemble code after change: 0: 5283mov w3, #0x0 4: f9408422ldr x2, [x1,#264] 8: b943str w3, [x2] c: f9408021ldr x1, [x1,#256] Of course we can assume cache hit. > >> +} >> + >> static inline void rk_timer_disable(struct clock_event_device *ce) >> { >> -writel_relaxed(TIMER_DISABLE, rk_base(ce) + TIMER_CONTROL_REG); >> +writel_relaxed(TIMER_DISABLE, rk_ctrl(ce)); >> } >> >> static inline void rk_timer_enable(struct clock_event_device *ce, u32 >> flags) >> { >> writel_relaxed(TIMER_ENABLE | TIMER_INT_UNMASK | flags, >> - rk_base(ce) + TIMER_CONTROL_REG); >> + rk_ctrl(ce)); >> } >> >> static void rk_timer_update_counter(unsigned long cycles, >> @@ -179,4 +186,19 @@ out_unmap: >> iounmap(bc_timer.base); >> } >> >> -CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init); >> +static void __init rk3288_timer_init(struct device_node *np) >> +{ >> +bc_timer.ctrl = TIMER_CONTROL_REG3288; >> +rk_timer_init(np); > > rk_timer_init(np); > bc_timer.ctrl = bc_timer.base + TIMER_CONTROL_REG3288; No. It's not such simple. You will access null pointer when rk_timer_init, if we keep rk_timer_disable call in init or after request_irq/clockevents_config_and_register and interrupt happen immediately. So the code maybe: static void __init rk3288_timer_init(struct device_node *np) { bc_timer.base = of_iomap(np, 0); if (!bc_timer.base) { pr_err("Failed to get base address for '%s'\n", TIMER_NAME); return; } bc_timer.ctrl = bc_timer.base + TIMER_CONTROL_REG3288; rk_imter_init(np); // of course remove of_iomap from init. Is this what you want?
Re: [PATCH v2 02/10] drm/rockchip: analogix_dp: split the lcdc select setting into device data
On 2016年05月24日 13:02, Yakir Yang wrote: eDP controller need to declare which vop provide the video source, and it's defined in GRF registers. But different chips have different GRF register address, so we need to create a device data to declare the GRF messages for each chips. Signed-off-by: Yakir Yang Looks for me, So: Acked-by: Mark Yao -- Mark Yao
Re: [RFC PATCH 1/2] sched: Clean up SD_BALANCE_WAKE flags in sched domain build-up
On Tue, May 31, 2016 at 12:41:21PM +0200, Peter Zijlstra wrote: > On Tue, May 31, 2016 at 09:31:32AM +0800, Yuyang Du wrote: > > On Tue, May 31, 2016 at 11:21:46AM +0200, Peter Zijlstra wrote: > > > On Tue, May 31, 2016 at 09:11:37AM +0800, Yuyang Du wrote: > > > > The SD_BALANCE_WAKE is irrelevant in the contexts of these two removals, > > > > and in addition SD_BALANCE_WAKE is not and should not be set in any > > > > sched_domain flags so far. > > > > > > This Changelog doesn't make any sense... > > > > How? SD_BALANCE_WAKE is not in any sched_domain flags (sd->flags), even if > > it is, it is not used anywhere, no? > > It is and it is. See select_task_fair_rq(): > > if (tmp->flags & sd_flags) > > Now, as long as WAKE_AFFINE is also set, its hard to actually get into > the find_idlest_cpu() balancing, but if you clear all that you will > still get there. Well, that is very true, and the next patch (2/2) just makes all this what this is supposed to be: the SD_BALANCE_WAKE is a meaningful sched_domain flag. This particular patch is a pure cleanup, may I amend the changelog to: According to the comment: "turn off/on idle balance on this domain", the SD_BALANCE_WAKE has nothing to do with idle balance, so clean them up.
Re: + zram-switch-to-crypto-compress-api.patch added to -mm tree
On Wed, 1 Jun 2016 10:26:32 +0900 Sergey Senozhatsky wrote: > On (05/31/16 12:10), a...@linux-foundation.org wrote: > > The patch titled > > Subject: zram: switch to crypto compress API > > has been added to the -mm tree. Its filename is > > zram-switch-to-crypto-compress-api.patch > > > > This patch should soon appear at > > > > http://ozlabs.org/~akpm/mmots/broken-out/zram-switch-to-crypto-compress-api.patch > > and later at > > > > http://ozlabs.org/~akpm/mmotm/broken-out/zram-switch-to-crypto-compress-api.patch > > Andrew, > > I'd prefer this patch to also include changes from this one: > http://ozlabs.org/~akpm/mmots/broken-out/zram-align-zcomp-interface-to-crypto-comp-api.patch > (http://article.gmane.org/gmane.linux.kernel/2231316) > > (per Minchan). > > IOW, squash > > http://ozlabs.org/~akpm/mmots/broken-out/zram-switch-to-crypto-compress-api.patch > and > > http://ozlabs.org/~akpm/mmots/broken-out/zram-align-zcomp-interface-to-crypto-comp-api.patch Done.
Re: [PATCH resend v2 1/6] regulator: axp20x: support AXP809 variant
On Wed, Jun 1, 2016 at 1:36 AM, Mark Brown wrote: > On Wed, Jun 01, 2016 at 12:23:19AM +0800, Chen-Yu Tsai wrote: >> The X-Powers AXP809 PMIC has a similar set of regulators as the AXP221, >> though a few LDOs were removed, and a new switch output added. Like the >> AXP221, AXP809 also has DC1SW and DC5LDO, which are internally chained >> to DCDC1 and DCDC5, respectively. >> >> Add support for this new variant. Also remove the "axp22x_" prefix from >> DC1SW/DC5LDO supply handling code, as the AXP809 uses it as well. >> >> Signed-off-by: Chen-Yu Tsai >> --- > > Please preserve tags when people review things. Why wasn't this applied > when I originally reviewed it? The mfd patches this one depended on were pushed around a week before the merge window, about a month after you reviewed, merged, then backed out this patch. There was no immutable branch either. I thought it best to wait a cycle instead of rushing in a patch at the last minute. Regards ChenYu
Re: [PATCH] i2c_hid: enable i2c-hid devices to suspend/resume asynchronously
On 5/25/2016 1:31 AM, Dmitry Torokhov wrote: > On Tue, May 24, 2016 at 3:24 AM, Jiri Kosina wrote: > >> On Thu, 19 May 2016, Fu, Zhonghui wrote: >> >>> i2c-hid devices' suspend/resume are usually time-consuming process. >>> For example, the touch controller(i2c-ATML1000:00) on ASUS T100 tablet >>> takes about 160ms for suspending and 120ms for resuming. This patch >>> enables i2c-hid devices to suspend/resume asynchronously. This will >>> take advantage of multicore and speed up system suspend/resume process. >> Umm, why do we need this in the first place, given the fact that we are >> enabling async suspend for each and every HID device allocated via >> hid_allocate_device() already? >> > i2c-hid is not a HID but I2C device on I2C bus and is currently resumed > synchronously, in-line. Since it tries to reset device as part of resume it > may take a while for the device to respond. Sorry for late reply. Yes, the devices driven by i2c-hid driver are I2C devices, not HID devices. > > BTW, I am not quite sure what async suspend enabled > in hid_allocate_device() does for us, given that I do not see HID bus > implementing the suspend/resume support for its devices (as far as I > remember we rely on underlying transports - USB or I2C - to resume the > devices properly). Even if HID bus does not implement the suspend/resume support for its devices, the power domain, device type, device class or device driver subsystem related to HID devices can still implement the suspend/resume support for HID devices. Thanks, Zhonghui > > Thanks, > Dmitry >
Re: + zram-switch-to-crypto-compress-api.patch added to -mm tree
On (05/31/16 19:01), Andrew Morton wrote: > > Andrew, > > > > I'd prefer this patch to also include changes from this one: > > http://ozlabs.org/~akpm/mmots/broken-out/zram-align-zcomp-interface-to-crypto-comp-api.patch > > (http://article.gmane.org/gmane.linux.kernel/2231316) > > > > (per Minchan). > > > > IOW, squash > > > > http://ozlabs.org/~akpm/mmots/broken-out/zram-switch-to-crypto-compress-api.patch > > and > > > > http://ozlabs.org/~akpm/mmots/broken-out/zram-align-zcomp-interface-to-crypto-comp-api.patch > > Done. thanks! -ss
Re: [RFC][PATCH 1/2] time: Fix problematic change in settimeofday error checking
> @@ -21,6 +21,9 @@ static inline int do_sys_settimeofday(const struct timespec > *tv, > struct timespec64 ts64; > > if (!tv) > + return do_sys_settimeofday64(NULL, tz); > + > + if (tv && !timespec_valid(tv)) > return -EINVAL; Looks like an extra check for (tv), maybe it will be better to use: + if (!timespec_valid(tv)) Regards, Dima.
A "NULL pointer dereference " problem in pick_next_task_fair in linux 3.10.0 based
hi everybody: I have encountered a "NULL pointer dereference" problem in pick_next_task_fair() in linux 3.10.0 based static struct task_struct *pick_next_task_fair(struct rq *rq) { struct task_struct *p; struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; if (!cfs_rq->nr_running) return NULL; do { se = pick_next_entity(cfs_rq); set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); p = task_of(se); if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); return p; } In pick_next_entity(): static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { struct sched_entity *se = __pick_first_entity(cfs_rq); struct sched_entity *left = se; /* * Avoid running the skip buddy, if running something else can * be done without getting too unfair. */ if (cfs_rq->skip == se) { struct sched_entity *second = __pick_next_entity(se); if (second && wakeup_preempt_entity(second, left) < 1) se = second; } /* * Prefer last buddy, try to return the CPU to a preempted task. */ if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) se = cfs_rq->last; /* * Someone really wants this to run. If it's not unfair, run it. */ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) se = cfs_rq->next; clear_buddies(cfs_rq, se); return se; } in pick_next_entity 1. struct sched_entity *se = __pick_first_entity(cfs_rq); This se may be get NULL. 2. Then if cfs_rq->skip is also null, cfs_rq->skip == se. 3. struct sched_entity *second = __pick_next_entity(se); This code will lead to the "NULL pointer dereference" because se is NULL in __pick_next_entity -> rb_next() The oops is as follow: [795797.511960] BUG: unable to handle kernel NULL pointer dereference at 0010 [795797.622950] task: 883f24924560 ti: 883f2495c000 task.ti: 883f2495c000 [795797.630541] RIP: 0010:[] [] rb_next+0x1/0x50 [795797.638153] RSP: 0018:883f2495fdc0 EFLAGS: 00010046 [795797.643575] RAX: RBX: RCX: [795797.650824] RDX: 0001 RSI: 883f7f5f4868 RDI: 0010 [795797.658068] RBP: 883f2495fe08 R08: R09: [795797.666476] R10: R11: 883f249f5400 R12: 883d82207200 [795797.674821] R13: R14: R15: [795797.683160] FS: () GS:883f7f5e() knlGS: [795797.692460] CS: 0010 DS: ES: CR0: 80050033 [795797.699395] CR2: 0010 CR3: 003f1de58000 CR4: 003407e0 [795797.707703] DR0: DR1: DR2: [795797.715992] DR3: DR6: fffe0ff0 DR7: 0400 [795797.724267] Stack: [795797.727416] 883f2495fe08 810b87f9 883f2495fe08 883f7f5f47c0 [795797.736040] 883f24924b40 883f7f5f47c0 003f 883f24924560 [795797.744658] 883f2495fe68 816324ea 883f24924560 [795797.753255] Call Trace: [795797.756818] [] ? pick_next_task_fair+0x129/0x1d0 [795797.764207] [] __schedule+0x12a/0x910 [795797.770622] [] schedule+0x29/0x70 [795797.776683] [] smpboot_thread_fn+0xd3/0x1a0 [795797.783599] [] ? schedule+0x29/0x70 [795797.789807] [] ? lg_double_unlock+0x90/0x90 [795797.796706] [] kthread+0xcf/0xe0 [795797.802632] [] ? kthread_create_on_node+0x140/0x140 [795797.810207] [] ret_from_fork+0x58/0x90 [795797.816646] [] ? kthread_create_on_node+0x140/0x140 I find there are something different in upstream (in pick_next_entity ) static struct sched_entity * pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) { struct sched_entity *left = __pick_first_entity(cfs_rq); struct sched_entity *se; /* * If curr is set we have to see if its left of the leftmost entity * still in the tree, provided there was anything in the tree at all. */ if (!left || (curr && entity_before(curr, left))) left = curr; se = left; /* ideally we run the leftmost entity */ /* * Avoid running the skip buddy, if running something else can * be done without getting too unfair. */ if (cfs_rq->skip == se) { struct sched_entity *second; if (se == curr) { second = __pick_first_entity(cfs_rq); } else { second = __pick_next_entity(se); if (!second || (curr && entity_before(curr, second))) second = curr; } if (second && wakeup_preempt_entity(second, left) < 1) se
Re: [PATCH] [SCSI] aacraid: use kmemdup
> "Muhammad" == Muhammad Falak R Wani writes: Muhammad> Use kmemdup when some other buffer is immediately copied into Muhammad> allocated region. It replaces call to allocation followed by Muhammad> memcpy, by a single call to kmemdup. Applied to 4.8/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v2 4/8] zram: use crypto api to check alg availability
On Wed, Jun 01, 2016 at 10:07:07AM +0900, Sergey Senozhatsky wrote: > Hello Minchan, > > On (06/01/16 09:03), Minchan Kim wrote: > [..] > > So, if we do 'cat /sys/block/zram0/comp_algorithm", every crypto modules > > in the backend array are loaded in memory and not unloaded until admin > > executes rmmod? Right? > > yes, I think so. It scares me. Common case, except one we choosed, every loaded modules will be not used. I think it's really not good. Although the wastage might be not big now, it will be heavy as crypto comp modules are increased. What do you think about it? > > [..] > > If user load out-of-tree crypto compression module, what's status of > > comp_algorithm? > > > > #> insmod foo_crypto.ko > > #> echo foo > /sys/block/zram0/comp_algorithm > > #> cat /sys/block/zram0/comp_algorithm > > lzo lz4 [foo] > > ? > > yes, "lzo lz4 [out-of-tree-module-name]". Makes sense!
[PATCH v2 06/27] staging: unisys: visorbus: modify format string to match argument
From: David Binder Modifies the format string of snprintf to expect an unsigned int instead of a signed one, per the supplied argument. Signed-off-by: David Binder Signed-off-by: David Kershner Reviewed-by: Tim Sell --- drivers/staging/unisys/visorbus/visorbus_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/unisys/visorbus/visorbus_main.c b/drivers/staging/unisys/visorbus/visorbus_main.c index cb08ce4..c30b4b2 100644 --- a/drivers/staging/unisys/visorbus/visorbus_main.c +++ b/drivers/staging/unisys/visorbus/visorbus_main.c @@ -433,7 +433,7 @@ static ssize_t client_bus_info_show(struct device *dev, if (vdev->name) partition_name = vdev->name; shift = snprintf(pos, remain, -"Client device / client driver info for %s eartition (vbus #%d):\n", +"Client device / client driver info for %s eartition (vbus #%u):\n", partition_name, vdev->chipset_dev_no); pos += shift; remain -= shift; -- 1.9.1
[PATCH v2 27/27] drivers: Add visorbus to the drivers directory
visorbus is currently located at drivers/staging/visorbus, this patch moves it to drivers/virt. Signed-off-by: David Kershner Reviewed-by: Tim Sell --- drivers/staging/unisys/Kconfig| 3 +-- drivers/staging/unisys/Makefile | 1 - drivers/virt/Kconfig | 2 ++ drivers/virt/Makefile | 1 + drivers/{staging/unisys => virt}/visorbus/Kconfig | 0 drivers/{staging/unisys => virt}/visorbus/Makefile| 0 drivers/{staging/unisys => virt}/visorbus/controlvmchannel.h | 0 drivers/{staging/unisys => virt}/visorbus/controlvmcompletionstatus.h | 0 drivers/{staging/unisys => virt}/visorbus/iovmcall_gnuc.h | 0 drivers/{staging/unisys => virt}/visorbus/vbuschannel.h | 0 drivers/{staging/unisys => virt}/visorbus/vbusdeviceinfo.h| 0 drivers/{staging/unisys => virt}/visorbus/vbushelper.h| 0 drivers/{staging/unisys => virt}/visorbus/visorbus_main.c | 0 drivers/{staging/unisys => virt}/visorbus/visorbus_private.h | 0 drivers/{staging/unisys => virt}/visorbus/visorchannel.c | 0 drivers/{staging/unisys => virt}/visorbus/visorchipset.c | 0 drivers/{staging/unisys => virt}/visorbus/vmcallinterface.h | 0 17 files changed, 4 insertions(+), 3 deletions(-) rename drivers/{staging/unisys => virt}/visorbus/Kconfig (100%) rename drivers/{staging/unisys => virt}/visorbus/Makefile (100%) rename drivers/{staging/unisys => virt}/visorbus/controlvmchannel.h (100%) rename drivers/{staging/unisys => virt}/visorbus/controlvmcompletionstatus.h (100%) rename drivers/{staging/unisys => virt}/visorbus/iovmcall_gnuc.h (100%) rename drivers/{staging/unisys => virt}/visorbus/vbuschannel.h (100%) rename drivers/{staging/unisys => virt}/visorbus/vbusdeviceinfo.h (100%) rename drivers/{staging/unisys => virt}/visorbus/vbushelper.h (100%) rename drivers/{staging/unisys => virt}/visorbus/visorbus_main.c (100%) rename drivers/{staging/unisys => virt}/visorbus/visorbus_private.h (100%) rename drivers/{staging/unisys => virt}/visorbus/visorchannel.c (100%) rename drivers/{staging/unisys => virt}/visorbus/visorchipset.c (100%) rename drivers/{staging/unisys => virt}/visorbus/vmcallinterface.h (100%) diff --git a/drivers/staging/unisys/Kconfig b/drivers/staging/unisys/Kconfig index 4f1f5e6..dab09a9 100644 --- a/drivers/staging/unisys/Kconfig +++ b/drivers/staging/unisys/Kconfig @@ -3,7 +3,7 @@ # menuconfig UNISYSSPAR bool "Unisys SPAR driver support" - depends on X86_64 && !UML + depends on X86_64 && !UML && VIRT_DRIVERS select PCI select ACPI ---help--- @@ -11,7 +11,6 @@ menuconfig UNISYSSPAR if UNISYSSPAR -source "drivers/staging/unisys/visorbus/Kconfig" source "drivers/staging/unisys/visornic/Kconfig" source "drivers/staging/unisys/visorinput/Kconfig" source "drivers/staging/unisys/visorhba/Kconfig" diff --git a/drivers/staging/unisys/Makefile b/drivers/staging/unisys/Makefile index 20eb098..e45f44b 100644 --- a/drivers/staging/unisys/Makefile +++ b/drivers/staging/unisys/Makefile @@ -1,7 +1,6 @@ # # Makefile for Unisys SPAR drivers # -obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/ obj-$(CONFIG_UNISYS_VISORNIC) += visornic/ obj-$(CONFIG_UNISYS_VISORINPUT)+= visorinput/ obj-$(CONFIG_UNISYS_VISORHBA) += visorhba/ diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 99ebdde..0c60896 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -30,4 +30,6 @@ config FSL_HV_MANAGER 4) A kernel interface for receiving callbacks when a managed partition shuts down. +source "drivers/virt/visorbus/Kconfig" endif + diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile index c47f04d..44aebd2 100644 --- a/drivers/virt/Makefile +++ b/drivers/virt/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_FSL_HV_MANAGER) += fsl_hypervisor.o +obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/ diff --git a/drivers/staging/unisys/visorbus/Kconfig b/drivers/virt/visorbus/Kconfig similarity index 100% rename from drivers/staging/unisys/visorbus/Kconfig rename to drivers/virt/visorbus/Kconfig diff --git a/drivers/staging/unisys/visorbus/Makefile b/drivers/virt/visorbus/Makefile similarity index 100% rename from drivers/staging/unisys/visorbus/Makefile rename to drivers/virt/visorbus/Makefile diff --git a/drivers/staging/unisys/visorbus/controlvmchannel.h b/drivers/virt/visorbus/controlvmchannel.h similarity index 100% rename from drivers/staging/unisys/visorbus/controlvmchannel.h rename to drivers/virt/visorbus/controlvmchannel.h diff --git a/drivers/staging/unisys/visorbus/controlvmcompletionstatus.h b/drivers/virt/visorbus/controlvmcompletionstatus.h similarity index 100% rename from drivers/stagin
[PATCH v2 00/27] Fixed issues raised by tglx, then move visorbus to drivers/virt
tglx: The following patchset fixes issues you raised during your code review of visorbus on 5/18. Greg: Please drop all other patch series sent in from me as this patch series incorporates the required patches from the previous series. Converts visorbus to use a kernel timer for periodic device-specific callbacks instead of a workqueue, making the implementation in periodic_work.c and periodic_work.h no longer necessary. These files are then deleted. The visordriver_callback_lock has been switched to a mutex. Several module parameters and structures were removed that were no longer being used. Changes since v1: - Added the patch staging: unisys: visorbus change -1 return values - Added the patch staging: unisys: visorchipset change -1 return value - Added the patch staging: unisys: iovmcall_gnuc.h change -1 return values Bryan Thompson (4): staging: unisys: visorbus: Make visordriver_callback_lock a mutex staging: unisys: visorbus: Remove unnecessary EXPORT_SYMBOL statements staging: unisys: visorbus: Remove unused functions staging: unisys: Remove reference to unused STANDALONE_CLIENT David Binder (12): staging: unisys: visorbus: remove unused module parameters staging: unisys: visorbus: remove unused struct staging: unisys: visorbus: modify format string to match argument staging: unisys: visornic: Correct comment spelling mistake staging: unisys: include: Remove thread-related enum members staging: unisys: visorbus: vbusdeviceinfo function descriptions more kerneldoc-like staging: unisys: visorbus: make function descriptions more kerneldoc-like staging: unisys: visorbus: make visorbus_private.h function descriptions more kerneldoc-like staging: unisys: visorbus: make visorchannel function descriptions more kerneldoc-like staging: unisys: visorbus: make visorchipset function descriptions more kerneldoc-like staging: unisys: visorbus: Move visorbus-unique functions to private header staging: unisys: visorbus: Add kerneldoc-style comments for visorbus API David Kershner (4): staging: unisys: Move vbushelper.h to visorbus directory include: linux: visorbus: Add visorbus to include/linux directory Documentation: Move visorbus documentation from staging to Documentation/ drivers: Add visorbus to the drivers directory Erik Arfvidson (3): staging: unisys: visorbus change -1 return values staging: unisys: visorchipset change -1 return value staging: unisys: iovmcall_gnuc.h change -1 return values Tim Sell (4): staging: unisys: visorbus: removed unused periodic_test_workqueue staging: unisys: visorinput: remove unnecessary locking staging: unisys: visorbus: use kernel timer instead of workqueue staging: unisys: visorbus: remove periodic_work.h/.c .../ABI/stable/sysfs-bus-visorbus |0 .../overview.txt => Documentation/visorbus.txt |0 drivers/staging/unisys/Kconfig |3 +- drivers/staging/unisys/MAINTAINERS |2 +- drivers/staging/unisys/Makefile|1 - drivers/staging/unisys/include/periodic_work.h | 40 - drivers/staging/unisys/include/visorbus.h | 234 drivers/staging/unisys/visorbus/Makefile | 12 - drivers/staging/unisys/visorbus/periodic_work.c| 204 --- drivers/staging/unisys/visorbus/visorbus_main.c| 1344 drivers/staging/unisys/visorbus/visorbus_private.h | 68 - drivers/staging/unisys/visorbus/visorchannel.c | 635 - drivers/staging/unisys/visorhba/Makefile |2 - drivers/staging/unisys/visorhba/visorhba_main.c|5 +- drivers/staging/unisys/visorinput/Makefile |2 - drivers/staging/unisys/visorinput/visorinput.c | 63 +- drivers/staging/unisys/visornic/Makefile |2 - drivers/staging/unisys/visornic/visornic_main.c|7 +- drivers/virt/Kconfig |2 + drivers/virt/Makefile |1 + drivers/{staging/unisys => virt}/visorbus/Kconfig |0 drivers/virt/visorbus/Makefile |9 + .../unisys => virt}/visorbus/controlvmchannel.h|2 +- .../visorbus/controlvmcompletionstatus.h |0 .../unisys => virt}/visorbus/iovmcall_gnuc.h |4 +- .../unisys => virt}/visorbus/vbuschannel.h |3 +- .../unisys => virt}/visorbus/vbusdeviceinfo.h | 11 +- .../unisys/include => virt/visorbus}/vbushelper.h |0 drivers/virt/visorbus/visorbus_main.c | 1260 ++ drivers/virt/visorbus/visorbus_private.h | 96 ++ drivers/virt/visorbus/visorchannel.c | 459 +++ .../unisys => virt}/visorbus/visorchipset.c| 54 +- .../unisys => virt}/visorbus/vmcallinterface.h |5 +- .../include => include/linux/visorbus}/channel.h |0 .../linux/visorbus}/channel_guid.h |0 .../linux/visorbus
[PATCH v2 25/27] include: linux: visorbus: Add visorbus to include/linux directory
Update include/linux to include the s-Par associated common include header files needed for the s-Par visorbus. Since we have now moved the include directories over to include/linux/visorbus this patch makes all of the visor drivers visorbus, visorinput, visornic, and visorhba use the new include folders. Signed-off-by: David Kershner Reviewed-by: Tim Sell --- drivers/staging/unisys/MAINTAINERS| 2 +- drivers/staging/unisys/visorbus/Makefile | 2 -- drivers/staging/unisys/visorbus/controlvmchannel.h| 2 +- drivers/staging/unisys/visorbus/vbuschannel.h | 3 ++- drivers/staging/unisys/visorbus/visorbus_main.c | 6 +++--- drivers/staging/unisys/visorbus/visorchannel.c| 4 ++-- drivers/staging/unisys/visorbus/visorchipset.c| 8 drivers/staging/unisys/visorbus/vmcallinterface.h | 5 ++--- drivers/staging/unisys/visorhba/Makefile | 2 -- drivers/staging/unisys/visorhba/visorhba_main.c | 5 ++--- drivers/staging/unisys/visorinput/Makefile| 2 -- drivers/staging/unisys/visorinput/visorinput.c| 6 +++--- drivers/staging/unisys/visornic/Makefile | 2 -- drivers/staging/unisys/visornic/visornic_main.c | 5 ++--- .../staging/unisys/include => include/linux/visorbus}/channel.h | 0 .../unisys/include => include/linux/visorbus}/channel_guid.h | 0 .../unisys/include => include/linux/visorbus}/diagchannel.h | 0 .../unisys/include => include/linux/visorbus}/guestlinuxdebug.h | 0 .../staging/unisys/include => include/linux/visorbus}/iochannel.h | 0 .../staging/unisys/include => include/linux/visorbus}/version.h | 0 .../staging/unisys/include => include/linux/visorbus}/visorbus.h | 0 21 files changed, 22 insertions(+), 32 deletions(-) rename {drivers/staging/unisys/include => include/linux/visorbus}/channel.h (100%) rename {drivers/staging/unisys/include => include/linux/visorbus}/channel_guid.h (100%) rename {drivers/staging/unisys/include => include/linux/visorbus}/diagchannel.h (100%) rename {drivers/staging/unisys/include => include/linux/visorbus}/guestlinuxdebug.h (100%) rename {drivers/staging/unisys/include => include/linux/visorbus}/iochannel.h (100%) rename {drivers/staging/unisys/include => include/linux/visorbus}/version.h (100%) rename {drivers/staging/unisys/include => include/linux/visorbus}/visorbus.h (100%) diff --git a/drivers/staging/unisys/MAINTAINERS b/drivers/staging/unisys/MAINTAINERS index 1f0425b..146a8c3 100644 --- a/drivers/staging/unisys/MAINTAINERS +++ b/drivers/staging/unisys/MAINTAINERS @@ -1,5 +1,5 @@ Unisys s-Par drivers M: David Kershner S: Maintained -F: Documentation/s-Par/overview.txt +F: Documentation/visorbus.txt F: drivers/staging/unisys/ diff --git a/drivers/staging/unisys/visorbus/Makefile b/drivers/staging/unisys/visorbus/Makefile index f3730d8..7f328cc 100644 --- a/drivers/staging/unisys/visorbus/Makefile +++ b/drivers/staging/unisys/visorbus/Makefile @@ -7,5 +7,3 @@ obj-$(CONFIG_UNISYS_VISORBUS) += visorbus.o visorbus-y := visorbus_main.o visorbus-y += visorchannel.o visorbus-y += visorchipset.o - -ccflags-y += -Idrivers/staging/unisys/include diff --git a/drivers/staging/unisys/visorbus/controlvmchannel.h b/drivers/staging/unisys/visorbus/controlvmchannel.h index 03e36fb..0a0e221 100644 --- a/drivers/staging/unisys/visorbus/controlvmchannel.h +++ b/drivers/staging/unisys/visorbus/controlvmchannel.h @@ -16,7 +16,7 @@ #define __CONTROLVMCHANNEL_H__ #include -#include "channel.h" +#include /* {2B3C2D10-7EF5-4ad8-B966-3448B7386B3D} */ #define SPAR_CONTROLVM_CHANNEL_PROTOCOL_UUID \ diff --git a/drivers/staging/unisys/visorbus/vbuschannel.h b/drivers/staging/unisys/visorbus/vbuschannel.h index 90fa12e..3e0388d 100644 --- a/drivers/staging/unisys/visorbus/vbuschannel.h +++ b/drivers/staging/unisys/visorbus/vbuschannel.h @@ -23,8 +23,9 @@ * the client devices and client drivers for the server end to see. */ #include +#include + #include "vbusdeviceinfo.h" -#include "channel.h" /* {193b331b-c58f-11da-95a9-00e08161165f} */ #define SPAR_VBUS_CHANNEL_PROTOCOL_UUID \ diff --git a/drivers/staging/unisys/visorbus/visorbus_main.c b/drivers/staging/unisys/visorbus/visorbus_main.c index 0a537c7..ac480fb 100644 --- a/drivers/staging/unisys/visorbus/visorbus_main.c +++ b/drivers/staging/unisys/visorbus/visorbus_main.c @@ -16,11 +16,11 @@ #include -#include "visorbus.h" +#include +#include +#include #include "visorbus_private.h" -#include "version.h" #include "vbuschannel.h" -#include "guestlinuxdebug.h" #include "vmcallinterface.h" #define MYDRVNAME "visorbus" diff --git a/drivers/staging/unisys/visorbus/visorchannel.c b/drivers/staging/unisys/visorbus/visorchannel.c index 1b743d7.
Re: [PATCH 42/54] MAINTAINERS: Add file patterns for scsi device tree bindings
> "Geert" == Geert Uytterhoeven writes: Geert> Submitters of device tree binding documentation may forget to CC Geert> the subsystem maintainer if this is missing. Applied to 4.8/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
[PATCH v2 13/27] staging: unisys: visorbus: Make visordriver_callback_lock a mutex
From: Bryan Thompson visordriver_callback_lock is just a binary semaphore that logically makes more sense as a mutex. Signed-off-by: Bryan Thompson Signed-off-by: David Kershner Reviewed-by: Tim Sell --- drivers/staging/unisys/include/visorbus.h | 3 ++- drivers/staging/unisys/visorbus/visorbus_main.c | 10 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/staging/unisys/include/visorbus.h b/drivers/staging/unisys/include/visorbus.h index 9bb88bb..9da25c0 100644 --- a/drivers/staging/unisys/include/visorbus.h +++ b/drivers/staging/unisys/include/visorbus.h @@ -161,7 +161,8 @@ struct visor_device { struct timer_list timer; bool timer_active; bool being_removed; - struct semaphore visordriver_callback_lock; + /* mutex to serialize visor_driver function callbacks */ + struct mutex visordriver_callback_lock; bool pausing; bool resuming; u32 chipset_bus_no; diff --git a/drivers/staging/unisys/visorbus/visorbus_main.c b/drivers/staging/unisys/visorbus/visorbus_main.c index 24b27ff..44609ee 100644 --- a/drivers/staging/unisys/visorbus/visorbus_main.c +++ b/drivers/staging/unisys/visorbus/visorbus_main.c @@ -574,7 +574,7 @@ visordriver_probe_device(struct device *xdev) if (!drv->probe) return -ENODEV; - down(&dev->visordriver_callback_lock); + mutex_lock(&dev->visordriver_callback_lock); dev->being_removed = false; res = drv->probe(dev); @@ -584,7 +584,7 @@ visordriver_probe_device(struct device *xdev) fix_vbus_dev_info(dev); } - up(&dev->visordriver_callback_lock); + mutex_unlock(&dev->visordriver_callback_lock); return res; } @@ -600,11 +600,11 @@ visordriver_remove_device(struct device *xdev) dev = to_visor_device(xdev); drv = to_visor_driver(xdev->driver); - down(&dev->visordriver_callback_lock); + mutex_lock(&dev->visordriver_callback_lock); dev->being_removed = true; if (drv->remove) drv->remove(dev); - up(&dev->visordriver_callback_lock); + mutex_unlock(&dev->visordriver_callback_lock); dev_stop_periodic_work(dev); put_device(&dev->device); @@ -764,7 +764,7 @@ create_visor_device(struct visor_device *dev) POSTCODE_LINUX_4(DEVICE_CREATE_ENTRY_PC, chipset_dev_no, chipset_bus_no, POSTCODE_SEVERITY_INFO); - sema_init(&dev->visordriver_callback_lock, 1); /* unlocked */ + mutex_init(&dev->visordriver_callback_lock); dev->device.bus = &visorbus_type; dev->device.groups = visorbus_channel_groups; device_initialize(&dev->device); -- 1.9.1
[PATCH v2 26/27] Documentation: Move visorbus documentation from staging to Documentation/
This patch simple does a git mv of the drivers/staging/unisys/Documentation directory to Documentation. Renames overview.txt to visorbus.txt and renames sysfs-platform-visorchipset to the correct name sysfs-bus-visorbus. Signed-off-by: David Kershner Reviewed-by: Tim Sell --- .../ABI/stable/sysfs-bus-visorbus | 0 .../unisys/Documentation/overview.txt => Documentation/visorbus.txt | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename drivers/staging/unisys/Documentation/ABI/sysfs-platform-visorchipset => Documentation/ABI/stable/sysfs-bus-visorbus (100%) rename drivers/staging/unisys/Documentation/overview.txt => Documentation/visorbus.txt (100%) diff --git a/drivers/staging/unisys/Documentation/ABI/sysfs-platform-visorchipset b/Documentation/ABI/stable/sysfs-bus-visorbus similarity index 100% rename from drivers/staging/unisys/Documentation/ABI/sysfs-platform-visorchipset rename to Documentation/ABI/stable/sysfs-bus-visorbus diff --git a/drivers/staging/unisys/Documentation/overview.txt b/Documentation/visorbus.txt similarity index 100% rename from drivers/staging/unisys/Documentation/overview.txt rename to Documentation/visorbus.txt -- 1.9.1
[PATCH v2 14/27] staging: unisys: visorbus: Remove unnecessary EXPORT_SYMBOL statements
From: Bryan Thompson The driver that is now visorbus started out as multiple separate drivers, and when they were merged the EXPORT_SYMBOL statements that were required for separate drivers were left in the code. This patch removes those now unnecessary exports. Signed-off-by: Bryan Thompson Signed-off-by: David Kershner Reviewed-by: Tim Sell --- drivers/staging/unisys/visorbus/visorbus_main.c | 1 - drivers/staging/unisys/visorbus/visorchannel.c | 17 - drivers/staging/unisys/visorbus/visorchipset.c | 2 -- 3 files changed, 20 deletions(-) diff --git a/drivers/staging/unisys/visorbus/visorbus_main.c b/drivers/staging/unisys/visorbus/visorbus_main.c index 44609ee..247a9ad 100644 --- a/drivers/staging/unisys/visorbus/visorbus_main.c +++ b/drivers/staging/unisys/visorbus/visorbus_main.c @@ -721,7 +721,6 @@ visorbus_clear_channel(struct visor_device *dev, unsigned long offset, u8 ch, { return visorchannel_clear(dev->visorchannel, offset, ch, nbytes); } -EXPORT_SYMBOL_GPL(visorbus_clear_channel); /** We don't really have a real interrupt, so for now we just call the * interrupt function periodically... diff --git a/drivers/staging/unisys/visorbus/visorchannel.c b/drivers/staging/unisys/visorbus/visorchannel.c index 4337358..1f626c3 100644 --- a/drivers/staging/unisys/visorbus/visorchannel.c +++ b/drivers/staging/unisys/visorbus/visorchannel.c @@ -148,7 +148,6 @@ visorchannel_create(u64 physaddr, unsigned long channel_bytes, return visorchannel_create_guts(physaddr, channel_bytes, gfp, 0, guid, false); } -EXPORT_SYMBOL_GPL(visorchannel_create); struct visorchannel * visorchannel_create_with_lock(u64 physaddr, unsigned long channel_bytes, @@ -157,7 +156,6 @@ visorchannel_create_with_lock(u64 physaddr, unsigned long channel_bytes, return visorchannel_create_guts(physaddr, channel_bytes, gfp, 0, guid, true); } -EXPORT_SYMBOL_GPL(visorchannel_create_with_lock); void visorchannel_destroy(struct visorchannel *channel) @@ -171,21 +169,18 @@ visorchannel_destroy(struct visorchannel *channel) } kfree(channel); } -EXPORT_SYMBOL_GPL(visorchannel_destroy); u64 visorchannel_get_physaddr(struct visorchannel *channel) { return channel->physaddr; } -EXPORT_SYMBOL_GPL(visorchannel_get_physaddr); ulong visorchannel_get_nbytes(struct visorchannel *channel) { return channel->nbytes; } -EXPORT_SYMBOL_GPL(visorchannel_get_nbytes); char * visorchannel_uuid_id(uuid_le *guid, char *s) @@ -193,28 +188,24 @@ visorchannel_uuid_id(uuid_le *guid, char *s) sprintf(s, "%pUL", guid); return s; } -EXPORT_SYMBOL_GPL(visorchannel_uuid_id); char * visorchannel_id(struct visorchannel *channel, char *s) { return visorchannel_uuid_id(&channel->guid, s); } -EXPORT_SYMBOL_GPL(visorchannel_id); char * visorchannel_zoneid(struct visorchannel *channel, char *s) { return visorchannel_uuid_id(&channel->chan_hdr.zone_uuid, s); } -EXPORT_SYMBOL_GPL(visorchannel_zoneid); u64 visorchannel_get_clientpartition(struct visorchannel *channel) { return channel->chan_hdr.partition_handle; } -EXPORT_SYMBOL_GPL(visorchannel_get_clientpartition); int visorchannel_set_clientpartition(struct visorchannel *channel, @@ -223,7 +214,6 @@ visorchannel_set_clientpartition(struct visorchannel *channel, channel->chan_hdr.partition_handle = partition_handle; return 0; } -EXPORT_SYMBOL_GPL(visorchannel_set_clientpartition); uuid_le visorchannel_get_uuid(struct visorchannel *channel) @@ -243,7 +233,6 @@ visorchannel_read(struct visorchannel *channel, ulong offset, return 0; } -EXPORT_SYMBOL_GPL(visorchannel_read); int visorchannel_write(struct visorchannel *channel, ulong offset, @@ -265,7 +254,6 @@ visorchannel_write(struct visorchannel *channel, ulong offset, return 0; } -EXPORT_SYMBOL_GPL(visorchannel_write); int visorchannel_clear(struct visorchannel *channel, ulong offset, u8 ch, @@ -301,14 +289,12 @@ out_free_page: free_page((unsigned long)buf); return err; } -EXPORT_SYMBOL_GPL(visorchannel_clear); void __iomem * visorchannel_get_header(struct visorchannel *channel) { return (void __iomem *)&channel->chan_hdr; } -EXPORT_SYMBOL_GPL(visorchannel_get_header); /** Return offset of a specific SIGNAL_QUEUE_HEADER from the beginning of a * channel header @@ -522,7 +508,6 @@ visorchannel_signalqueue_slots_avail(struct visorchannel *channel, u32 queue) slots_avail = sig_hdr.max_signals - slots_used; return (int)slots_avail; } -EXPORT_SYMBOL_GPL(visorchannel_signalqueue_slots_avail); int visorchannel_signalqueue_max_slots(struct visorchannel *channel, u32 queue) @@ -533,7 +518,6 @@ visorchannel_signalqueue_max_slots(struct visorchannel *channel, u32 queue) return 0; return (int)sig_hdr.max
[PATCH v2 12/27] staging: unisys: visorbus: remove periodic_work.h/.c
From: Tim Sell These files were made no-longer-necessary by recent commits. Signed-off-by: Tim Sell Signed-off-by: David Kershner --- drivers/staging/unisys/include/periodic_work.h | 40 - drivers/staging/unisys/visorbus/Makefile| 1 - drivers/staging/unisys/visorbus/periodic_work.c | 204 drivers/staging/unisys/visorbus/visorchipset.c | 1 - 4 files changed, 246 deletions(-) delete mode 100644 drivers/staging/unisys/include/periodic_work.h delete mode 100644 drivers/staging/unisys/visorbus/periodic_work.c diff --git a/drivers/staging/unisys/include/periodic_work.h b/drivers/staging/unisys/include/periodic_work.h deleted file mode 100644 index 0b3335a..000 --- a/drivers/staging/unisys/include/periodic_work.h +++ /dev/null @@ -1,40 +0,0 @@ -/* periodic_work.h - * - * Copyright (C) 2010 - 2013 UNISYS CORPORATION - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - */ - -#ifndef __PERIODIC_WORK_H__ -#define __PERIODIC_WORK_H__ - -#include -#include - -/* PERIODIC_WORK an opaque structure to users. - * Fields are declared only in the implementation .c files. - */ -struct periodic_work; - -struct periodic_work * -visor_periodic_work_create(ulong jiffy_interval, - struct workqueue_struct *workqueue, - void (*workfunc)(void *), - void *workfuncarg, - const char *devnam); -void visor_periodic_work_destroy(struct periodic_work *pw); -bool visor_periodic_work_nextperiod(struct periodic_work *pw); -bool visor_periodic_work_start(struct periodic_work *pw); -bool visor_periodic_work_stop(struct periodic_work *pw); - -#endif diff --git a/drivers/staging/unisys/visorbus/Makefile b/drivers/staging/unisys/visorbus/Makefile index fc790e7..f3730d8 100644 --- a/drivers/staging/unisys/visorbus/Makefile +++ b/drivers/staging/unisys/visorbus/Makefile @@ -7,6 +7,5 @@ obj-$(CONFIG_UNISYS_VISORBUS) += visorbus.o visorbus-y := visorbus_main.o visorbus-y += visorchannel.o visorbus-y += visorchipset.o -visorbus-y += periodic_work.o ccflags-y += -Idrivers/staging/unisys/include diff --git a/drivers/staging/unisys/visorbus/periodic_work.c b/drivers/staging/unisys/visorbus/periodic_work.c deleted file mode 100644 index 00b1527..000 --- a/drivers/staging/unisys/visorbus/periodic_work.c +++ /dev/null @@ -1,204 +0,0 @@ -/* periodic_work.c - * - * Copyright (C) 2010 - 2015 UNISYS CORPORATION - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - */ - -/* - * Helper functions to schedule periodic work in Linux kernel mode. - */ -#include - -#include "periodic_work.h" - -#define MYDRVNAME "periodic_work" - -struct periodic_work { - rwlock_t lock; - struct delayed_work work; - void (*workfunc)(void *); - void *workfuncarg; - bool is_scheduled; - bool want_to_stop; - ulong jiffy_interval; - struct workqueue_struct *workqueue; - const char *devnam; -}; - -static void periodic_work_func(struct work_struct *work) -{ - struct periodic_work *pw; - - pw = container_of(work, struct periodic_work, work.work); - (*pw->workfunc)(pw->workfuncarg); -} - -struct periodic_work -*visor_periodic_work_create(ulong jiffy_interval, - struct workqueue_struct *workqueue, - void (*workfunc)(void *), - void *workfuncarg, - const char *devnam) -{ - struct periodic_work *pw; - - pw = kzalloc(sizeof(*pw), GFP_KERNEL | __GFP_NORETRY); - if (!pw) - return NULL; - - rwlock_init(&pw->lock); - pw->jiffy_interval = jiffy_interval; - pw->workqueue = workqueue; - pw->workfunc = workfunc; - pw->workfuncarg = workfuncarg; - pw->devnam = devnam; - return pw; -} -EXPORT_SYMBOL_GPL(visor_periodic_work_create); - -void visor_periodic_work_destroy(struct periodic_work *pw) -{ - kfree(
[PATCH v2 17/27] staging: unisys: visorbus: vbusdeviceinfo function descriptions more kerneldoc-like
From: David Binder Per audit feedback from Thomas Gleixner, function descriptions in vbusdeviceinfo.h now utilize a more kerneldoc-like formatting. The affected comments do not implement other kerneldoc requirements. Signed-off-by: David Binder Signed-off-by: David Kershner Reviewed-by: Tim Sell --- drivers/staging/unisys/visorbus/vbusdeviceinfo.h | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/staging/unisys/visorbus/vbusdeviceinfo.h b/drivers/staging/unisys/visorbus/vbusdeviceinfo.h index abdab4a..010ea68 100644 --- a/drivers/staging/unisys/visorbus/vbusdeviceinfo.h +++ b/drivers/staging/unisys/visorbus/vbusdeviceinfo.h @@ -34,7 +34,8 @@ struct ultra_vbus_deviceinfo { #pragma pack(pop) -/* Reads chars from the buffer at for bytes, and writes to +/** + * Reads chars from the buffer at for bytes, and writes to * the buffer at , which is bytes long, ensuring never to * overflow the buffer at , using the following rules: * - printable characters are simply copied from the buffer at to the @@ -92,7 +93,8 @@ vbuschannel_sanitize_buffer(char *p, int remain, char *src, int srcmax) p++; chars++; remain--; \ } while (0) -/* Converts the non-negative value at to an ascii decimal string +/** + * Converts the non-negative value at to an ascii decimal string * at , writing at most bytes. Note there is NO '\0' termination * written to . * @@ -141,8 +143,9 @@ vbuschannel_itoa(char *p, int remain, int num) return digits; } -/* Reads , and converts its contents to a printable string at , - * writing at most bytes. Note there is NO '\0' termination +/** + * Reads , and converts its contents to a printable string at , + * writing at most bytes. Note there is NO '\0' termination * written to . * * Pass >= 0 if you want a device index presented. -- 1.9.1