Re: [REPOST] [PATCH 0/6] fixes and MPC8308 support for the mpc512x_dma driver
Hi Piotr, On 27.10.2010 11:24, Piotr Zięcik wrote: Currently I am not able to deal with this as I am much involved in other development. I see. Excuse me for disturbing you then. Guys, anybody can review/test/pull these patches? Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/6] mpc512x_dma: scatter/gather fix
While testing mpc512x-dma driver with dmatest module I've found that I can hang the mpc512x-dma by issuing requests from multiple threads to the single channel. (insmod dmatest.ko max_channels=1 threads_per_chan=16) After investigating this case I've managed to find that this happens if and only if we have more than one queued request. In this case the driver tries to make use of hardware scatter/gather functionality. I've found two problems with scatter/gather: 1. When TCD is copied from RAM to the TCD register space with memcpy_toio() e_sg bit eventually gets cleared. This results in only first TCD being executed. I've added setting of e_sg bit explicitly in the TCD registers. BTW, what is the correct way to do this? (How can I use setbits with bitfield structure?) After that hardware loads consecutive TCDs and we hit the second issue. 2. Existing code clears int_maj bit in the last TCD so we never get an interrupt on transfer completion. With these fixes my tests with many threads of single channel succeed but tests that use many channels simultaneously still don't work reliably. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/mpc512x_dma.c |4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 4e9cbf3..1bc04aa 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -252,11 +252,13 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan) prev = mdesc; } - prev-tcd-start = 0; prev-tcd-int_maj = 1; /* Send first descriptor in chain into hardware */ memcpy_toio(mdma-tcd[cid], first-tcd, sizeof(struct mpc_dma_tcd)); + + if (first != prev) + mdma-tcd[cid].e_sg = 1; out_8(mdma-regs-dmassrt, cid); } -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/6] mpc512x_dma: add MPC8308 support
MPC8308 has pretty much the same DMA controller as MPC5121 and this patch adds support for MPC8308 to the mpc512x_dma driver. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/Kconfig |2 +- drivers/dma/mpc512x_dma.c | 95 +--- 2 files changed, 72 insertions(+), 25 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 9520cf0..5c5e95b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -100,7 +100,7 @@ config FSL_DMA config MPC512X_DMA tristate Freescale MPC512x built-in DMA engine support - depends on PPC_MPC512x + depends on PPC_MPC512x || PPC_MPC831x select DMA_ENGINE ---help--- Enable support for the Freescale MPC512x built-in DMA engine. diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 0717527..97b92ec 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -1,6 +1,7 @@ /* * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008. * Copyright (C) Semihalf 2009 + * Copyright (C) Ilya Yanok, Emcraft Systems 2010 * * Written by Piotr Ziecik ko...@semihalf.com. 
Hardware description * (defines, structures and comments) was taken from MPC5121 DMA driver @@ -70,6 +71,8 @@ #define MPC_DMA_DMAES_SBE (1 1) #define MPC_DMA_DMAES_DBE (1 0) +#define MPC_DMA_DMAGPOR_SNOOP_ENABLE (1 6) + #define MPC_DMA_TSIZE_10x00 #define MPC_DMA_TSIZE_20x01 #define MPC_DMA_TSIZE_40x02 @@ -104,7 +107,10 @@ struct __attribute__ ((__packed__)) mpc_dma_regs { /* 0x30 */ u32 dmahrsh;/* DMA hw request status high(ch63~32) */ u32 dmahrsl;/* DMA hardware request status low(ch31~0) */ - u32 dmaihsa;/* DMA interrupt high select AXE(ch63~32) */ + union { + u32 dmaihsa;/* DMA interrupt high select AXE(ch63~32) */ + u32 dmagpor;/* (General purpose register on MPC8308) */ + }; u32 dmailsa;/* DMA interrupt low select AXE(ch31~0) */ /* 0x40 ~ 0xff */ u32 reserve0[48]; /* Reserved */ @@ -195,7 +201,9 @@ struct mpc_dma { struct mpc_dma_regs __iomem *regs; struct mpc_dma_tcd __iomem *tcd; int irq; + int irq2; uinterror_status; + int is_mpc8308; /* Lock for error_status field in this structure */ spinlock_t error_status_lock; @@ -307,8 +315,10 @@ static irqreturn_t mpc_dma_irq(int irq, void *data) spin_unlock(mdma-error_status_lock); /* Handle interrupt on each channel */ - mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmainth), + if (mdma-dma.chancnt 32) { + mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmainth), in_be32(mdma-regs-dmaerrh), 32); + } mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmaintl), in_be32(mdma-regs-dmaerrl), 0); @@ -562,6 +572,7 @@ static struct dma_async_tx_descriptor * mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags) { + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); struct mpc_dma_desc *mdesc = NULL; struct mpc_dma_tcd *tcd; @@ -590,7 +601,8 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, tcd-dsize = MPC_DMA_TSIZE_32; tcd-soff = 32; tcd-doff = 32; - } else if (IS_ALIGNED(src | dst | len, 16)) { 
+ } else if (!mdma-is_mpc8308 IS_ALIGNED(src | dst | len, 16)) { + /* MPC8308 doesn't support 16 byte transfers */ tcd-ssize = MPC_DMA_TSIZE_16; tcd-dsize = MPC_DMA_TSIZE_16; tcd-soff = 16; @@ -650,6 +662,15 @@ static int __devinit mpc_dma_probe(struct platform_device *op, return -EINVAL; } + if (of_device_is_compatible(dn, fsl,mpc8308-dma)) { + mdma-is_mpc8308 = 1; + mdma-irq2 = irq_of_parse_and_map(dn, 1); + if (mdma-irq2 == NO_IRQ) { + dev_err(dev, Error mapping IRQ!\n); + return -EINVAL; + } + } + retval = of_address_to_resource(dn, 0, res); if (retval) { dev_err(dev, Error parsing memory region!\n); @@ -680,11 +701,23 @@ static int __devinit mpc_dma_probe(struct platform_device *op, return -EINVAL; } + if (mdma-is_mpc8308) { + retval = devm_request_irq(dev, mdma-irq2, mpc_dma_irq, 0
[PATCH 6/6] mpc8308_p1m: add DMA controller device-tree node
MPC8308 has DMA controller compatible with mpc512x_dma driver. This patch adds device-tree node to support DMA controller on MPC8308 P1M board. Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/boot/dts/mpc8308_p1m.dts |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts index 05a76cc..697b3f6 100644 --- a/arch/powerpc/boot/dts/mpc8308_p1m.dts +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -297,6 +297,14 @@ interrupt-parent = ipic ; }; + d...@2c000 { + compatible = fsl,mpc8308-dma, fsl,mpc5121-dma; + reg = 0x2c000 0x1800; + interrupts = 3 0x8 + 94 0x8; + interrupt-parent = ipic ; + }; + }; pci0: p...@e0009000 { -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 5/6] MPC8308RDB: add DMA controller device-tree node
MPC8308 has DMA controller compatible with mpc512x_dma driver. This patch adds device-tree node to support DMA controller on MPC8308RDB board. Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/boot/dts/mpc8308rdb.dts |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts index 1e2b888..a0bd188 100644 --- a/arch/powerpc/boot/dts/mpc8308rdb.dts +++ b/arch/powerpc/boot/dts/mpc8308rdb.dts @@ -265,6 +265,14 @@ interrupt-parent = ipic ; }; + d...@2c000 { + compatible = fsl,mpc8308-dma, fsl,mpc5121-dma; + reg = 0x2c000 0x1800; + interrupts = 3 0x8 + 94 0x8; + interrupt-parent = ipic ; + }; + }; pci0: p...@e0009000 { -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[REPOST] [PATCH 0/6] fixes and MPC8308 support for the mpc512x_dma driver
Hello everybody, meanwhile I've fixed one more issue in the mpc512x_dma driver. Any comments? Anybody interested in this driver? Piotr? Still unsure how to deal with bitfield structures in IO space... Regards, Ilya. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/6] mpc512x_dma: fix the hanged transfer issue
Current code clears interrupt active status _after_ submitting new transfers. This leaves a possibility of clearing the interrupt for this new transfer (if it is triggered fast enough) and thus losing this interrupt. We want to clear interrupt active status _before_ new transfers are submitted and for the current channel only. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/mpc512x_dma.c |9 +++-- 1 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 1bc04aa..0717527 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -276,6 +276,9 @@ static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off) spin_lock(mchan-lock); + out_8(mdma-regs-dmacint, ch + off); + out_8(mdma-regs-dmacerr, ch + off); + /* Check error status */ if (es (1 ch)) list_for_each_entry(mdesc, mchan-active, node) @@ -309,12 +312,6 @@ static irqreturn_t mpc_dma_irq(int irq, void *data) mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmaintl), in_be32(mdma-regs-dmaerrl), 0); - /* Ack interrupt on all channels */ - out_be32(mdma-regs-dmainth, 0x); - out_be32(mdma-regs-dmaintl, 0x); - out_be32(mdma-regs-dmaerrh, 0x); - out_be32(mdma-regs-dmaerrl, 0x); - /* Schedule tasklet */ tasklet_schedule(mdma-tasklet); -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 4/6] mpc512x_dma: try to free descriptors in case of allocation failure
Currently completed descriptors are processed in the tasklet. This can lead to a deadlock when CONFIG_NET_DMA is enabled (new requests are submitted from softirq context and dma_memcpy_to_iovec() busy loops until the request is submitted). To prevent this we should process completed descriptors from the allocation failure path in prepare_memcpy too. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/mpc512x_dma.c | 79 +--- 1 files changed, 45 insertions(+), 34 deletions(-) diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 97b92ec..59c2701 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -328,19 +328,55 @@ static irqreturn_t mpc_dma_irq(int irq, void *data) return IRQ_HANDLED; } -/* DMA Tasklet */ -static void mpc_dma_tasklet(unsigned long data) +/* proccess completed descriptors */ +static void mpc_dma_process_completed(struct mpc_dma *mdma) { - struct mpc_dma *mdma = (void *)data; dma_cookie_t last_cookie = 0; struct mpc_dma_chan *mchan; struct mpc_dma_desc *mdesc; struct dma_async_tx_descriptor *desc; unsigned long flags; LIST_HEAD(list); - uint es; int i; + for (i = 0; i mdma-dma.chancnt; i++) { + mchan = mdma-channels[i]; + + /* Get all completed descriptors */ + spin_lock_irqsave(mchan-lock, flags); + if (!list_empty(mchan-completed)) + list_splice_tail_init(mchan-completed, list); + spin_unlock_irqrestore(mchan-lock, flags); + + if (list_empty(list)) + continue; + + /* Execute callbacks and run dependencies */ + list_for_each_entry(mdesc, list, node) { + desc = mdesc-desc; + + if (desc-callback) + desc-callback(desc-callback_param); + + last_cookie = desc-cookie; + dma_run_dependencies(desc); + } + + /* Free descriptors */ + spin_lock_irqsave(mchan-lock, flags); + list_splice_tail_init(list, mchan-free); + mchan-completed_cookie = last_cookie; + spin_unlock_irqrestore(mchan-lock, flags); + } +} + +/* DMA Tasklet */ +static void mpc_dma_tasklet(unsigned long data) 
+{ + struct mpc_dma *mdma = (void *)data; + unsigned long flags; + uint es; + spin_lock_irqsave(mdma-error_status_lock, flags); es = mdma-error_status; mdma-error_status = 0; @@ -379,35 +415,7 @@ static void mpc_dma_tasklet(unsigned long data) dev_err(mdma-dma.dev, - Destination Bus Error\n); } - for (i = 0; i mdma-dma.chancnt; i++) { - mchan = mdma-channels[i]; - - /* Get all completed descriptors */ - spin_lock_irqsave(mchan-lock, flags); - if (!list_empty(mchan-completed)) - list_splice_tail_init(mchan-completed, list); - spin_unlock_irqrestore(mchan-lock, flags); - - if (list_empty(list)) - continue; - - /* Execute callbacks and run dependencies */ - list_for_each_entry(mdesc, list, node) { - desc = mdesc-desc; - - if (desc-callback) - desc-callback(desc-callback_param); - - last_cookie = desc-cookie; - dma_run_dependencies(desc); - } - - /* Free descriptors */ - spin_lock_irqsave(mchan-lock, flags); - list_splice_tail_init(list, mchan-free); - mchan-completed_cookie = last_cookie; - spin_unlock_irqrestore(mchan-lock, flags); - } + mpc_dma_process_completed(mdma); } /* Submit descriptor to hardware */ @@ -587,8 +595,11 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, } spin_unlock_irqrestore(mchan-lock, iflags); - if (!mdesc) + if (!mdesc) { + /* try to free completed descriptors */ + mpc_dma_process_completed(mdma); return NULL; + } mdesc-error = 0; tcd = mdesc-tcd; -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[REPOST][PATCH] mpc8308: fix USB DR controller initialization
MPC8308 has ULPI pin muxing settings in SICRH register, bits 17-18 which is different from both MPC8313 and MPC8315. Also MPC8308 doesn't have REFSEL, UTMI_PHY_EN and OTG_PORT fields in the USB DR controller CONTROL register. Signed-off-by: Ilya Yanok ya...@emcraft.com --- Kim, Kumar, Please consider including this patch. Without it USB initialization code writes to the wrong bits on the MPC8308RDB. Regards, Ilya. arch/powerpc/boot/dts/mpc8308rdb.dts |2 +- arch/powerpc/platforms/83xx/mpc83xx.h |2 ++ arch/powerpc/platforms/83xx/usb.c | 21 - 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts index a97eb2d..1e2b888 100644 --- a/arch/powerpc/boot/dts/mpc8308rdb.dts +++ b/arch/powerpc/boot/dts/mpc8308rdb.dts @@ -109,7 +109,7 @@ #address-cells = 1; #size-cells = 1; device_type = soc; - compatible = fsl,mpc8315-immr, simple-bus; + compatible = fsl,mpc8308-immr, simple-bus; ranges = 0 0xe000 0x0010; reg = 0xe000 0x0200; bus-frequency = 0; diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index 0fea881..82a4345 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -35,6 +35,8 @@ /* system i/o configuration register high */ #define MPC83XX_SICRH_OFFS 0x118 +#define MPC8308_SICRH_USB_MASK 0x000c +#define MPC8308_SICRH_USB_ULPI 0x0004 #define MPC834X_SICRH_USB_UTMI 0x0002 #define MPC831X_SICRH_USB_MASK 0x00e0 #define MPC831X_SICRH_USB_ULPI 0x00a0 diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 3ba4bb7..2c64164 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -127,7 +127,8 @@ int mpc831x_usb_cfg(void) /* Configure clock */ immr_node = of_get_parent(np); - if (immr_node of_device_is_compatible(immr_node, fsl,mpc8315-immr)) + if (immr_node (of_device_is_compatible(immr_node, fsl,mpc8315-immr) || + 
of_device_is_compatible(immr_node, fsl,mpc8308-immr))) clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC8315_SCCR_USB_MASK, MPC8315_SCCR_USB_DRCM_01); @@ -138,7 +139,11 @@ int mpc831x_usb_cfg(void) /* Configure pin mux for ULPI. There is no pin mux for UTMI */ if (prop !strcmp(prop, ulpi)) { - if (of_device_is_compatible(immr_node, fsl,mpc8315-immr)) { + if (of_device_is_compatible(immr_node, fsl,mpc8308-immr)) { + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC8308_SICRH_USB_MASK, + MPC8308_SICRH_USB_ULPI); + } else if (of_device_is_compatible(immr_node, fsl,mpc8315-immr)) { clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC8315_SICRL_USB_MASK, MPC8315_SICRL_USB_ULPI); @@ -173,6 +178,9 @@ int mpc831x_usb_cfg(void) !strcmp(prop, utmi))) { u32 refsel; + if (of_device_is_compatible(immr_node, fsl,mpc8308-immr)) + goto out; + if (of_device_is_compatible(immr_node, fsl,mpc8315-immr)) refsel = CONTROL_REFSEL_24MHZ; else @@ -186,9 +194,11 @@ int mpc831x_usb_cfg(void) temp = CONTROL_PHY_CLK_SEL_ULPI; #ifdef CONFIG_USB_OTG /* Set OTG_PORT */ - dr_mode = of_get_property(np, dr_mode, NULL); - if (dr_mode !strcmp(dr_mode, otg)) - temp |= CONTROL_OTG_PORT; + if (!of_device_is_compatible(immr_node, fsl,mpc8308-immr)) { + dr_mode = of_get_property(np, dr_mode, NULL); + if (dr_mode !strcmp(dr_mode, otg)) + temp |= CONTROL_OTG_PORT; + } #endif /* CONFIG_USB_OTG */ out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp); } else { @@ -196,6 +206,7 @@ int mpc831x_usb_cfg(void) ret = -EINVAL; } +out: iounmap(usb_regs); of_node_put(np); return ret; -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/3] mpc512x_dma: add MPC8308 support
MPC8308 has pretty much the same DMA controller as MPC5121 and this patch adds support for MPC8308 to the mpc512x_dma driver. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/Kconfig |2 +- drivers/dma/mpc512x_dma.c | 95 +--- 2 files changed, 72 insertions(+), 25 deletions(-) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 9520cf0..5c5e95b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -100,7 +100,7 @@ config FSL_DMA config MPC512X_DMA tristate Freescale MPC512x built-in DMA engine support - depends on PPC_MPC512x + depends on PPC_MPC512x || PPC_MPC831x select DMA_ENGINE ---help--- Enable support for the Freescale MPC512x built-in DMA engine. diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 0717527..97b92ec 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -1,6 +1,7 @@ /* * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008. * Copyright (C) Semihalf 2009 + * Copyright (C) Ilya Yanok, Emcraft Systems 2010 * * Written by Piotr Ziecik ko...@semihalf.com. 
Hardware description * (defines, structures and comments) was taken from MPC5121 DMA driver @@ -70,6 +71,8 @@ #define MPC_DMA_DMAES_SBE (1 1) #define MPC_DMA_DMAES_DBE (1 0) +#define MPC_DMA_DMAGPOR_SNOOP_ENABLE (1 6) + #define MPC_DMA_TSIZE_10x00 #define MPC_DMA_TSIZE_20x01 #define MPC_DMA_TSIZE_40x02 @@ -104,7 +107,10 @@ struct __attribute__ ((__packed__)) mpc_dma_regs { /* 0x30 */ u32 dmahrsh;/* DMA hw request status high(ch63~32) */ u32 dmahrsl;/* DMA hardware request status low(ch31~0) */ - u32 dmaihsa;/* DMA interrupt high select AXE(ch63~32) */ + union { + u32 dmaihsa;/* DMA interrupt high select AXE(ch63~32) */ + u32 dmagpor;/* (General purpose register on MPC8308) */ + }; u32 dmailsa;/* DMA interrupt low select AXE(ch31~0) */ /* 0x40 ~ 0xff */ u32 reserve0[48]; /* Reserved */ @@ -195,7 +201,9 @@ struct mpc_dma { struct mpc_dma_regs __iomem *regs; struct mpc_dma_tcd __iomem *tcd; int irq; + int irq2; uinterror_status; + int is_mpc8308; /* Lock for error_status field in this structure */ spinlock_t error_status_lock; @@ -307,8 +315,10 @@ static irqreturn_t mpc_dma_irq(int irq, void *data) spin_unlock(mdma-error_status_lock); /* Handle interrupt on each channel */ - mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmainth), + if (mdma-dma.chancnt 32) { + mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmainth), in_be32(mdma-regs-dmaerrh), 32); + } mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmaintl), in_be32(mdma-regs-dmaerrl), 0); @@ -562,6 +572,7 @@ static struct dma_async_tx_descriptor * mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len, unsigned long flags) { + struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan); struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan); struct mpc_dma_desc *mdesc = NULL; struct mpc_dma_tcd *tcd; @@ -590,7 +601,8 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, tcd-dsize = MPC_DMA_TSIZE_32; tcd-soff = 32; tcd-doff = 32; - } else if (IS_ALIGNED(src | dst | len, 16)) { 
+ } else if (!mdma-is_mpc8308 IS_ALIGNED(src | dst | len, 16)) { + /* MPC8308 doesn't support 16 byte transfers */ tcd-ssize = MPC_DMA_TSIZE_16; tcd-dsize = MPC_DMA_TSIZE_16; tcd-soff = 16; @@ -650,6 +662,15 @@ static int __devinit mpc_dma_probe(struct platform_device *op, return -EINVAL; } + if (of_device_is_compatible(dn, fsl,mpc8308-dma)) { + mdma-is_mpc8308 = 1; + mdma-irq2 = irq_of_parse_and_map(dn, 1); + if (mdma-irq2 == NO_IRQ) { + dev_err(dev, Error mapping IRQ!\n); + return -EINVAL; + } + } + retval = of_address_to_resource(dn, 0, res); if (retval) { dev_err(dev, Error parsing memory region!\n); @@ -680,11 +701,23 @@ static int __devinit mpc_dma_probe(struct platform_device *op, return -EINVAL; } + if (mdma-is_mpc8308) { + retval = devm_request_irq(dev, mdma-irq2, mpc_dma_irq, 0
[PATCH 2/3] mpc512x_dma: fix the hanged transfer issue
Current code clears interrupt active status _after_ submitting new transfers. This leaves a possibility of clearing the interrupt for this new transfer (if it is triggered fast enough) and thus losing this interrupt. We want to clear interrupt active status _before_ new transfers are submitted and for the current channel only. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/mpc512x_dma.c |9 +++-- 1 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 1bc04aa..0717527 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -276,6 +276,9 @@ static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off) spin_lock(mchan-lock); + out_8(mdma-regs-dmacint, ch + off); + out_8(mdma-regs-dmacerr, ch + off); + /* Check error status */ if (es (1 ch)) list_for_each_entry(mdesc, mchan-active, node) @@ -309,12 +312,6 @@ static irqreturn_t mpc_dma_irq(int irq, void *data) mpc_dma_irq_process(mdma, in_be32(mdma-regs-dmaintl), in_be32(mdma-regs-dmaerrl), 0); - /* Ack interrupt on all channels */ - out_be32(mdma-regs-dmainth, 0x); - out_be32(mdma-regs-dmaintl, 0x); - out_be32(mdma-regs-dmaerrh, 0x); - out_be32(mdma-regs-dmaerrl, 0x); - /* Schedule tasklet */ tasklet_schedule(mdma-tasklet); -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/3] mpc512x_dma: scatter/gather fix
While testing mpc512x-dma driver with dmatest module I've found that I can hang the mpc512x-dma by issuing requests from multiple threads to the single channel. (insmod dmatest.ko max_channels=1 threads_per_chan=16) After investigating this case I've managed to find that this happens if and only if we have more than one queued request. In this case the driver tries to make use of hardware scatter/gather functionality. I've found two problems with scatter/gather: 1. When TCD is copied from RAM to the TCD register space with memcpy_toio() e_sg bit eventually gets cleared. This results in only first TCD being executed. I've added setting of e_sg bit explicitly in the TCD registers. BTW, what is the correct way to do this? (How can I use setbits with bitfield structure?) After that hardware loads consecutive TCDs and we hit the second issue. 2. Existing code clears int_maj bit in the last TCD so we never get an interrupt on transfer completion. With these fixes my tests with many threads of single channel succeed but tests that use many channels simultaneously still don't work reliably. Signed-off-by: Ilya Yanok ya...@emcraft.com Cc: Piotr Ziecik ko...@semihalf.com --- drivers/dma/mpc512x_dma.c |4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 4e9cbf3..1bc04aa 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -252,11 +252,13 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan) prev = mdesc; } - prev-tcd-start = 0; prev-tcd-int_maj = 1; /* Send first descriptor in chain into hardware */ memcpy_toio(mdma-tcd[cid], first-tcd, sizeof(struct mpc_dma_tcd)); + + if (first != prev) + mdma-tcd[cid].e_sg = 1; out_8(mdma-regs-dmassrt, cid); } -- 1.7.2.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] mpc512x_dma: add MPC8308 support
Dear Wolfgang, 28.09.2010 17:09, Wolfgang Denk wrote: config MPC512X_DMA tristate Freescale MPC512x built-in DMA engine support - depends on PPC_MPC512x + depends on PPC_MPC512x || PPC_MPC831x Is MPC831x correct here? My understanding is that MPC831x processors have yet other DMA controllers, and we're on a MPC8308 here? Well, PPC_MPC831x is not correct here in the strict sense, but there are some reasons for it: 1. We don't really have PPC_MPC8308 config option for MPC8308 processor. Well, maybe that was my fault that I didn't add it when I initially introduced support for MPC8308. But I don't actually see the point for it. All the differences from MPC831x are handled at run-time based on device-tree. 2. Some of MPC831x (I believe it's MPC8315) really has the compatible DMA controller (it's called something like DMA controller of the TDM module). Well it will probably need some additional work in the driver to support this controller but hardware is mostly the same. 3. Well, it's only a compilation option; you still need a proper device-tree node for the driver to start. Ok, you can make your kernel bigger by compiling in the driver which is useless for your CPU but you can't break it provided you have a correct device-tree. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/2] PPC4xx: Generelizing drivers/dma/ppc4xx/adma.c
Hi Tirumala, drivers/dma/ppc4xx/adma.c| 4370 +++--- drivers/dma/ppc4xx/adma.h| 116 +- drivers/dma/ppc4xx/ppc4xx-adma.h | 4020 +++ You've moved tons of code to the header file. Why? Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH][v2] mpc8308_p1m: support for MPC8308 P1M board
This patch adds support for MPC8308 P1M board. Supported devices: DUART Dual Ethernet NOR flash Both I2C controllers USB in peripheral mode PCI Express Signed-off-by: Ilya Yanok ya...@emcraft.com --- Scott's comments addressed. arch/powerpc/boot/dts/mpc8308_p1m.dts | 332 + arch/powerpc/platforms/83xx/Kconfig |4 +- arch/powerpc/platforms/83xx/mpc830x_rdb.c |3 +- 3 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/boot/dts/mpc8308_p1m.dts diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts new file mode 100644 index 000..97cb691 --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -0,0 +1,332 @@ +/* + * mpc8308_p1m Device Tree Source + * + * Copyright 2010 Ilya Yanok, Emcraft Systems, ya...@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +/dts-v1/; + +/ { + compatible = denx,mpc8308_p1m; + #address-cells = 1; + #size-cells = 1; + + aliases { + ethernet0 = enet0; + ethernet1 = enet1; + serial0 = serial0; + serial1 = serial1; + pci0 = pci0; + }; + + cpus { + #address-cells = 1; + #size-cells = 0; + + PowerPC,8...@0 { + device_type = cpu; + reg = 0x0; + d-cache-line-size = 32; + i-cache-line-size = 32; + d-cache-size = 16384; + i-cache-size = 16384; + timebase-frequency = 0; // from bootloader + bus-frequency = 0;// from bootloader + clock-frequency = 0; // from bootloader + }; + }; + + memory { + device_type = memory; + reg = 0x 0x0800; // 128MB at 0 + }; + + local...@e0005000 { + #address-cells = 2; + #size-cells = 1; + compatible = fsl,mpc8315-elbc, fsl,elbc, simple-bus; + reg = 0xe0005000 0x1000; + interrupts = 77 0x8; + interrupt-parent = ipic; + + ranges = 0x0 0x0 0xfc00 0x0400 + 0x1 0x0 0xfbff 0x8000 + 0x2 0x0 0xfbff8000 0x8000; + + fl...@0,0 { + #address-cells = 1; + #size-cells = 1; + compatible = cfi-flash; + reg = 0x0 0x0 0x400; + bank-width = 2; + device-width = 1; + + u-b...@0 { + reg = 0x0 0x6; + read-only; + }; + e...@6 { + reg = 0x6 0x2; + }; + e...@8 { + reg = 0x8 0x2; + }; + ker...@a { + reg = 0xa 0x20; + }; + d...@2a { + reg = 0x2a 0x2; + }; + ramd...@2c { + reg = 0x2c 0x64; + }; + u...@70 { + reg = 0x70 0x390; + }; + }; + + c...@1,0 { + compatible = nxp,sja1000; + reg = 0x1 0x0 0x80; + interrupts = 18 0x8; + interrups-parent = ipic; + }; + + c...@2,0 { + compatible = cpld; + reg = 0x2 0x0 0x8; + interrupts = 48 0x8; + interrups-parent = ipic; + }; + }; + + i...@e000 { + #address-cells = 1; + #size-cells = 1; + device_type = soc; + compatible = fsl,mpc8308-immr, simple-bus; + ranges = 0 0xe000 0x0010; + reg = 0xe000 0x0200; + bus-frequency = 0; + + i...@3000 { + #address-cells = 1; + #size-cells = 0; + compatible = fsl-i2c; + reg = 0x3000 0x100; + interrupts = 14 0x8; + interrupt-parent = ipic; + dfsrr
[PATCH][v3] mpc8308_p1m: support for MPC8308 P1M board
This patch adds support for MPC8308 P1M board. Supported devices: DUART Dual Ethernet NOR flash Both I2C controllers USB in peripheral mode PCI Express Signed-off-by: Ilya Yanok ya...@emcraft.com --- Changed 'compatible' entry for 'cpld' node to denx,mpc8308_p1m-cpld arch/powerpc/boot/dts/mpc8308_p1m.dts | 332 + arch/powerpc/platforms/83xx/Kconfig |4 +- arch/powerpc/platforms/83xx/mpc830x_rdb.c |3 +- 3 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/boot/dts/mpc8308_p1m.dts diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts new file mode 100644 index 000..05a76cc --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -0,0 +1,332 @@ +/* + * mpc8308_p1m Device Tree Source + * + * Copyright 2010 Ilya Yanok, Emcraft Systems, ya...@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +/dts-v1/; + +/ { + compatible = denx,mpc8308_p1m; + #address-cells = 1; + #size-cells = 1; + + aliases { + ethernet0 = enet0; + ethernet1 = enet1; + serial0 = serial0; + serial1 = serial1; + pci0 = pci0; + }; + + cpus { + #address-cells = 1; + #size-cells = 0; + + PowerPC,8...@0 { + device_type = cpu; + reg = 0x0; + d-cache-line-size = 32; + i-cache-line-size = 32; + d-cache-size = 16384; + i-cache-size = 16384; + timebase-frequency = 0; // from bootloader + bus-frequency = 0;// from bootloader + clock-frequency = 0; // from bootloader + }; + }; + + memory { + device_type = memory; + reg = 0x 0x0800; // 128MB at 0 + }; + + local...@e0005000 { + #address-cells = 2; + #size-cells = 1; + compatible = fsl,mpc8315-elbc, fsl,elbc, simple-bus; + reg = 0xe0005000 0x1000; + interrupts = 77 0x8; + interrupt-parent = ipic; + + ranges = 0x0 0x0 0xfc00 0x0400 + 0x1 0x0 0xfbff 0x8000 + 0x2 0x0 0xfbff8000 0x8000; + + fl...@0,0 { + #address-cells = 1; + #size-cells = 1; + compatible = cfi-flash; + reg = 0x0 0x0 0x400; + bank-width = 2; + device-width = 1; + + u-b...@0 { + reg = 0x0 0x6; + read-only; + }; + e...@6 { + reg = 0x6 0x2; + }; + e...@8 { + reg = 0x8 0x2; + }; + ker...@a { + reg = 0xa 0x20; + }; + d...@2a { + reg = 0x2a 0x2; + }; + ramd...@2c { + reg = 0x2c 0x64; + }; + u...@70 { + reg = 0x70 0x390; + }; + }; + + c...@1,0 { + compatible = nxp,sja1000; + reg = 0x1 0x0 0x80; + interrupts = 18 0x8; + interrups-parent = ipic; + }; + + c...@2,0 { + compatible = denx,mpc8308_p1m-cpld; + reg = 0x2 0x0 0x8; + interrupts = 48 0x8; + interrups-parent = ipic; + }; + }; + + i...@e000 { + #address-cells = 1; + #size-cells = 1; + device_type = soc; + compatible = fsl,mpc8308-immr, simple-bus; + ranges = 0 0xe000 0x0010; + reg = 0xe000 0x0200; + bus-frequency = 0; + + i...@3000 { + #address-cells = 1; + #size-cells = 0; + compatible = fsl-i2c; + reg = 0x3000 0x100; + interrupts = 14 0x8; + interrupt
[PATCH] mpc8308_p1m: support for MPC8308 P1M board
This patch adds support for MPC8308 P1M board. Supported devices: DUART Dual Ethernet NOR flash Both I2C controllers USB in peripheral mode PCI Express Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/boot/dts/mpc8308_p1m.dts | 340 + arch/powerpc/platforms/83xx/Kconfig |4 +- arch/powerpc/platforms/83xx/mpc830x_rdb.c |3 +- 3 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/boot/dts/mpc8308_p1m.dts diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts new file mode 100644 index 000..159a0d0 --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -0,0 +1,340 @@ +/* + * mpc8308_p1m Device Tree Source + * + * Copyright 2010 Ilya Yanok, Emcraft Systems, ya...@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +/dts-v1/; + +/ { + compatible = mpc8308_p1m; + #address-cells = 1; + #size-cells = 1; + + aliases { + ethernet0 = enet0; + ethernet1 = enet1; + serial0 = serial0; + serial1 = serial1; + pci0 = pci0; + }; + + cpus { + #address-cells = 1; + #size-cells = 0; + + PowerPC,8...@0 { + device_type = cpu; + reg = 0x0; + d-cache-line-size = 32; + i-cache-line-size = 32; + d-cache-size = 16384; + i-cache-size = 16384; + timebase-frequency = 0; // from bootloader + bus-frequency = 0;// from bootloader + clock-frequency = 0; // from bootloader + }; + }; + + memory { + device_type = memory; + reg = 0x 0x0800; // 128MB at 0 + }; + + local...@e0005000 { + #address-cells = 2; + #size-cells = 1; + compatible = fsl,mpc8315-elbc, fsl,elbc, simple-bus; + reg = 0xe0005000 0x1000; + interrupts = 77 0x8; + interrupt-parent = ipic; + + ranges = 0x0 0x0 0xfc00 0x0400 + 0x1 0x0 0xfbff 0x8000 + 0x2 0x0 0xfbff8000 0x8000; + + fl...@0,0 { + #address-cells = 1; + #size-cells = 1; + compatible = cfi-flash; + reg = 0x0 0x0 0x400; + bank-width = 2; + device-width = 1; + + u-b...@0 { + reg = 0x0 0x6; + read-only; + }; + e...@6 { + reg = 0x6 0x2; + }; + e...@8 { + reg = 0x8 0x2; + }; + ker...@a { + reg = 0xa 0x20; + }; + d...@2a { + reg = 0x2a 0x2; + }; + ramd...@2c { + reg = 0x2c 0x64; + }; + u...@70 { + reg = 0x70 0x390; + }; + }; + + c...@1,0 { + compatible = nxp,sja1000; + reg = 0x1 0x0 0x80; + interrupts = 18 0x8; + interrups-parent = ipic; + }; + + c...@2,0 { + compatible = cpld; + reg = 0x2 0x0 0x8; + interrupts = 48 0x8; + interrups-parent = ipic; + }; + }; + + i...@e000 { + #address-cells = 1; + #size-cells = 1; + device_type = soc; + compatible = fsl,mpc8308-immr, simple-bus; + ranges = 0 0xe000 0x0010; + reg = 0xe000 0x0200; + bus-frequency = 0; + + i...@3000 { + #address-cells = 1; + #size-cells = 0; + cell-index = 0; + compatible = fsl-i2c; + reg = 0x3000 0x100; + interrupts = 14 0x8; + interrupt-parent = ipic; + dfsrr
[PATCH] mpc8308: fix USB DR controller initialization
MPC8308 has ULPI pin muxing settings in SICRH register, bits 17-18 which is different from both MPC8313 and MPC8315. Also MPC8308 doesn't have REFSEL, UTMI_PHY_EN and OTG_PORT fields in the USB DR controller CONTROL register. Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/boot/dts/mpc8308rdb.dts |2 +- arch/powerpc/platforms/83xx/mpc83xx.h |2 ++ arch/powerpc/platforms/83xx/usb.c | 21 - 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts index a97eb2d..1e2b888 100644 --- a/arch/powerpc/boot/dts/mpc8308rdb.dts +++ b/arch/powerpc/boot/dts/mpc8308rdb.dts @@ -109,7 +109,7 @@ #address-cells = 1; #size-cells = 1; device_type = soc; - compatible = fsl,mpc8315-immr, simple-bus; + compatible = fsl,mpc8308-immr, simple-bus; ranges = 0 0xe000 0x0010; reg = 0xe000 0x0200; bus-frequency = 0; diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index 0fea881..82a4345 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -35,6 +35,8 @@ /* system i/o configuration register high */ #define MPC83XX_SICRH_OFFS 0x118 +#define MPC8308_SICRH_USB_MASK 0x000c +#define MPC8308_SICRH_USB_ULPI 0x0004 #define MPC834X_SICRH_USB_UTMI 0x0002 #define MPC831X_SICRH_USB_MASK 0x00e0 #define MPC831X_SICRH_USB_ULPI 0x00a0 diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 3ba4bb7..2c64164 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -127,7 +127,8 @@ int mpc831x_usb_cfg(void) /* Configure clock */ immr_node = of_get_parent(np); - if (immr_node of_device_is_compatible(immr_node, fsl,mpc8315-immr)) + if (immr_node (of_device_is_compatible(immr_node, fsl,mpc8315-immr) || + of_device_is_compatible(immr_node, fsl,mpc8308-immr))) clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC8315_SCCR_USB_MASK, MPC8315_SCCR_USB_DRCM_01); @@ -138,7 +139,11 @@ int 
mpc831x_usb_cfg(void) /* Configure pin mux for ULPI. There is no pin mux for UTMI */ if (prop !strcmp(prop, ulpi)) { - if (of_device_is_compatible(immr_node, fsl,mpc8315-immr)) { + if (of_device_is_compatible(immr_node, fsl,mpc8308-immr)) { + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC8308_SICRH_USB_MASK, + MPC8308_SICRH_USB_ULPI); + } else if (of_device_is_compatible(immr_node, fsl,mpc8315-immr)) { clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC8315_SICRL_USB_MASK, MPC8315_SICRL_USB_ULPI); @@ -173,6 +178,9 @@ int mpc831x_usb_cfg(void) !strcmp(prop, utmi))) { u32 refsel; + if (of_device_is_compatible(immr_node, fsl,mpc8308-immr)) + goto out; + if (of_device_is_compatible(immr_node, fsl,mpc8315-immr)) refsel = CONTROL_REFSEL_24MHZ; else @@ -186,9 +194,11 @@ int mpc831x_usb_cfg(void) temp = CONTROL_PHY_CLK_SEL_ULPI; #ifdef CONFIG_USB_OTG /* Set OTG_PORT */ - dr_mode = of_get_property(np, dr_mode, NULL); - if (dr_mode !strcmp(dr_mode, otg)) - temp |= CONTROL_OTG_PORT; + if (!of_device_is_compatible(immr_node, fsl,mpc8308-immr)) { + dr_mode = of_get_property(np, dr_mode, NULL); + if (dr_mode !strcmp(dr_mode, otg)) + temp |= CONTROL_OTG_PORT; + } #endif /* CONFIG_USB_OTG */ out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp); } else { @@ -196,6 +206,7 @@ int mpc831x_usb_cfg(void) ret = -EINVAL; } +out: iounmap(usb_regs); of_node_put(np); return ret; -- 1.6.2.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/fsl-pci: Fix MSI support on 83xx platforms
Hi Kumar, 05.08.2010 12:02, Kumar Gala wrote: The following commit broke 83xx because it assumed the 83xx platforms exposed the IMMR address in BAR0 like the 85xx/86xx/QoriQ devices do: commit 3da34aae03d498ee62f75aa7467de93cce3030fd Author: Kumar Gala ga...@kernel.crashing.org Date: Tue May 12 15:51:56 2009 -0500 powerpc/fsl: Support unique MSI addresses per PCIe Root Complex However that is not true, so we have to search through the inbound window settings on 83xx to find which one matches the IMMR address to determine its PCI address. As I've already told you my testing on the MPC8308RDB board was successful. As for 85xx boards, Wolfgang told me that DENX doesn't have any 85xx boards that support MSI at the moment, so I can't do complete testing. I'm sorry. I've tested that the TQM8560 board is able to boot and PCI is working as expected though (with your patch applied). I fear I can't do anything else here. Thanks. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/fsl-pci: Fix MSI support on 83xx platforms
Hi Kumar, 05.08.2010 12:02, Kumar Gala wrote: The following commit broke 83xx because it assumed the 83xx platforms exposed the IMMR address in BAR0 like the 85xx/86xx/QoriQ devices do: commit 3da34aae03d498ee62f75aa7467de93cce3030fd Author: Kumar Gala ga...@kernel.crashing.org Date: Tue May 12 15:51:56 2009 -0500 powerpc/fsl: Support unique MSI addresses per PCIe Root Complex However that is not true, so we have to search through the inbound window settings on 83xx to find which one matches the IMMR address to determine its PCI address. As I've already told you my testing on the MPC8308RDB board was successful. As for 85xx boards, Wolfgang told me that DENX doesn't have any 85xx boards that support MSI at the moment, so I can't do complete testing. I'm sorry. I've tested that the TQM8560 board is able to boot and PCI is working as expected though (with your patch applied). I fear I can't do anything else here. Thanks. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/fsl-pci: Fix MSI support on 83xx platforms
Hi Kumar, On 05.08.2010 12:02, Kumar Gala wrote: However that is not true, so we have to search through the inbound window settings on 83xx to find which one matches the IMMR address to determine its PCI address. Thanks, your patch really does help on the MPC8308 board I use. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Commit 3da34aa breaks MSI support on MPC8308 (possibly all MPC83xx) [REPOST]
Hi Kumar, 05.08.2010 11:01, Kumar Gala wrote: I have a fix, can you test? Sure. Where can I find it? Thanks. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
mpc512x_dma hangs when used from multiple threads
Hello Piotr, everybody, I've found that mpc512x_dma channels sometimes hang when accessed from more than one thread simultaneously. The easiest way to reproduce this error I've managed to find is using the dmatest module with a rather high threads_per_chan value (20 should be enough): -bash-3.2# insmod dmatest.ko max_channels=1 iterations=1 threads_per_chan=20 [ 32.559568] dma0chan0-copy0: terminating after 1 tests, 0 failures (status 0) -bash-3.2# [ 35.553688] dma0chan0-copy1: #0: test timed out [ 35.558207] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.565458] dma0chan0-copy1: #0: test timed out [ 35.569968] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.577219] dma0chan0-copy1: #0: test timed out [ 35.581735] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.588953] dma0chan0-copy2: #0: test timed out [ 35.593502] dma0chan0-copy2: terminating after 1 tests, 1 failures (status 0) [ 35.600720] dma0chan0-copy3: #0: test timed out [ 35.605284] dma0chan0-copy3: terminating after 1 tests, 1 failures (status 0) [ 35.612472] dma0chan0-copy4: #0: test timed out [ 35.617052] dma0chan0-copy4: terminating after 1 tests, 1 failures (status 0) [ 35.624381] dma0chan0-copy5: #0: test timed out [ 35.628895] dma0chan0-copy5: terminating after 1 tests, 1 failures (status 0) [ 35.636126] dma0chan0-copy6: #0: test timed out [ 35.640657] dma0chan0-copy6: terminating after 1 tests, 1 failures (status 0) [ 35.647876] dma0chan0-copy7: #0: test timed out [ 35.652425] dma0chan0-copy7: terminating after 1 tests, 1 failures (status 0) [ 35.659643] dma0chan0-copy8: #0: test timed out [ 35.664209] dma0chan0-copy8: terminating after 1 tests, 1 failures (status 0) [ 35.671395] dma0chan0-copy9: #0: test timed out [ 35.675976] dma0chan0-copy9: terminating after 1 tests, 1 failures (status 0) [ 35.683164] dma0chan0-copy1: #0: test timed out [ 35.687743] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.694942] 
dma0chan0-copy1: #0: test timed out [ 35.699495] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.706714] dma0chan0-copy1: #0: test timed out [ 35.711264] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.719826] dma0chan0-copy1: #0: test timed out [ 35.724404] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.731549] dma0chan0-copy1: #0: test timed out [ 35.736131] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.744247] dma0chan0-copy1: #0: test timed out [ 35.748778] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.756768] dma0chan0-copy1: #0: test timed out [ 35.761301] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) [ 35.769306] dma0chan0-copy1: #0: test timed out [ 35.773883] dma0chan0-copy1: terminating after 1 tests, 1 failures (status 0) Also, this can be reproduced using more than one channel at once (in this case some of the channels eventually hang). With max_channels=1 and threads_per_chan=1 dmatest works fine so I think this should be a synchronization issue. After the hang, the channel becomes unusable and cannot even be freed... Is it a known problem? Maybe there exists some fix or workaround for it? Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Commit 3da34aa breaks MSI support on MPC8308 (possibly all MPC83xx) [REPOST]
Hi Kumar, Kim, Josh, everybody, I hope to disturb you but I haven't got any reply for my first posting... I've found that MSI work correctly with older kernels on my MPC8308RDB board and don't work with newer ones. After bisecting I've found that the source of the problem is commit 3da34aa: commit 3da34aae03d498ee62f75aa7467de93cce3030fd Author: Kumar Gala ga...@kernel.crashing.org Date: Tue May 12 15:51:56 2009 -0500 powerpc/fsl: Support unique MSI addresses per PCIe Root Complex Its feasible based on how the PCI address map is setup that the region of PCI address space used for MSIs differs for each PHB on the same SoC. Instead of assuming that the address mappes to CCSRBAR 1:1 we read PEXCSRBAR (BAR0) for the PHB that the given pci_dev is on. Signed-off-by: Kumar Gala ga...@kernel.crashing.org I can see BAR0 initialization for 85xx/86xx hardware but not for 83xx, either in the kernel or in U-Boot (that makes me think that all 83xx can be affected). I'm not actually a PCI expert so I've just tried to write the IMMR base address to the BAR0 register from U-Boot to get the correct address but this doesn't help. Please direct me how to init the 83xx PCIE controller to make it compatible with this patch. Kim, I think MPC8315E is affected too, could you please test it? Thanks in advance. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Commit 3da34aa breaks MSI support on MPC8308 (possibly all MPC83xx) [REPOST]
23.07.2010 1:09, Ilya Yanok wrote: I hope to disturb you but I haven't got any reply for my first posting... I shouldn't be working at night... It's 'hate' not 'hope'... Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/2] mpc8308rdb: support for MPC8308RDB board from Freescale
This patch adds support for MPC8308RDB development board from Freescale. Supported devices: DUART Dual Ethernet NOR and NAND flashes I2C USB in peripheral mode PCIE support is broken by the commit 3da34aa (powerpc/fsl: Support unique MSI addresses per PCIe Root Complex). Works after revert. Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/boot/dts/mpc8308rdb.dts | 303 + arch/powerpc/platforms/83xx/Kconfig |8 + arch/powerpc/platforms/83xx/Makefile |1 + arch/powerpc/platforms/83xx/mpc830x_rdb.c | 94 + 4 files changed, 406 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/boot/dts/mpc8308rdb.dts create mode 100644 arch/powerpc/platforms/83xx/mpc830x_rdb.c diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts new file mode 100644 index 000..a97eb2d --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8308rdb.dts @@ -0,0 +1,303 @@ +/* + * MPC8308RDB Device Tree Source + * + * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2010 Ilya Yanok, Emcraft Systems, ya...@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +/dts-v1/; + +/ { + compatible = fsl,mpc8308rdb; + #address-cells = 1; + #size-cells = 1; + + aliases { + ethernet0 = enet0; + ethernet1 = enet1; + serial0 = serial0; + serial1 = serial1; + pci0 = pci0; + }; + + cpus { + #address-cells = 1; + #size-cells = 0; + + PowerPC,8...@0 { + device_type = cpu; + reg = 0x0; + d-cache-line-size = 32; + i-cache-line-size = 32; + d-cache-size = 16384; + i-cache-size = 16384; + timebase-frequency = 0; // from bootloader + bus-frequency = 0;// from bootloader + clock-frequency = 0; // from bootloader + }; + }; + + memory { + device_type = memory; + reg = 0x 0x0800; // 128MB at 0 + }; + + local...@e0005000 { + #address-cells = 2; + #size-cells = 1; + compatible = fsl,mpc8315-elbc, fsl,elbc, simple-bus; + reg = 0xe0005000 0x1000; + interrupts = 77 0x8; + interrupt-parent = ipic; + + // CS0 and CS1 are swapped when + // booting from nand, but the + // addresses are the same. + ranges = 0x0 0x0 0xfe00 0x0080 + 0x1 0x0 0xe060 0x2000 + 0x2 0x0 0xf000 0x0002 + 0x3 0x0 0xfa00 0x8000; + + fl...@0,0 { + #address-cells = 1; + #size-cells = 1; + compatible = cfi-flash; + reg = 0x0 0x0 0x80; + bank-width = 2; + device-width = 1; + + u-b...@0 { + reg = 0x0 0x6; + read-only; + }; + e...@6 { + reg = 0x6 0x1; + }; + e...@7 { + reg = 0x7 0x1; + }; + ker...@8 { + reg = 0x8 0x20; + }; + d...@28 { + reg = 0x28 0x1; + }; + ramd...@29 { + reg = 0x29 0x57; + }; + }; + + n...@1,0 { + #address-cells = 1; + #size-cells = 1; + compatible = fsl,mpc8315-fcm-nand, +fsl,elbc-fcm-nand; + reg = 0x1 0x0 0x2000; + + jf...@0 { + reg = 0x0 0x200; + }; + }; + }; + + i...@e000 { + #address-cells = 1; + #size-cells = 1; + device_type = soc; + compatible = fsl,mpc8315-immr, simple-bus; + ranges = 0 0xe000 0x0010; + reg = 0xe000 0x0200; + bus-frequency = 0; + + i...@3000
Commit 3da34aa breaks MSI support on MPC8308 (possibly all MPC83xx)
Hi Kumar, All, I've found that MSI work correctly with older kernels on my MPC8308RDB board and don't work with newer ones. After bisecting I've found that the source of the problem is commit 3da34aa: commit 3da34aae03d498ee62f75aa7467de93cce3030fd Author: Kumar Gala ga...@kernel.crashing.org Date: Tue May 12 15:51:56 2009 -0500 powerpc/fsl: Support unique MSI addresses per PCIe Root Complex Its feasible based on how the PCI address map is setup that the region of PCI address space used for MSIs differs for each PHB on the same SoC. Instead of assuming that the address mappes to CCSRBAR 1:1 we read PEXCSRBAR (BAR0) for the PHB that the given pci_dev is on. Signed-off-by: Kumar Gala ga...@kernel.crashing.org I can see BAR0 initialization for 85xx/86xx hardware but not for 83xx, either in the kernel or in U-Boot (that makes me think that all 83xx can be affected). I'm not actually a PCI expert so I've just tried to write the IMMR base address to the BAR0 register from U-Boot to get the correct address but this doesn't help. Please direct me how to init the 83xx PCIE controller to make it compatible with this patch. Thanks in advance. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 0/2] Support for MPC8308RDB development board
These two patches add support for MPC8308RDB development board from Freescale. Supported hardware: DUART Dual Ethernet NOR and NAND flashes I2C USB device PCIE (MSI support is broken by commit 3da34aa) Signed-off-by: Ilya Yanok ya...@emcraft.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2] fsl_pci: add quirk for mpc8308 pcie bridge
This patch adds the quirk for PCIE controller found on Freescale MPC8308. The quirk is the same as for other MPC83xx processors. Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/sysdev/fsl_pci.c |1 + include/linux/pci_ids.h |1 + 2 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index a14760f..7e900ec 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -412,6 +412,7 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080, quirk_fsl_pcie_header); #endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */ #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x) +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8308, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8314E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8314, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8315E, quirk_fsl_pcie_header); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3bedcc1..79bb11f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2264,6 +2264,7 @@ #define PCI_DEVICE_ID_TDI_EHCI 0x0101 #define PCI_VENDOR_ID_FREESCALE0x1957 +#define PCI_DEVICE_ID_MPC8308 0xc006 #define PCI_DEVICE_ID_MPC8315E 0x00b4 #define PCI_DEVICE_ID_MPC8315 0x00b5 #define PCI_DEVICE_ID_MPC8314E 0x00b6 -- 1.6.2.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/3] powerpc: allow 256kB pages with SHMEM
Hi Hugh, Hugh Dickins wrote: Now that shmem's divisions by zero and SHMEM_MAX_BYTES are fixed, let powerpc 256kB pages coexist with CONFIG_SHMEM again. Signed-off-by: Hugh Dickins h...@veritas.com Acked-by: Ilya Yanok ya...@emcraft.com --- Added linuxppc-dev and some other Cc's for this 3/3: sorry if you didn't see 1/3 and 2/3, they were just in mm/shmem.c. arch/powerpc/Kconfig |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- 2.6.29-git13/arch/powerpc/Kconfig 2009-04-06 11:47:57.0 +0100 +++ linux/arch/powerpc/Kconfig2009-04-06 18:18:47.0 +0100 @@ -462,7 +462,7 @@ config PPC_64K_PAGES config PPC_256K_PAGES bool 256k page size if 44x - depends on !STDBINUTILS (!SHMEM || BROKEN) + depends on !STDBINUTILS help Make the page size 256k. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: rework dma-noncoherent to use generic vmap/vunmap functions
Hi Ben, excuse me for taking so long to reply. Benjamin Herrenschmidt wrote: This patch rewrites consistent dma allocations support to use vmalloc layer to allocate virtual memory space from vmalloc pool and get rid of CONFIG_CONSISTENT_{START,SIZE}. So as commented before, please drop the defconfig updates. Ok. -/* * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. */ @@ -151,19 +41,17 @@ void * __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; -struct vm_region *c; unsigned long order; +void *v; +int i; +struct page *pages[PAGE_ALIGN(size) >> PAGE_SHIFT]; I'm not -too- fan of that page list one the stack up there. I understand why you don't wantto kmalloc something here etc... but that's what __vmalloc_area() does and it's somewhat useful to keep track of the page array that way, it might prove handy in the future. I don't like the array being on the stack either... But I fear I didn't understand what you were talking about here... __vmalloc_area does kmalloc or vmalloc to allocate the pages array and then allocates pages one by one but we need physically contiguous pages here... (And that is why we don't really need to store the pages array) So I just added kmalloc/vmalloc to allocate the pages array and stored it in the vm_struct structure. Might even be worth adding a generic patch to add a VM_COHERENT_DMA flag so they can be listed as such and make sure you set the caller field yourself with your own caller. I used __builtin_return_address(1) as the 'caller' so I get useful output in /proc/vmallocinfo (btw, ioremap doesn't provide useful 'caller'). Do you think we have high chances of such a patch being accepted in lkml? Well, I'll try to do this (for now I stick with VM_IOREMAP). (Hint: look at the output of /proc/vmallocinfo) Also, the mucking around with PG_Reserved shouldn't be of any use anymore. Ok, removed. Please review the updated patch (I'll post it as a followup). Regards, Ilya. 
___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] powerpc: rework dma-noncoherent to use generic vmalloc layer (2nd rev)
This patch rewrites consistent dma allocations support to use vmalloc layer to allocate virtual memory space from vmalloc pool and get rid of CONFIG_CONSISTENT_{START,SIZE}. I still use VM_IOREMAP flag for these allocations (I'll try to post patch adding separate VM_COHERENT_DMA flag to lkml later). Now I save pages array in vm_struct pages field and use __builtin_return_address(1) as 'caller' argument to get usefull info in /proc/vmallocinfo: -bash-3.2# cat /proc/vmallocinfo 0xe100-0xe10020008192 __ioremap+0x184/0x194 ioremap [...] 0xe1f9-0xe1f96000 24576 PrimeIocFifos+0x3a8/0x550 pages=5 ioremap 0xe1fa-0xe1fb1000 69632 __ioremap+0x184/0x194 ioremap 0xe1fc-0xe1fd1000 69632 __ioremap+0x184/0x194 ioremap 0xe200-0xe2f01000 15732736 __ioremap+0x184/0x194 ioremap 0xe2f32000-0xe2f340008192 __ioremap+0x184/0x194 ioremap 0xe2f38000-0xe2f3d000 20480 __ioremap+0x184/0x194 ioremap 0xe2f4-0xe2f46000 24576 PrimeIocFifos+0x3a8/0x550 pages=5 ioremap 0xe2f8-0xe3e81000 15732736 __ioremap+0x184/0x194 ioremap 0xe3f0-0xe3f48000 294912 PrimeIocFifos+0x204/0x550 pages=71 ioremap 0xe3f8-0xe3fc8000 294912 PrimeIocFifos+0x204/0x550 pages=71 ioremap 0xe3fe-0xe3fe6000 24576 bdx_fifo_init+0x8c/0x12c pages=5 ioremap 0xe3fe8000-0xe3fee000 24576 bdx_fifo_init+0x8c/0x12c pages=5 ioremap 0xe3fef000-0xe3ff8000 36864 bdx_open+0x10c/0x60c pages=8 vmalloc 0xe400-0xe4006000 24576 bdx_fifo_init+0x8c/0x12c pages=5 ioremap 0xe4007000-0xe400f000 32768 bdx_open+0x1f0/0x60c pages=7 vmalloc 0xe401-0xe401a000 40960 bdx_fifo_init+0x8c/0x12c pages=9 ioremap (those ioremaps with pages=) Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/Kconfig | 25 --- arch/powerpc/lib/dma-noncoherent.c | 299 +++- 2 files changed, 53 insertions(+), 271 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 74cc312..ecae53f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -815,31 +815,6 @@ config TASK_SIZE default 0x8000 if PPC_PREP || PPC_8xx default 0xc000 -config 
CONSISTENT_START_BOOL - bool Set custom consistent memory pool address - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE - help - This option allows you to set the base virtual address - of the consistent memory pool. This pool of virtual - memory is used to make consistent memory allocations. - -config CONSISTENT_START - hex Base virtual address of consistent memory pool if CONSISTENT_START_BOOL - default 0xfd00 if (NOT_COHERENT_CACHE 8xx) - default 0xff10 if NOT_COHERENT_CACHE - -config CONSISTENT_SIZE_BOOL - bool Set custom consistent memory pool size - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE - help - This option allows you to set the size of the - consistent memory pool. This pool of virtual memory - is used to make consistent memory allocations. - -config CONSISTENT_SIZE - hex Size of consistent memory pool if CONSISTENT_SIZE_BOOL - default 0x0020 if NOT_COHERENT_CACHE - config PIN_TLB bool Pinned Kernel TLBs (860 ONLY) depends on ADVANCED_OPTIONS 8xx diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c index b7dc4c1..2e321a9 100644 --- a/arch/powerpc/lib/dma-noncoherent.c +++ b/arch/powerpc/lib/dma-noncoherent.c @@ -29,121 +29,11 @@ #include linux/types.h #include linux/highmem.h #include linux/dma-mapping.h +#include linux/vmalloc.h #include asm/tlbflush.h /* - * This address range defaults to a value that is safe for all - * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It - * can be further configured for specific applications under - * the Advanced Setup menu. -Matt - */ -#define CONSISTENT_BASE(CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) PAGE_SHIFT) - -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); - -/* - * VM region handling support. 
- * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - *struct vm_region region; - *unsigned longflags; - *struct page **pages; - *unsigned int nr_pages; - *unsigned longphys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list= LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However
Re: [PATCH] powerpc: rework dma-noncoherent to use generic vmap/vunmap functions
Hi Ben, Benjamin Herrenschmidt wrote: btw, ioremap doesn't provide useful 'caller'). I fixed that :-) (see patches I posted to the list, though that's waiting for a patch to go upstream first that adds a __get_vm_area_caller() that I need for ppc64). Yep, I saw them. Btw, I've posted an updated consistent memory patch in a new thread; please take a look. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] powerpc: rework dma-noncoherent to use generic vmalloc layer (3rd rev)
This patch rewrites consistent dma allocations support to use vmalloc layer to allocate virtual memory space from vmalloc pool and get rid of CONFIG_CONSISTENT_{START,SIZE}. I still use VM_IOREMAP flag for these allocations (I'll try to post patch adding separate VM_COHERENT_DMA flag to lkml later). Now I save pages array in vm_struct pages field and use __builtin_return_address(1) as 'caller' argument to get usefull info in /proc/vmallocinfo: -bash-3.2# cat /proc/vmallocinfo 0xe100-0xe10020008192 __ioremap+0x184/0x194 ioremap [...] 0xe1f9-0xe1f96000 24576 PrimeIocFifos+0x3a8/0x550 pages=5 ioremap 0xe1fa-0xe1fb1000 69632 __ioremap+0x184/0x194 ioremap 0xe1fc-0xe1fd1000 69632 __ioremap+0x184/0x194 ioremap 0xe200-0xe2f01000 15732736 __ioremap+0x184/0x194 ioremap 0xe2f32000-0xe2f340008192 __ioremap+0x184/0x194 ioremap 0xe2f38000-0xe2f3d000 20480 __ioremap+0x184/0x194 ioremap 0xe2f4-0xe2f46000 24576 PrimeIocFifos+0x3a8/0x550 pages=5 ioremap 0xe2f8-0xe3e81000 15732736 __ioremap+0x184/0x194 ioremap 0xe3f0-0xe3f48000 294912 PrimeIocFifos+0x204/0x550 pages=71 ioremap 0xe3f8-0xe3fc8000 294912 PrimeIocFifos+0x204/0x550 pages=71 ioremap 0xe3fe-0xe3fe6000 24576 bdx_fifo_init+0x8c/0x12c pages=5 ioremap 0xe3fe8000-0xe3fee000 24576 bdx_fifo_init+0x8c/0x12c pages=5 ioremap 0xe3fef000-0xe3ff8000 36864 bdx_open+0x10c/0x60c pages=8 vmalloc 0xe400-0xe4006000 24576 bdx_fifo_init+0x8c/0x12c pages=5 ioremap 0xe4007000-0xe400f000 32768 bdx_open+0x1f0/0x60c pages=7 vmalloc 0xe401-0xe401a000 40960 bdx_fifo_init+0x8c/0x12c pages=9 ioremap (those ioremaps with pages=) Signed-off-by: Ilya Yanok ya...@emcraft.com --- Previous version had a memory leak in error code path. 
--- arch/powerpc/Kconfig | 25 --- arch/powerpc/lib/dma-noncoherent.c | 303 +++- 2 files changed, 57 insertions(+), 271 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 74cc312..ecae53f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -815,31 +815,6 @@ config TASK_SIZE default 0x8000 if PPC_PREP || PPC_8xx default 0xc000 -config CONSISTENT_START_BOOL - bool Set custom consistent memory pool address - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE - help - This option allows you to set the base virtual address - of the consistent memory pool. This pool of virtual - memory is used to make consistent memory allocations. - -config CONSISTENT_START - hex Base virtual address of consistent memory pool if CONSISTENT_START_BOOL - default 0xfd00 if (NOT_COHERENT_CACHE 8xx) - default 0xff10 if NOT_COHERENT_CACHE - -config CONSISTENT_SIZE_BOOL - bool Set custom consistent memory pool size - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE - help - This option allows you to set the size of the - consistent memory pool. This pool of virtual memory - is used to make consistent memory allocations. - -config CONSISTENT_SIZE - hex Size of consistent memory pool if CONSISTENT_SIZE_BOOL - default 0x0020 if NOT_COHERENT_CACHE - config PIN_TLB bool Pinned Kernel TLBs (860 ONLY) depends on ADVANCED_OPTIONS 8xx diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c index b7dc4c1..005a28d 100644 --- a/arch/powerpc/lib/dma-noncoherent.c +++ b/arch/powerpc/lib/dma-noncoherent.c @@ -29,121 +29,11 @@ #include linux/types.h #include linux/highmem.h #include linux/dma-mapping.h +#include linux/vmalloc.h #include asm/tlbflush.h /* - * This address range defaults to a value that is safe for all - * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It - * can be further configured for specific applications under - * the Advanced Setup menu. 
-Matt - */ -#define CONSISTENT_BASE(CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) PAGE_SHIFT) - -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - *struct vm_region region; - *unsigned longflags; - *struct page **pages; - *unsigned int nr_pages; - *unsigned longphys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list= LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START
[PATCH] powerpc: rework dma-noncoherent to use generic vmap/vunmap functions
This patch rewrites consistent dma allocations support to use vmalloc layer to allocate virtual memory space from vmalloc pool and get rid of CONFIG_CONSISTENT_{START,SIZE}. Signed-off-by: Ilya Yanok ya...@emcraft.com --- arch/powerpc/Kconfig | 25 -- arch/powerpc/configs/40x/acadia_defconfig |2 - arch/powerpc/configs/40x/ep405_defconfig |2 - arch/powerpc/configs/40x/hcu4_defconfig|2 - arch/powerpc/configs/40x/kilauea_defconfig |2 - arch/powerpc/configs/40x/makalu_defconfig |2 - arch/powerpc/configs/40x/virtex_defconfig |2 - arch/powerpc/configs/40x/walnut_defconfig |2 - arch/powerpc/configs/44x/arches_defconfig |2 - arch/powerpc/configs/44x/bamboo_defconfig |2 - arch/powerpc/configs/44x/canyonlands_defconfig |2 - arch/powerpc/configs/44x/ebony_defconfig |2 - arch/powerpc/configs/44x/katmai_defconfig |2 - arch/powerpc/configs/44x/rainier_defconfig |2 - arch/powerpc/configs/44x/sam440ep_defconfig|2 - arch/powerpc/configs/44x/sequoia_defconfig |2 - arch/powerpc/configs/44x/taishan_defconfig |2 - arch/powerpc/configs/44x/virtex5_defconfig |2 - arch/powerpc/configs/44x/warp_defconfig|2 - arch/powerpc/configs/adder875_defconfig|2 - arch/powerpc/configs/c2k_defconfig |2 - arch/powerpc/configs/ep88xc_defconfig |2 - arch/powerpc/configs/mgsuvd_defconfig |2 - arch/powerpc/configs/mpc866_ads_defconfig |2 - arch/powerpc/configs/mpc885_ads_defconfig |2 - arch/powerpc/configs/ppc40x_defconfig |2 - arch/powerpc/configs/ppc44x_defconfig |2 - arch/powerpc/configs/prpmc2800_defconfig |2 - arch/powerpc/lib/dma-noncoherent.c | 278 +++ 29 files changed, 37 insertions(+), 320 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a..a451a06 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -777,31 +777,6 @@ config TASK_SIZE default 0x8000 if PPC_PREP || PPC_8xx default 0xc000 -config CONSISTENT_START_BOOL - bool Set custom consistent memory pool address - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE - help - This option allows you to set the 
base virtual address - of the consistent memory pool. This pool of virtual - memory is used to make consistent memory allocations. - -config CONSISTENT_START - hex Base virtual address of consistent memory pool if CONSISTENT_START_BOOL - default 0xfd00 if (NOT_COHERENT_CACHE 8xx) - default 0xff10 if NOT_COHERENT_CACHE - -config CONSISTENT_SIZE_BOOL - bool Set custom consistent memory pool size - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE - help - This option allows you to set the size of the - consistent memory pool. This pool of virtual memory - is used to make consistent memory allocations. - -config CONSISTENT_SIZE - hex Size of consistent memory pool if CONSISTENT_SIZE_BOOL - default 0x0020 if NOT_COHERENT_CACHE - config PIN_TLB bool Pinned Kernel TLBs (860 ONLY) depends on ADVANCED_OPTIONS 8xx diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index 25572cc..ea5d89c 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -265,8 +265,6 @@ CONFIG_PAGE_OFFSET=0xc000 CONFIG_KERNEL_START=0xc000 CONFIG_PHYSICAL_START=0x CONFIG_TASK_SIZE=0xc000 -CONFIG_CONSISTENT_START=0xff10 -CONFIG_CONSISTENT_SIZE=0x0020 CONFIG_NET=y # diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig index b80ba7a..1f3ebea 100644 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ b/arch/powerpc/configs/40x/ep405_defconfig @@ -267,8 +267,6 @@ CONFIG_PAGE_OFFSET=0xc000 CONFIG_KERNEL_START=0xc000 CONFIG_PHYSICAL_START=0x CONFIG_TASK_SIZE=0xc000 -CONFIG_CONSISTENT_START=0xff10 -CONFIG_CONSISTENT_SIZE=0x0020 CONFIG_NET=y # diff --git a/arch/powerpc/configs/40x/hcu4_defconfig b/arch/powerpc/configs/40x/hcu4_defconfig index 45dcb82..bfb010d 100644 --- a/arch/powerpc/configs/40x/hcu4_defconfig +++ b/arch/powerpc/configs/40x/hcu4_defconfig @@ -265,8 +265,6 @@ CONFIG_PAGE_OFFSET=0xc000 CONFIG_KERNEL_START=0xc000 CONFIG_PHYSICAL_START=0x CONFIG_TASK_SIZE=0xc000 
-CONFIG_CONSISTENT_START=0xff10 -CONFIG_CONSISTENT_SIZE=0x0020 CONFIG_NET=y # diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index e2f3695..40e181f 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -265,8 +265,6 @@ CONFIG_PAGE_OFFSET=0xc000
[PATCH] powerpc: add 16K/64K pages support for the 44x PPC32 architectures.
This patch adds support for page sizes bigger than 4K (16K/64K) on PPC 44x. PGDIR table is much smaller than page in case of 16K/64K pages (512 and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of __get_free_pages(). PTE table covers rather big memory area in case of 16K/64K pages (32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in area covered by one PTE table. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Vladimir Panfilov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/Kconfig | 58 arch/powerpc/include/asm/highmem.h | 15 +++- arch/powerpc/include/asm/mmu-44x.h | 17 + arch/powerpc/include/asm/page.h| 13 --- arch/powerpc/include/asm/page_32.h |7 +++- arch/powerpc/kernel/asm-offsets.c |4 ++ arch/powerpc/kernel/head_44x.S | 23 - arch/powerpc/kernel/misc_32.S | 12 +++--- arch/powerpc/mm/pgtable_32.c | 23 - arch/powerpc/platforms/Kconfig.cputype |2 +- 10 files changed, 126 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a..cd8ff7c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -401,23 +401,53 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool 64k page size - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt Page size + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines - without processor support for 64k pages, the kernel will simulate - them by loading each individual 4k page on demand transparently, - while on hardware with such support, it will be used to map - normal application pages. + Select the kernel logical page size. Increasing the page size + will reduce software overhead at each page boundary, allow + hardware prefetch mechanisms to be more effective, and allow + larger dma transfers increasing IO efficiency and reducing + overhead. However the utilization of memory will increase. 
+ For example, each cached file will using a multiple of the + page size to hold its contents and the difference between the + end of file and the end of page is wasted. + + Some dedicated systems, such as software raid serving with + accelerated calculations, have shown significant increases. + + If you configure a 64 bit kernel for 64k pages but the + processor does not support them, then the kernel will simulate + them with 4k pages, loading them on demand, but with the + reduced software overhead and larger internal fragmentation. + For the 32 bit kernel, a large page option will not be offered + unless it is supported by the configured processor. + + If unsure, choose 4K_PAGES. + +config PPC_4K_PAGES + bool 4k page size + +config PPC_16K_PAGES + bool 16k page size if 44x + +config PPC_64K_PAGES + bool 64k page size if 44x || PPC_STD_MMU_64 + select PPC_HAS_HASH_64K if PPC_STD_MMU_64 + +endchoice config FORCE_MAX_ZONEORDER int Maximum zone order - range 9 64 if PPC_64K_PAGES - default 9 if PPC_64K_PAGES - range 13 64 if PPC64 !PPC_64K_PAGES - default 13 if PPC64 !PPC_64K_PAGES + range 9 64 if PPC_STD_MMU_64 PPC_64K_PAGES + default 9 if PPC_STD_MMU_64 PPC_64K_PAGES + range 13 64 if PPC_STD_MMU_64 !PPC_64K_PAGES + default 13 if PPC_STD_MMU_64 !PPC_64K_PAGES + range 9 64 if PPC_STD_MMU_32 PPC_16K_PAGES + default 9 if PPC_STD_MMU_32 PPC_16K_PAGES + range 7 64 if PPC_STD_MMU_32 PPC_64K_PAGES + default 7 if PPC_STD_MMU_32 PPC_64K_PAGES range 11 64 default 11 help @@ -437,7 +467,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool Support setting protections for 4k subpages - depends on PPC_64K_PAGES + depends on PPC_STD_MMU_64 PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 91c5895..9875540 100644 --- a/arch/powerpc/include/asm/highmem.h +++ 
b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,20 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ -#define LAST_PKMAP (1 PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) +/* + * We use one full pte table with 4K pages. And with 16K/64K pages pte + * table covers enough memory (32MB and 512MB resp.) that both FIXMAP + * and PKMAP can be placed in single pte table. We use 1024
Re: [PATCH] powerpc: add 16K/64K pages support for the 44x PPC32 architectures.
Hello guys, please discard this patch. It just doesn't compile with CONFIG_HIGHMEM set. I'll post an updated patch shortly. Excuse me. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add 16K/64K pages support for the 44x PPC32 architectures.
Hi Benjamin, Benjamin Herrenschmidt wrote: I'm not sure about the above PMD_MASK. Shouldn't we instead make it not build if (PKMAP_BASE & PMD_MASK) != 0 ? We separated the !4K_PAGES case here exactly because (PKMAP_BASE & PMD_MASK) != 0 [see the comment to this chunk - why]. So, this'll turn out to be broken if we follow your suggestion. Are there any reasons why we should have PKMAP_BASE aligned on the PMD_SIZE boundary ? No, you are right, so why do we need the PMD_MASK in the 4k case ? What I don't get is why do we need a different formula for 4k and 64k but I might just be stupid :-) Because we want a full PTE table for PKMAP with 4K pages (it's pretty small - only 512 pages, so we really want to use all of it). And as the current code doesn't support PKMAP being spread across different PTE tables, we want to align to a full PTE table so as not to lose part of it. These definitions seem to be related to the page table, so, as for me, then pgtable.h is the better place for them. Though, as you want; we'll move this to page_32.h. Well, I like having them next to the pte_t/pgd_t definitions since they relate directly to the size of those structures. Well. But pte_t/pgd_t are actually in page.h not page_32.h (along with pmd/pud for 64bit)... So maybe we need to put definitions for PMD_T_LOG2/PUD_T_LOG2 too... Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] powerpc: add 16K/64K pages support for the 44x PPC32 architectures.
This patch adds support for page sizes bigger than 4K (16K/64K) on PPC 44x. PGDIR table is much smaller than page in case of 16K/64K pages (512 and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of __get_free_pages(). PTE table covers rather big memory area in case of 16K/64K pages (32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in area covered by one PTE table. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Vladimir Panfilov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/Kconfig | 58 arch/powerpc/include/asm/highmem.h | 19 +- arch/powerpc/include/asm/mmu-44x.h | 17 + arch/powerpc/include/asm/page.h| 13 --- arch/powerpc/include/asm/page_32.h |7 +++- arch/powerpc/kernel/asm-offsets.c |4 ++ arch/powerpc/kernel/head_44x.S | 23 - arch/powerpc/kernel/misc_32.S | 12 +++--- arch/powerpc/mm/pgtable_32.c | 23 - arch/powerpc/platforms/Kconfig.cputype |2 +- 10 files changed, 130 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a..cd8ff7c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -401,23 +401,53 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool 64k page size - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt Page size + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines - without processor support for 64k pages, the kernel will simulate - them by loading each individual 4k page on demand transparently, - while on hardware with such support, it will be used to map - normal application pages. + Select the kernel logical page size. Increasing the page size + will reduce software overhead at each page boundary, allow + hardware prefetch mechanisms to be more effective, and allow + larger dma transfers increasing IO efficiency and reducing + overhead. However the utilization of memory will increase. 
+ For example, each cached file will using a multiple of the + page size to hold its contents and the difference between the + end of file and the end of page is wasted. + + Some dedicated systems, such as software raid serving with + accelerated calculations, have shown significant increases. + + If you configure a 64 bit kernel for 64k pages but the + processor does not support them, then the kernel will simulate + them with 4k pages, loading them on demand, but with the + reduced software overhead and larger internal fragmentation. + For the 32 bit kernel, a large page option will not be offered + unless it is supported by the configured processor. + + If unsure, choose 4K_PAGES. + +config PPC_4K_PAGES + bool 4k page size + +config PPC_16K_PAGES + bool 16k page size if 44x + +config PPC_64K_PAGES + bool 64k page size if 44x || PPC_STD_MMU_64 + select PPC_HAS_HASH_64K if PPC_STD_MMU_64 + +endchoice config FORCE_MAX_ZONEORDER int Maximum zone order - range 9 64 if PPC_64K_PAGES - default 9 if PPC_64K_PAGES - range 13 64 if PPC64 !PPC_64K_PAGES - default 13 if PPC64 !PPC_64K_PAGES + range 9 64 if PPC_STD_MMU_64 PPC_64K_PAGES + default 9 if PPC_STD_MMU_64 PPC_64K_PAGES + range 13 64 if PPC_STD_MMU_64 !PPC_64K_PAGES + default 13 if PPC_STD_MMU_64 !PPC_64K_PAGES + range 9 64 if PPC_STD_MMU_32 PPC_16K_PAGES + default 9 if PPC_STD_MMU_32 PPC_16K_PAGES + range 7 64 if PPC_STD_MMU_32 PPC_64K_PAGES + default 7 if PPC_STD_MMU_32 PPC_64K_PAGES range 11 64 default 11 help @@ -437,7 +467,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool Support setting protections for 4k subpages - depends on PPC_64K_PAGES + depends on PPC_STD_MMU_64 PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 91c5895..7d6bb37 100644 --- a/arch/powerpc/include/asm/highmem.h +++ 
b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,24 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ -#define LAST_PKMAP (1 PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) +/* + * We use one full pte table with 4K pages. And with 16K/64K pages pte + * table covers enough memory (32MB and 512MB resp.) that both FIXMAP + * and PKMAP can be placed in single pte table. We use 1024
[RFC/PATCH] powerpc: consistent memory mapping.
Defining the start virtual address of the consistent memory in configs can lead to the consistent area overlapping the other virtual regions (fixmap, pkmap, vmalloc). The defaults in the current kernel just place the consistent memory area somewhere high in the vmalloc area, and then you have to hope there will never be enough vmalloc allocations to overlap it. So, this patch makes the virtual address of the consistent memory be assigned dynamically, at the end of the virtual address area. The fixmap area is now shifted to the low addresses, and ends before the start of the consistent virtual addresses. The user is now allowed to configure only the size of the consistent memory area. An exception has been made for 8xx archs, where the start of the consistent memory is still configurable: this is to avoid overlapping with the IMM space of 8xx. Actually this is wrong. We have a possibility to overlap not only for consistent memory but for IMM space too. But we don't have much expertise in 8xx so we are looking forward to some advice here. The following items remain to be done to fully support consistent memory: a) we are missing 1 (last) page of addresses at the end of the consistent memory area; b) if CONFIG_CONSISTENT_SIZE is such that we cover more address regions than served by 1 pgd level, then mapping of the pages to these additional areas won't work (this 'feature' isn't introduced by this patch, but is the consequence of the current consistent memory support code, where consistent_pte is set in dma_alloc_init() in accordance with the pgd of the CONSISTENT_BASE address). 
Signed-off-by: Ilya Yanok [EMAIL PROTECTED] Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] --- arch/powerpc/Kconfig |7 --- arch/powerpc/lib/dma-noncoherent.c |5 + arch/powerpc/mm/pgtable_32.c |2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index aa2eb46..4d62446 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -809,7 +809,7 @@ config TASK_SIZE config CONSISTENT_START_BOOL bool Set custom consistent memory pool address - depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE + depends on ADVANCED_OPTIONS NOT_COHERENT_CACHE 8xx help This option allows you to set the base virtual address of the consistent memory pool. This pool of virtual @@ -817,8 +817,8 @@ config CONSISTENT_START_BOOL config CONSISTENT_START hex Base virtual address of consistent memory pool if CONSISTENT_START_BOOL - default 0xfd00 if (NOT_COHERENT_CACHE 8xx) - default 0xff10 if NOT_COHERENT_CACHE + depends on 8xx + default 0xfd00 if NOT_COHERENT_CACHE config CONSISTENT_SIZE_BOOL bool Set custom consistent memory pool size @@ -831,6 +831,7 @@ config CONSISTENT_SIZE_BOOL config CONSISTENT_SIZE hex Size of consistent memory pool if CONSISTENT_SIZE_BOOL default 0x0020 if NOT_COHERENT_CACHE + default 0x if !NOT_COHERENT_CACHE config PIN_TLB bool Pinned Kernel TLBs (860 ONLY) diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c index 31734c0..3c12577 100644 --- a/arch/powerpc/lib/dma-noncoherent.c +++ b/arch/powerpc/lib/dma-noncoherent.c @@ -38,8 +38,13 @@ * can be further configured for specific applications under * the Advanced Setup menu. 
-Matt */ +#ifdef CONFIG_CONSISTENT_START #define CONSISTENT_BASE(CONFIG_CONSISTENT_START) #define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#else +#define CONSISTENT_BASE((unsigned long)(-CONFIG_CONSISTENT_SIZE)) +#define CONSISTENT_END ((unsigned long)(-PAGE_SIZE)) +#endif /* CONFIG_CONSISTENT_START */ #define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) PAGE_SHIFT) /* diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 10d21c3..fda24c7 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -395,7 +395,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = (-PAGE_SIZE); +unsigned long FIXADDR_TOP = (-PAGE_SIZE-CONFIG_CONSISTENT_SIZE); EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -- 1.5.6.1 ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] powerpc: add 16K/64K pages support for the 44x PPC32 architectures.
This patch adds support for page sizes bigger than 4K (16K/64K) on PPC 44x. PGDIR table is much smaller than page in case of 16K/64K pages (512 and 32 bytes resp.) so we allocate PGDIR with kzalloc() instead of __get_free_pages(). PTE table covers rather big memory area in case of 16K/64K pages (32MB and 512MB resp.) so we can easily put FIXMAP and PKMAP in area covered by one PTE table. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Vladimir Panfilov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/Kconfig | 54 --- arch/powerpc/include/asm/highmem.h | 15 - arch/powerpc/include/asm/mmu-44x.h | 18 ++ arch/powerpc/include/asm/page.h| 13 +--- arch/powerpc/include/asm/page_32.h |3 +- arch/powerpc/include/asm/pgtable.h |2 + arch/powerpc/kernel/asm-offsets.c |4 ++ arch/powerpc/kernel/head_44x.S | 22 - arch/powerpc/kernel/misc_32.S | 12 +++--- arch/powerpc/mm/pgtable_32.c | 13 ++- arch/powerpc/platforms/Kconfig.cputype |2 +- 11 files changed, 112 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a..aa2eb46 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -401,23 +401,53 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool 64k page size - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt Page size + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines - without processor support for 64k pages, the kernel will simulate - them by loading each individual 4k page on demand transparently, - while on hardware with such support, it will be used to map - normal application pages. + Select the kernel logical page size. Increasing the page size + will reduce software overhead at each page boundary, allow + hardware prefetch mechanisms to be more effective, and allow + larger dma transfers increasing IO efficiency and reducing + overhead. However the utilization of memory will increase. 
+ For example, each cached file will using a multiple of the + page size to hold its contents and the difference between the + end of file and the end of page is wasted. + + Some dedicated systems, such as software raid serving with + accelerated calculations, have shown significant increases. + + If you configure a 64 bit kernel for 64k pages but the + processor does not support them, then the kernel will simulate + them with 4k pages, loading them on demand, but with the + reduced software overhead and larger internal fragmentation. + For the 32 bit kernel, a large page option will not be offered + unless it is supported by the configured processor. + + If unsure, choose 4K_PAGES. + +config PPC_4K_PAGES + bool 4k page size + +config PPC_16K_PAGES + bool 16k page size if 44x + +config PPC_64K_PAGES + bool 64k page size if 44x || PPC64 + select PPC_HAS_HASH_64K if PPC64 + +endchoice config FORCE_MAX_ZONEORDER int Maximum zone order - range 9 64 if PPC_64K_PAGES - default 9 if PPC_64K_PAGES + range 9 64 if PPC64 PPC_64K_PAGES + default 9 if PPC64 PPC_64K_PAGES range 13 64 if PPC64 !PPC_64K_PAGES default 13 if PPC64 !PPC_64K_PAGES + range 9 64 if PPC32 PPC_16K_PAGES + default 9 if PPC32 PPC_16K_PAGES + range 7 64 if PPC32 PPC_64K_PAGES + default 7 if PPC32 PPC_64K_PAGES range 11 64 default 11 help @@ -437,7 +467,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool Support setting protections for 4k subpages - depends on PPC_64K_PAGES + depends on PPC64 PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 91c5895..9875540 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,20 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. 
*/ -#define LAST_PKMAP (1 PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) +/* + * We use one full pte table with 4K pages. And with 16K/64K pages pte + * table covers enough memory (32MB and 512MB resp.) that both FIXMAP + * and PKMAP can be placed in single pte table. We use 1024 pages for + * PKMAP in case of 16K/64K pages. + */ +#define PKMAP_ORDERmin(PTE_SHIFT, 10) +#define LAST_PKMAP (1 PKMAP_ORDER
[PATCH 03/11] async_tx: add support for asynchronous RAID6 recovery operations
This patch extends async_tx API with two operations for recovery operations on RAID6 array with two failed disks using new async_pqxor() operation. New functions: async_r6_dd_recov() recovers after double data disk failure async_r6_dp_recov() recovers after D+P failure Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- crypto/async_tx/Kconfig |5 + crypto/async_tx/Makefile|1 + crypto/async_tx/async_r6recov.c | 275 +++ include/linux/async_tx.h| 10 ++ 4 files changed, 291 insertions(+), 0 deletions(-) create mode 100644 crypto/async_tx/async_r6recov.c diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index b1705d1..31a0aae 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -18,3 +18,8 @@ config ASYNC_PQXOR tristate select ASYNC_CORE +config ASYNC_R6RECOV + tristate + select ASYNC_CORE + select ASYNC_PQXOR + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 32d6ce2..76fcd43 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQXOR) += async_pqxor.o +obj-$(CONFIG_ASYNC_R6RECOV) += async_r6recov.o diff --git a/crypto/async_tx/async_r6recov.c b/crypto/async_tx/async_r6recov.c new file mode 100644 index 000..4c6b100 --- /dev/null +++ b/crypto/async_tx/async_r6recov.c @@ -0,0 +1,275 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov [EMAIL PROTECTED] + * + * Developed for DENX Software Engineering GmbH + * + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * + * based on async_xor.c code written by: + * Dan Williams [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#include linux/kernel.h +#include linux/interrupt.h +#include linux/dma-mapping.h +#include linux/raid/xor.h +#include linux/async_tx.h + +#include ../drivers/md/raid6.h + +/** + * async_r6_dd_recov - attempt to calculate two data misses using dma engines. + * @disks: number of disks in the RAID-6 array + * @bytes: size of strip + * @faila: first failed drive index + * @failb: second failed drive index + * @ptrs: array of pointers to strips (last two must be p and q, respectively) + * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @depend_tx: depends on the result of this transaction. + * @cb: function to call when the operation completes + * @cb_param: parameter to pass to the callback routine + */ +struct dma_async_tx_descriptor * +async_r6_dd_recov(int disks, size_t bytes, int faila, int failb, + struct page **ptrs, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb, void *cb_param) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *lptrs[disks]; + unsigned char lcoef[disks - 2]; + int i = 0, k = 0, fc = -1; + uint8_t bc[2]; + dma_async_tx_callback lcb = NULL; + void *lcb_param = NULL; + + /* Assume that failb faila */ + if (faila failb) { + fc = faila; + faila = failb; + failb = fc; + } + + /* +* Try to compute missed data asynchronously. 
+*/ + + if (disks == 4) { + /* Pxy and Qxy are zero in this case so we already have +* P+Pxy and Q+Qxy in P and Q strips respectively. +*/ + tx = depend_tx; + lcb = cb; + lcb_param = cb_param; + goto do_mult; + } + + /* (1) Calculate Qxy and Pxy: +* Qxy = A(0)*D(0) + ... + A(n-1)*D(n-1) + A(n+1)*D(n+1) + ... + +*A(m-1)*D(m-1) + A(m+1)*D(m+1) + ... + A(disks-1)*D(disks-1), +* where n = faila, m = failb. +*/ + for (i = 0, k = 0; i disks - 2; i++) { + if (i != faila i != failb) { + lptrs[k] = ptrs[i]; + lcoef[k] = raid6_gfexp[i]; + k
[PATCH 02/11] async_tx: add support for asynchronous GF multiplication
This adds support for doing asynchronous GF multiplication by adding four additional functions to async_tx API: async_pqxor() does simultaneous XOR of sources and XOR of sources GF-multiplied by given coefficients. async_pqxor_zero_sum() checks if results of calculations match given ones. async_gen_syndrome() does simultaneous XOR and R/S syndrome of sources. async_syndrome_zerosum() checks if results of XOR/syndrome calculation matches given ones. Latter two functions just use pqxor with appropriate coefficients in asynchronous case but have significant optimizations in the synchronous case. To support this API dmaengine driver should set DMA_PQ_XOR and DMA_PQ_ZERO_SUM capabilities and provide device_prep_dma_pqxor and device_prep_dma_pqzero_sum methods in dma_device structure. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- crypto/async_tx/Kconfig |4 + crypto/async_tx/Makefile |1 + crypto/async_tx/async_pqxor.c | 532 + include/linux/async_tx.h | 31 +++ include/linux/dmaengine.h | 11 + 5 files changed, 579 insertions(+), 0 deletions(-) create mode 100644 crypto/async_tx/async_pqxor.c diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb391..b1705d1 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,7 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQXOR + tristate + select ASYNC_CORE + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d..32d6ce2 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQXOR) += async_pqxor.o diff --git a/crypto/async_tx/async_pqxor.c b/crypto/async_tx/async_pqxor.c new file mode 100644 index 000..547d72a --- /dev/null +++ b/crypto/async_tx/async_pqxor.c @@ -0,0 +1,532 @@ +/* + * 
Copyright(c) 2007 Yuri Tikhonov [EMAIL PROTECTED] + * + * Developed for DENX Software Engineering GmbH + * + * Asynchronous GF-XOR calculations ASYNC_TX API. + * + * based on async_xor.c code written by: + * Dan Williams [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#include linux/kernel.h +#include linux/interrupt.h +#include linux/dma-mapping.h +#include linux/raid/xor.h +#include linux/async_tx.h + +#include ../drivers/md/raid6.h + +/** + * The following static variables are used in cases of synchronous + * zero sum to save the values to check. 
Two pages used for zero sum and + * the third one is for dumb P destination when calling gen_syndrome() + */ +static spinlock_t spare_lock; +struct page *spare_pages[3]; + +/** + * do_async_pqxor - asynchronously calculate P and/or Q + */ +static struct dma_async_tx_descriptor * +do_async_pqxor(struct dma_chan *chan, struct page *pdest, struct page *qdest, + struct page **src_list, unsigned char *scoef_list, + unsigned int offset, unsigned int src_cnt, size_t len, + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) +{ + struct dma_device *dma = chan-device; + struct page *dest; + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[src_cnt]; + unsigned char *scf = qdest ? scoef_list : NULL; + struct dma_async_tx_descriptor *tx; + int i, dst_cnt = 0; + unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + + if (flags ASYNC_TX_XOR_ZERO_DST) + dma_prep_flags |= DMA_PREP_ZERO_DST; + + /* One parity (P or Q) calculation is initiated always; +* first always try Q +*/ + dest = qdest ? qdest : pdest; + dma_dest[dst_cnt++] = dma_map_page(dma-dev, dest, offset, len, + DMA_FROM_DEVICE); + + /* Switch to the next destination */ + if (qdest pdest) { + /* Both destinations are set, thus here we deal with P
[PATCH 04/11] md: run stripe operations outside the lock
The raid_run_ops routine uses the asynchronous offload api and the stripe_operations member of a stripe_head to carry out xor+pqxor+copy operations asynchronously, outside the lock. The operations performed by RAID-6 are the same as in the RAID-5 case except for no support of STRIPE_OP_PREXOR operations. All the others are supported: STRIPE_OP_BIOFILL - copy data into request buffers to satisfy a read request STRIPE_OP_COMPUTE_BLK - generate missing blocks (1 or 2) in the cache from the other blocks STRIPE_OP_BIODRAIN - copy data out of request buffers to satisfy a write request STRIPE_OP_POSTXOR - recalculate parity for new data that has entered the cache STRIPE_OP_CHECK - verify that the parity is correct The flow is the same as in the RAID-5 case. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- drivers/md/Kconfig |2 + drivers/md/raid5.c | 286 include/linux/raid/raid5.h |6 +- 3 files changed, 269 insertions(+), 25 deletions(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 2281b50..7731472 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -123,6 +123,8 @@ config MD_RAID456 depends on BLK_DEV_MD select ASYNC_MEMCPY select ASYNC_XOR + select ASYNC_PQXOR + select ASYNC_R6RECOV ---help--- A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a36a743..5b44d71 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -584,18 +584,26 @@ static void ops_run_biofill(struct stripe_head *sh) ops_complete_biofill, sh); } -static void ops_complete_compute5(void *stripe_head_ref) +static void ops_complete_compute(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; - int target = sh-ops.target; - struct r5dev *tgt = sh-dev[target]; + int target, i; + struct r5dev *tgt; pr_debug(%s: stripe %llu\n, __func__, (unsigned long long)sh-sector); - set_bit(R5_UPTODATE, 
tgt-flags); - BUG_ON(!test_bit(R5_Wantcompute, tgt-flags)); - clear_bit(R5_Wantcompute, tgt-flags); + /* mark the computed target(s) as uptodate */ + for (i = 0; i 2; i++) { + target = (!i) ? sh-ops.target : sh-ops.target2; + if (target 0) + continue; + tgt = sh-dev[target]; + set_bit(R5_UPTODATE, tgt-flags); + BUG_ON(!test_bit(R5_Wantcompute, tgt-flags)); + clear_bit(R5_Wantcompute, tgt-flags); + } + clear_bit(STRIPE_COMPUTE_RUN, sh-state); if (sh-check_state == check_state_compute_run) sh-check_state = check_state_compute_result; @@ -627,15 +635,158 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) if (unlikely(count == 1)) tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + 0, NULL, ops_complete_compute, sh); else tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + ops_complete_compute, sh); + + return tx; +} + +static struct dma_async_tx_descriptor * +ops_run_compute6_1(struct stripe_head *sh) +{ + /* kernel stack size limits the total number of disks */ + int disks = sh-disks; + struct page *srcs[disks]; + int target = sh-ops.target 0 ? 
sh-ops.target2 : sh-ops.target; + struct r5dev *tgt = sh-dev[target]; + struct page *dest = sh-dev[target].page; + int count = 0; + int pd_idx = sh-pd_idx, qd_idx = raid6_next_disk(pd_idx, disks); + int d0_idx = raid6_next_disk(qd_idx, disks); + struct dma_async_tx_descriptor *tx; + int i; + + pr_debug(%s: stripe %llu block: %d\n, + __func__, (unsigned long long)sh-sector, target); + BUG_ON(!test_bit(R5_Wantcompute, tgt-flags)); + + atomic_inc(sh-count); + + if (target == qd_idx) { + /* We are actually computing the Q drive*/ + i = d0_idx; + do { + srcs[count++] = sh-dev[i].page; + i = raid6_next_disk(i, disks); + } while (i != pd_idx); + /* Synchronous calculations need two destination pages, +* so use P-page too +*/ + tx = async_gen_syndrome(sh-dev[pd_idx].page, dest, + srcs, 0, count, STRIPE_SIZE, + ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute, sh
[PATCH 06/11] md: change handle_stripe_fill6 to work in asynchronous way
Change handle_stripe_fill6 to work asynchronously and introduce helper fetch_block6 function for this. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- drivers/md/raid5.c | 154 1 files changed, 106 insertions(+), 48 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4495df6..2ccecfa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2226,61 +2226,119 @@ static void handle_stripe_fill5(struct stripe_head *sh, set_bit(STRIPE_HANDLE, sh-state); } -static void handle_stripe_fill6(struct stripe_head *sh, - struct stripe_head_state *s, struct r6_state *r6s, - int disks) +/* fetch_block6 - checks the given member device to see if its data needs + * to be read or computed to satisfy a request. + * + * Returns 1 when no more member devices need to be checked, otherwise returns + * 0 to tell the loop in handle_stripe_fill6 to continue + */ +static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, +struct r6_state *r6s, int disk_idx, int disks) { - int i; - for (i = disks; i--; ) { - struct r5dev *dev = sh-dev[i]; - if (!test_bit(R5_LOCKED, dev-flags) - !test_bit(R5_UPTODATE, dev-flags) - (dev-toread || (dev-towrite -!test_bit(R5_OVERWRITE, dev-flags)) || -s-syncing || s-expanding || -(s-failed = 1 - (sh-dev[r6s-failed_num[0]].toread || - s-to_write)) || -(s-failed = 2 - (sh-dev[r6s-failed_num[1]].toread || - s-to_write { - /* we would like to get this block, possibly -* by computing it, but we might not be able to + struct r5dev *dev = sh-dev[disk_idx]; + struct r5dev *fdev[2] = { sh-dev[r6s-failed_num[0]], + sh-dev[r6s-failed_num[1]] }; + + if (!test_bit(R5_LOCKED, dev-flags) + !test_bit(R5_UPTODATE, dev-flags) + (dev-toread || +(dev-towrite !test_bit(R5_OVERWRITE, dev-flags)) || +s-syncing || s-expanding || +(s-failed = 1 + (fdev[0]-toread || s-to_write)) || +(s-failed = 2 + (fdev[1]-toread || s-to_write { + /* we would like to get this block, possibly by computing it, +* 
otherwise read it if the backing disk is insync +*/ + BUG_ON(test_bit(R5_Wantcompute, dev-flags)); + BUG_ON(test_bit(R5_Wantread, dev-flags)); + if ((s-uptodate == disks - 1) + (s-failed (disk_idx == r6s-failed_num[0] || + disk_idx == r6s-failed_num[1]))) { + /* have disk failed, and we're requested to fetch it; +* do compute it */ - if ((s-uptodate == disks - 1) - (s-failed (i == r6s-failed_num[0] || - i == r6s-failed_num[1]))) { - pr_debug(Computing stripe %llu block %d\n, - (unsigned long long)sh-sector, i); - compute_block_1(sh, i, 0); - s-uptodate++; - } else if ( s-uptodate == disks-2 s-failed = 2 ) { - /* Computing 2-failure is *very* expensive; only -* do it if failed = 2 + pr_debug(Computing stripe %llu block %d\n, + (unsigned long long)sh-sector, disk_idx); + set_bit(STRIPE_COMPUTE_RUN, sh-state); + set_bit(STRIPE_OP_COMPUTE_BLK, s-ops_request); + set_bit(R5_Wantcompute, dev-flags); + sh-ops.target = disk_idx; + sh-ops.target2 = -1; /* no 2nd target */ + s-req_compute = 1; + s-uptodate++; + return 1; + } else if ( s-uptodate == disks-2 s-failed = 2 ) { + /* Computing 2-failure is *very* expensive; only +* do it if failed = 2 +*/ + int other; + for (other = disks; other--; ) { + if (other == disk_idx) + continue; + if (!test_bit(R5_UPTODATE, + sh-dev[other].flags)) + break; + } + BUG_ON(other 0
[PATCH 08/11] md: asynchronous handle_parity_check6
This patch introduces the state machine for handling the RAID-6 parities check and repair functionality. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- drivers/md/raid5.c | 163 +++- 1 files changed, 110 insertions(+), 53 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c1125cd..963bc4b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2623,91 +2623,148 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, struct r6_state *r6s, struct page *tmp_page, int disks) { - int update_p = 0, update_q = 0; - struct r5dev *dev; + int i; + struct r5dev *devs[2] = {NULL, NULL}; int pd_idx = sh-pd_idx; int qd_idx = r6s-qd_idx; set_bit(STRIPE_HANDLE, sh-state); BUG_ON(s-failed 2); - BUG_ON(s-uptodate disks); + /* Want to check and possibly repair P and Q. * However there could be one 'failed' device, in which * case we can only check one of them, possibly using the * other to generate missing data */ - /* If !tmp_page, we cannot do the calculations, -* but as we have set STRIPE_HANDLE, we will soon be called -* by stripe_handle with a tmp_page - just wait until then. -*/ - if (tmp_page) { + switch (sh-check_state) { + case check_state_idle: + /* start a new check operation if there are 2 failures */ if (s-failed == r6s-q_failed) { /* The only possible failed device holds 'Q', so it * makes sense to check P (If anything else were failed, * we would have used P to recreate it). 
*/ - compute_block_1(sh, pd_idx, 1); - if (!page_is_zero(sh-dev[pd_idx].page)) { - compute_block_1(sh, pd_idx, 0); - update_p = 1; - } + sh-check_state = check_state_run; + set_bit(STRIPE_OP_CHECK_PP, s-ops_request); + clear_bit(R5_UPTODATE, sh-dev[pd_idx].flags); + s-uptodate--; } if (!r6s-q_failed s-failed 2) { /* q is not failed, and we didn't use it to generate * anything, so it makes sense to check it */ - memcpy(page_address(tmp_page), - page_address(sh-dev[qd_idx].page), - STRIPE_SIZE); - compute_parity6(sh, UPDATE_PARITY); - if (memcmp(page_address(tmp_page), - page_address(sh-dev[qd_idx].page), - STRIPE_SIZE) != 0) { - clear_bit(STRIPE_INSYNC, sh-state); - update_q = 1; - } + sh-check_state = check_state_run; + set_bit(STRIPE_OP_CHECK_QP, s-ops_request); + clear_bit(R5_UPTODATE, sh-dev[qd_idx].flags); + s-uptodate--; } - if (update_p || update_q) { - conf-mddev-resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, conf-mddev-recovery)) - /* don't try to repair!! */ - update_p = update_q = 0; + if (sh-check_state == check_state_run) { + break; } - /* now write out any block on a failed drive, -* or P or Q if they need it -*/ + /* we have 2-disk failure */ + BUG_ON(s-failed != 2); + devs[0] = sh-dev[r6s-failed_num[0]]; + devs[1] = sh-dev[r6s-failed_num[1]]; + /* fall through */ + case check_state_compute_result: + sh-check_state = check_state_idle; - if (s-failed == 2) { - dev = sh-dev[r6s-failed_num[1]]; - s-locked++; - set_bit(R5_LOCKED, dev-flags); - set_bit(R5_Wantwrite, dev-flags); + BUG_ON((devs[0] !devs[1]) || + (!devs[0] devs[1])); + + BUG_ON(s-uptodate (disks - 1)); + + if (!devs[0]) { + if (s-failed = 1) + devs[0] = sh-dev[r6s-failed_num[0]]; + else + devs[0] = sh-dev[pd_idx]; } - if (s-failed = 1
[PATCH 10/11] md: remove unused functions
Some clean-up of the replaced or already unnecessary functions. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- drivers/md/raid5.c | 246 1 files changed, 0 insertions(+), 246 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 79e8c74..6bde4da 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1647,245 +1647,6 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) } - -/* - * Copy data between a page in the stripe cache, and one or more bion - * The page could align with the middle of the bio, or there could be - * several bion, each with several bio_vecs, which cover part of the page - * Multiple bion are linked together on bi_next. There may be extras - * at the end of this list. We ignore them. - */ -static void copy_data(int frombio, struct bio *bio, -struct page *page, -sector_t sector) -{ - char *pa = page_address(page); - struct bio_vec *bvl; - int i; - int page_offset; - - if (bio-bi_sector = sector) - page_offset = (signed)(bio-bi_sector - sector) * 512; - else - page_offset = (signed)(sector - bio-bi_sector) * -512; - bio_for_each_segment(bvl, bio, i) { - int len = bio_iovec_idx(bio,i)-bv_len; - int clen; - int b_offset = 0; - - if (page_offset 0) { - b_offset = -page_offset; - page_offset += b_offset; - len -= b_offset; - } - - if (len 0 page_offset + len STRIPE_SIZE) - clen = STRIPE_SIZE - page_offset; - else clen = len; - - if (clen 0) { - char *ba = __bio_kmap_atomic(bio, i, KM_USER0); - if (frombio) - memcpy(pa+page_offset, ba+b_offset, clen); - else - memcpy(ba+b_offset, pa+page_offset, clen); - __bio_kunmap_atomic(ba, KM_USER0); - } - if (clen len) /* hit end of page */ - break; - page_offset += len; - } -} - -#define check_xor()do { \ - if (count == MAX_XOR_BLOCKS) {\ - xor_blocks(count, STRIPE_SIZE, dest, ptr);\ - count = 0;\ - } \ - } while(0) - -static void compute_parity6(struct stripe_head *sh, int method) -{ - raid6_conf_t *conf = sh-raid_conf; - int i, 
pd_idx = sh-pd_idx, qd_idx, d0_idx, disks = sh-disks, count; - struct bio *chosen; - / FIX THIS: This could be very bad if disks is close to 256 / - void *ptrs[disks]; - - qd_idx = raid6_next_disk(pd_idx, disks); - d0_idx = raid6_next_disk(qd_idx, disks); - - pr_debug(compute_parity, stripe %llu, method %d\n, - (unsigned long long)sh-sector, method); - - switch(method) { - case READ_MODIFY_WRITE: - BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */ - case RECONSTRUCT_WRITE: - for (i= disks; i-- ;) - if ( i != pd_idx i != qd_idx sh-dev[i].towrite ) { - chosen = sh-dev[i].towrite; - sh-dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, sh-dev[i].flags)) - wake_up(conf-wait_for_overlap); - - BUG_ON(sh-dev[i].written); - sh-dev[i].written = chosen; - } - break; - case CHECK_PARITY: - BUG(); /* Not implemented yet */ - } - - for (i = disks; i--;) - if (sh-dev[i].written) { - sector_t sector = sh-dev[i].sector; - struct bio *wbi = sh-dev[i].written; - while (wbi wbi-bi_sector sector + STRIPE_SECTORS) { - copy_data(1, wbi, sh-dev[i].page, sector); - wbi = r5_next_bio(wbi, sector); - } - - set_bit(R5_LOCKED, sh-dev[i].flags); - set_bit(R5_UPTODATE, sh-dev[i].flags); - } - -// switch(method) { -// case RECONSTRUCT_WRITE: -// case CHECK_PARITY: -// case UPDATE_PARITY: - /* Note that unlike RAID-5, the ordering of the disks matters greatly. */ - /* FIX: Is this ordering of drives even remotely optimal
Re: [2/2] powerpc: support for 256K pages on PPC 44x
Hello Milton, Milton Miller wrote: This patch adds support for 256K pages on PPC 44x along with some hacks needed for this. This description is insufficient, it describes neither the hacks nor why they are required. Ok. Actually there is only one hack -- increasing kernel stack size. We do this because with 256K pages we get division by zero in kernel/fork.c: /* * The default maximum number of threads is set to a safe * value: the thread structures can take up at most half * of memory. */ max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); so setting THREAD_SIZE to bigger value we can avoid this. I don't think it's very clean solution but at least we stay powerpc-specific. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9627cfd..7df5528 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -425,6 +425,14 @@ config PPC_64K_PAGES bool 64k page size if 44x || PPC64 select PPC_HAS_HASH_64K if PPC64 +config PPC_256K_PAGES + bool 256k page size if 44x + depends on BROKEN I know it was not your original choice, but I feel BROKEN is too strong. It should be under embedded, and maybe a second choice I am using standard binutils that defaults to yes and is set to no (so that all yes config does not enable it by accident), but I feel labeling this BROKEN for an external dependency is wrong. Hm... maybe you are right. I'm looking forward for additional comments on this. + help + ELF standard supports only page sizes up to 64K so you need a patched + binutils in order to use 256K pages. Chose it only if you know what + you are doing. + endchoice config FORCE_MAX_ZONEORDER diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index dc1132c..0b4ac6a 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,7 +38,8 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. 
*/ -#if defined(CONFIG_PPC_64K_PAGES) !defined(CONFIG_PPC64) +#if defined(CONFIG_PPC_256K_PAGES) || \ + (defined(CONFIG_PPC_64K_PAGES) !defined(CONFIG_PPC64)) Just because 256K pages is not selectable on PPC64 doesn't mean that this is the right grouping. However, as I said on the previous patch, this file is never included on PPC64 so the clause should be removed. Ok. diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index ebfae53..273369a 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -20,7 +20,11 @@ */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; +#ifdef CONFIG_PPC_256K_PAGES +#define PTE_SHIFT (PAGE_SHIFT - 7) This seems to be missing the comment on how many ptes are actually in the page that are in the other if and else cases. Ok. I'll fix this. Actually it's another hack: we don't use full page for PTE table because we need to reserve something for PGD +#else #define PTE_SHIFT (PAGE_SHIFT - 3)/* 512 ptes per page */ +#endif #else typedef unsigned long pte_basic_t; #define PTE_SHIFT (PAGE_SHIFT - 2)/* 1024 ptes per page */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9665a26..3c8bbab 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -15,8 +15,12 @@ #ifdef CONFIG_PPC64 #define THREAD_SHIFT 14 #else +#ifdef CONFIG_PPC_256K_PAGES +#define THREAD_SHIFT 15 +#else #define THREAD_SHIFT 13 #endif +#endif #define THREAD_SIZE(1 THREAD_SHIFT) So this appears to be the one hack. For some unknown reason, you are increasing the kernel stack from 8k to 32k when selecting 256k pages. What data structure is ballooning in size so much that you need the additional kernel stack space on 256k pages but not on 64k pages? Is this really tied to 256k base page size? We don't really need additional stack space. Just trying to avoid division by zero. Regards, Ilya. 
___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures
Hello Milton, Milton Miller wrote: I started out looking at the too minimal decription of patch 2/2, and that morphed into talking about both patches. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e..9627cfd 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool 64k page size - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt Page size + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines + The PAGE_SIZE definition. Increasing the page size may + improve the system performance in some dedicated cases like software + RAID with accelerated calculations. In PPC64 case on machines without processor support for 64k pages, the kernel will simulate them by loading each individual 4k page on demand transparently, while on hardware with such support, it will be used to map normal application pages. + If unsure, set it to 4 KB. + This is less understandable (more hacker jargon) and too application specific. (Josh, since this is cross-sub-platform we need to make sure this fragment gets proper review). Also, we need to check the help placement, as I seem to remember the config programs looking at the first choice instead of the choice tag. Or should the help be split by option? Help at the choice tag works properly. Lets try this Select the kernel logical page size. Increasing the page size will reduce software overhead at each page boundary, allow hardware prefetch mechanisms to be more effective, and allow larger dma transfers increasing IO efficiency and reducing overhead. However the utilization of memory will increase. For example, each cached file will using a multiple of the page size to hold its contents and the difference between the end of file and the end of page is wasted. 
Some dedicated systems, such as software raid serving with accelerated calculations, have shown significant increases. If you configure a 64 bit kernel for 64k pages but the processor does not support them, then the kernel will simulate them with 4k pages, loading them on demand, but with the reduced software overhead and larger internal fragmentation. For the 32 bit kernel, a large page option will not be offered unless it is supported by the configured processor. If unsure, choose 4K_PAGES. This looks much better for me. I'll include this help message in updated patch. +config PPC_4K_PAGES + bool 4k page size + +config PPC_16K_PAGES + bool 16k page size if 44x + +config PPC_64K_PAGES + bool 64k page size if 44x || PPC64 + select PPC_HAS_HASH_64K if PPC64 + +endchoice diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 5d99b64..dc1132c 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ +#if defined(CONFIG_PPC_64K_PAGES) !defined(CONFIG_PPC64) In patch 2/2 I was going to comment about the precedence of PPC64 vs 64K_PAGES, but then I realized this file is only included when CONFIG_HIGHMEM is set and that depends on PPC32 , so it will never be set. Please remove the additional noise !defined(CONFIG_PPC64). Ok. +#define PKMAP_ORDER(27 - PAGE_SHIFT) where did the value 27 come from? Hm... It's pretty much experimental. There is the range of values which gives us a proper virtual memory map (VMALLOC_BEGIN VMALLOC_END) and I have no clean idea which one we should use. 
+#define LAST_PKMAP (1 PKMAP_ORDER) +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) +#else #define LAST_PKMAP (1 PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) PMD_MASK) +#endif +#define LAST_PKMAP_MASK(LAST_PKMAP-1) and why not set PKMAP_ORDER on both sides of the else, keepign LAST_PKMAP common? We can do this but I can't see much sense here... We still need to define PKMAP_BASE differently. #define PKMAP_NR(virt) ((virt-PKMAP_BASE) PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) PAGE_SHIFT)) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index dbb8ca1..0d447fb 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -39,6 +39,9 @@ extern void paging_init(void); #include asm-generic/pgtable.h +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1)
[RFC PATCH] Support for big page sizes on 44x (Updated)
These patches add support for selecting the page size on PPC 44x. The first one adds support for 16K/64K pages, while the second one adds support for 256K pages along with some hacks. However, there are still a number of problems: 1. We can't use the default PKMAP_BASE definition with 64KB/256KB pages, so we change it. Not sure that it's optimal. Also, the redefined PKMAP_BASE is not aligned on (1 << PMD_SHIFT); don't know if that is really bad. 2. With 16KB/64KB/256KB pages, WARN_ON(!pmd_none(*pmd)) is triggered inside the dma_alloc_init() function. Not sure if it is really bad. 3. With 256KB pages, ENTRIES_PER_PAGEPAGE in mm/shmem.c becomes zero. 4. We use the asm-offsets mechanism to make PTE_SHIFT/PMD_SHIFT available in assembler, but we don't really need the power of asm-offsets here. Maybe it would be more convenient to just take these defines out of #ifndef __ASSEMBLY__? But this would change asm-generic... We would appreciate any comments. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures.
This patch adds support for page sizes bigger than 4K (16K/64K) on PPC 44x. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Vladimir Panfilov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/Kconfig | 26 -- arch/powerpc/include/asm/highmem.h |8 +++- arch/powerpc/include/asm/mmu-44x.h | 18 ++ arch/powerpc/include/asm/page.h| 13 - arch/powerpc/include/asm/pgtable.h |3 +++ arch/powerpc/kernel/asm-offsets.c |4 arch/powerpc/kernel/head_44x.S | 22 +- arch/powerpc/kernel/misc_32.S | 12 ++-- arch/powerpc/mm/pgtable_32.c |9 ++--- arch/powerpc/platforms/Kconfig.cputype |2 +- 10 files changed, 82 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e..9627cfd 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool 64k page size - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt Page size + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines + The PAGE_SIZE definition. Increasing the page size may + improve the system performance in some dedicated cases like software + RAID with accelerated calculations. In PPC64 case on machines without processor support for 64k pages, the kernel will simulate them by loading each individual 4k page on demand transparently, while on hardware with such support, it will be used to map normal application pages. + If unsure, set it to 4 KB. 
+ +config PPC_4K_PAGES + bool 4k page size + +config PPC_16K_PAGES + bool 16k page size if 44x + +config PPC_64K_PAGES + bool 64k page size if 44x || PPC64 + select PPC_HAS_HASH_64K if PPC64 + +endchoice config FORCE_MAX_ZONEORDER int Maximum zone order @@ -435,7 +449,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool Support setting protections for 4k subpages - depends on PPC_64K_PAGES + depends on PPC64 PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 5d99b64..dc1132c 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ +#if defined(CONFIG_PPC_64K_PAGES) !defined(CONFIG_PPC64) +#define PKMAP_ORDER(27 - PAGE_SHIFT) +#define LAST_PKMAP (1 PKMAP_ORDER) +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) +#else #define LAST_PKMAP (1 PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) PMD_MASK) +#endif +#define LAST_PKMAP_MASK(LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) PAGE_SHIFT)) diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index a825524..2ca18e8 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -4,6 +4,8 @@ * PPC440 support */ +#include asm/page.h + #define PPC44x_MMUCR_TID 0x00ff #define PPC44x_MMUCR_STS 0x0001 @@ -73,4 +75,20 @@ typedef struct { /* Size of the TLBs used for pinning in lowmem */ #define PPC_PIN_SIZE (1 28) /* 256M */ +#if (PAGE_SHIFT == 12) +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#elif (PAGE_SHIFT == 14) +#define PPC44x_TLBE_SIZE PPC44x_TLB_16K +#elif 
(PAGE_SHIFT == 16) +#define PPC44x_TLBE_SIZE PPC44x_TLB_64K +#else +#error Unsupported PAGE_SIZE +#endif + +#define PPC44x_PGD_OFF_SHIFT (32 - PMD_SHIFT + 2) +#define PPC44x_PGD_OFF_MASK(PMD_SHIFT - 2) +#define PPC44x_PTE_ADD_SHIFT (32 - PMD_SHIFT + PTE_SHIFT + 3) +#define PPC44x_PTE_ADD_MASK(32 - 3 - PTE_SHIFT) +#define PPC44x_RPN_MASK(31 - PAGE_SHIFT) + #endif /* _ASM_POWERPC_MMU_44X_H_ */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e088545..537d5b1 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -15,12 +15,15 @@ #include asm/types.h /* - * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software + * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages + * on PPC44x). For PPC64 we support either 4K or 64K software * page size. When using 64K pages however, whether we
[PATCH 2/2] powerpc: support for 256K pages on PPC 44x
This patch adds support for 256K pages on PPC 44x along with some hacks needed for this. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Vladimir Panfilov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/Kconfig |8 arch/powerpc/include/asm/highmem.h |3 ++- arch/powerpc/include/asm/mmu-44x.h |2 ++ arch/powerpc/include/asm/page.h|6 -- arch/powerpc/include/asm/page_32.h |4 arch/powerpc/include/asm/thread_info.h |4 arch/powerpc/kernel/head_booke.h | 11 +-- 7 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9627cfd..7df5528 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -425,6 +425,14 @@ config PPC_64K_PAGES bool 64k page size if 44x || PPC64 select PPC_HAS_HASH_64K if PPC64 +config PPC_256K_PAGES + bool 256k page size if 44x + depends on BROKEN + help + ELF standard supports only page sizes up to 64K so you need a patched + binutils in order to use 256K pages. Chose it only if you know what + you are doing. + endchoice config FORCE_MAX_ZONEORDER diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index dc1132c..0b4ac6a 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,7 +38,8 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. 
*/ -#if defined(CONFIG_PPC_64K_PAGES) !defined(CONFIG_PPC64) +#if defined(CONFIG_PPC_256K_PAGES) || \ + (defined(CONFIG_PPC_64K_PAGES) !defined(CONFIG_PPC64)) #define PKMAP_ORDER(27 - PAGE_SHIFT) #define LAST_PKMAP (1 PKMAP_ORDER) #define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 2ca18e8..b943462 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -81,6 +81,8 @@ typedef struct { #define PPC44x_TLBE_SIZE PPC44x_TLB_16K #elif (PAGE_SHIFT == 16) #define PPC44x_TLBE_SIZE PPC44x_TLB_64K +#elif (PAGE_SHIFT == 18) +#define PPC44x_TLBE_SIZE PPC44x_TLB_256K #else #error Unsupported PAGE_SIZE #endif diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 537d5b1..f42c918 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -15,12 +15,14 @@ #include asm/types.h /* - * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages + * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages * on PPC44x). For PPC64 we support either 4K or 64K software * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. 
*/ -#if defined(CONFIG_PPC_64K_PAGES) +#if defined(CONFIG_PPC_256K_PAGES) +#define PAGE_SHIFT 18 +#elif defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 #elif defined(CONFIG_PPC_16K_PAGES) #define PAGE_SHIFT 14 diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index ebfae53..273369a 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -20,7 +20,11 @@ */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; +#ifdef CONFIG_PPC_256K_PAGES +#define PTE_SHIFT (PAGE_SHIFT - 7) +#else #define PTE_SHIFT (PAGE_SHIFT - 3)/* 512 ptes per page */ +#endif #else typedef unsigned long pte_basic_t; #define PTE_SHIFT (PAGE_SHIFT - 2)/* 1024 ptes per page */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9665a26..3c8bbab 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -15,8 +15,12 @@ #ifdef CONFIG_PPC64 #define THREAD_SHIFT 14 #else +#ifdef CONFIG_PPC_256K_PAGES +#define THREAD_SHIFT 15 +#else #define THREAD_SHIFT 13 #endif +#endif #define THREAD_SIZE(1 THREAD_SHIFT) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fce2df9..acd4b47 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -9,6 +9,13 @@ li r26,[EMAIL PROTECTED]; \ mtspr SPRN_IVOR##vector_number,r26; \ sync +#ifndef CONFIG_PPC_256K_PAGES +#define ALLOC_STACK_FRAME(reg, val)addireg,reg,val +#else +#define ALLOC_STACK_FRAME(reg, val)\ + addis reg,reg,[EMAIL PROTECTED]; \ + addireg,reg,[EMAIL PROTECTED] +#endif #define NORMAL_EXCEPTION_PROLOG \ mtspr
Re: [PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
Hello David, David Gibson wrote: I don't see any reason to have a separate set of config options for 32 and 64-bit. Just make the once choice, but only have the individual pagesize options enabled on machines that support them. Well. I can see some. First, on PPC64 kernel emulates 64K pages on hardware that can't do it and we are not going to do such an emulation on PPC32 now. Then CONFIG_PPC_64K_PAGES selects PPC_HAS_HASH_64K and our code has nothing to do with it. And last but not least, we don't use PPC64 kernels for now so we just tried not to break something we can't test. But if everybody thinks that having a single option is a good idea I'll do it that way. I don't think you should need a real_pte_t type for the 32-bit implementation. It's just there because of how we implement 64k granularity page allocation on hardware that only does 4k translations. You are right. Thanks. diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index ebfae53..d176270 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -20,7 +20,11 @@ */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; +#ifdef CONFIG_PPC32_256K_PAGES +#define PTE_SHIFT (PAGE_SHIFT - 7) This doesn't look right. You should be eliding one of the levels of page table if you don't need it, rather than leaving the bottom level PTE page largely empty. Hm... We have only two levels really so if we elide one there will be only one left. Don't sure if kernel can work with this... +#if (PAGE_SHIFT == 12) +/* + * PAGE_SIZE 4K + * PAGE_SHIFT 12 + * PTE_SHIFT 9 + * PMD_SHIFT 21 + */ +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#define PPC44x_PGD_OFF_SH 13 /*(32 - PMD_SHIFT + 2)*/ +#define PPC44x_PGD_OFF_M1 19 /*(PMD_SHIFT - 2)*/ +#define PPC44x_PTE_ADD_SH 23 /*32 - PMD_SHIFT + PTE_SHIFT + 3*/ +#define PPC44x_PTE_ADD_M1 20 /*32 - 3 - PTE_SHIFT*/ +#define PPC44x_RPN_M2 19 /*31 - PAGE_SHIFT*/ Uh.. 
you have the formulae for these things right there in the comments, so why aren't you using those and avoiding this nasty multiway ifdef... We need to get PMD_SHIFT and friends out of #ifndef __ASSEMBLY__ for that. And some of them are under include/asm-generic so patch becomes not powerpc-specific... diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9665a26..4e7cd1f 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -15,8 +15,12 @@ #ifdef CONFIG_PPC64 #define THREAD_SHIFT 14 #else +#if defined(CONFIG_PPC32_256K_PAGES) +#define THREAD_SHIFT 15 Hrm.. more peculiar special cases for 256K pages. I think it might be clearer if you split the patch into one which supports page sizes up to 64k, then another that does the extra hacks for 256k pages. Agreed. diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fce2df9..4f802df 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -20,7 +20,9 @@ beq 1f; \ mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ - addir1,r1,THREAD_SIZE; \ + lis r11,[EMAIL PROTECTED]; \ + ori r11,r11,[EMAIL PROTECTED]; \ + add r1,r1,r11; \ It would be nice if we could avoid the extra instruction here when the page sizes isn't big enough to require it. Ok. This is going to go to 256K-dirty-hacks.patch anyway. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
Hello Benjamin, Benjamin Herrenschmidt wrote: [snip] diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fce2df9..4f802df 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -20,7 +20,9 @@ beq 1f; \ mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ - addir1,r1,THREAD_SIZE; \ + lis r11,[EMAIL PROTECTED]; \ + ori r11,r11,[EMAIL PROTECTED]; \ + add r1,r1,r11; \ It would be nice if we could avoid the extra instruction here when the page sizes isn't big enough to require it. As a matter of fact, I don't see why THREAD_SIZE should ever need that, there is no reason to change the kernel stack size. Well, this was introduced because of that: kernel/fork.c [179]: max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); With THREAD_SIZE=8K we will get mempages divided by zero if PAGE_SIZE is bigger than 64K. (Well, another reason not to use 64K pages). Not sure this is the right way to fix it. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
Hi, prodyut hazarika wrote: +choice + prompt Page size + depends on 44x PPC32 + default PPC32_4K_PAGES + help + The PAGE_SIZE definition. Increasing the page size may + improve the system performance in some dedicated cases. + If unsure, set it to 4 KB. + You should mention an example of dedicated cases (eg. RAID). I think this help should mention that for page size 256KB, you will need to have a special version of binutils, since the ELF standard mentions page sizes only upto 64KB. Agreed. -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC32_256K_PAGES) +#define PAGE_SHIFT 18 +#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 +#elif defined(CONFIG_PPC32_16K_PAGES) +#define PAGE_SHIFT 14 #else #define PAGE_SHIFT 12 #endif Why should the new defines be inside CONFIG_PPC_64K_PAGES? The I think you missed first '-' on the first line. definition CONFIG_PPC_64K_PAGES is repeated. Shouldn't these defines be like this: #if defined(CONFIG_PPC32_256K_PAGES) #define PAGE_SHIFT 18 #elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 #elif defined(CONFIG_PPC32_16K_PAGES) #define PAGE_SHIFT 14 #else #define PAGE_SHIFT 12 #endif And they do actually :) Please change PPC44x_PGD_OFF_SH to PPC44x_PGD_OFF_SHIFT. SH sounds very confusing. I don't like the MI and M2 names too. Change PPC44x_RPN_M2 to PPC44x_RPN_MASK. Change M1 to MASK in PPC44x_PGD_OFF_M1 and PPC44x_PTE_ADD_M1 . Agreed. Is there no way a define like #define PPC44x_PGD_OFF_SH (32 - PMD_SHIFT + 2) be used in assembly file. If yes, we can avoid repeating the defines. We can use defined like this, problem is that PMD_SHIFT and PTE_SHIFT declared inside #ifndef __ASSEMBLY__ I think these 44x specific defines should go to asm/mmu-44x.h since I Agreed. For 256KB page size, I cannot understand why PTE_SHIFT is 11. Since each PTE entry is 8 byte, PTE_SHIFT should have been 15. 
But then there would be no bits in the Effective address for the 1st level PGDIR offset. On what basis PTE_SHIFT of 11 is chosen? This overflow problem happens only for 256KB page size. I think Yuri has commented on this already. Any comments on the issues mentioned in introductory message? Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
Hi, prodyut hazarika wrote: Also, it would be great if you could point me what changes are necessary to recompile the binutils. I would like to test the 256KB changes on my Canyonlands board. I have got 16KB/64KB working. I think this should be enough: --- binutils-2.16.1/ld/emulparams/elf32ppc.sh.orig 2007-08-21 14:18:56.0 +0200 +++ binutils-2.16.1/ld/emulparams/elf32ppc.sh 2007-08-21 14:19:42.0 +0200 @@ -8,7 +8,7 @@ GENERATE_PIE_SCRIPT=yes SCRIPT_NAME=elf OUTPUT_FORMAT=elf32-powerpc TEXT_START_ADDR=0x01800000 -MAXPAGESIZE=0x10000 +MAXPAGESIZE=0x40000 COMMONPAGESIZE=0x1000 ARCH=powerpc:common MACHINE= --- binutils-2.16.1/bfd/elf32-ppc.c.orig 2007-09-04 13:11:29.0 +0200 +++ binutils-2.16.1/bfd/elf32-ppc.c 2007-09-04 13:10:25.0 +0200 @@ -6197,7 +6197,7 @@ #ifdef __QNXTARGET__ #define ELF_MAXPAGESIZE 0x1000 #else -#define ELF_MAXPAGESIZE 0x10000 +#define ELF_MAXPAGESIZE 0x40000 #endif #define ELF_MINPAGESIZE 0x1000 #define elf_info_to_howto ppc_elf_info_to_howto And you need to rebuild the whole RFS with the patched binutils, of course. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
Hi, prodyut hazarika wrote: In file arch/powerpc/mm/pgtable_32.c, we have: #ifdef CONFIG_PTE_64BIT /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ #define PGDIR_ORDER 1 #else #define PGDIR_ORDER 0 #endif pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); return ret; } Thus, we allocate 2 pages for 44x processors for PGD. This is needed only for 4K page. We are anyway not using the whole 64K or 256K page for the PGD. So there is no point to waste an additional 64K or 256KB page Ok. I'm not sure I'm right, but I think the 16K case doesn't need a second page either. (PGDIR_SHIFT=25, so sizeof(pgd_t) << (32 - PGDIR_SHIFT) < 16KB) Change this to: #ifdef CONFIG_PTE_64BIT #if (PAGE_SHIFT == 12) I think #ifdef CONFIG_PTE_64BIT is a little bit confusing here... Actually PGDIR_ORDER should be something like max(32 + 2 - PGDIR_SHIFT - PAGE_SHIFT, 0) /* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ #define PGDIR_ORDER 1 #else #define PGDIR_ORDER 0 #endif #else #define PGDIR_ORDER 0 #endif Yuri, any comments? Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
Hello Josh, Josh Boyer wrote: Ok, but not everyone does. And I think setting the page size to this should be harder, maybe even dependent upon CONFIG_BROKEN. Well, we are violating the ELF standard here... CONFIG_BROKEN seems adequate to me. I need to look over the patch a bit more, but some of the comments you've already gotten seem valid. I'll address them and post an updated patch in a few days. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[RFC PATCH] Support for big page sizes on 44x
This adds support for selecting the page size on PPC 44x. However, there are still a number of problems: 1. We can't use the default PKMAP_BASE definition with 64KB/256KB pages, so we change it. Not sure that it's optimal. Also, the redefined PKMAP_BASE is not aligned on (1 << PMD_SHIFT); we don't know if that is really bad. 2. With 16KB/64KB/256KB pages, WARN_ON(!pmd_none(*pmd)) is triggered inside the dma_alloc_init() function. Not sure if it is really bad. 3. With 256KB pages, ENTRIES_PER_PAGEPAGE in mm/shmem.c becomes zero. The second patch tries to address this issue. 4. Ugly ppc_page_asm.h file. I'd prefer to see these defines calculated instead of being hardcoded, but PTE_SHIFT and PMD_SHIFT are declared inside #ifndef __ASSEMBLY__. Don't know why. We would appreciate any comments. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] powerpc: add support for PAGE_SIZEs greater than 4KB for
This patch adds support for page sizes bigger than 4KB (16KB/64KB/256KB) on PPC 44x. Signed-off-by: Yuri Tikhonov [EMAIL PROTECTED] Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/Kconfig| 23 + arch/powerpc/include/asm/highmem.h |8 +++- arch/powerpc/include/asm/page.h | 26 +- arch/powerpc/include/asm/page_32.h |4 ++ arch/powerpc/include/asm/ppc_page_asm.h | 75 +++ arch/powerpc/include/asm/thread_info.h |4 ++ arch/powerpc/kernel/head_44x.S | 21 +--- arch/powerpc/kernel/head_booke.h|7 ++- arch/powerpc/kernel/misc_32.S | 13 +++--- arch/powerpc/mm/pgtable_32.c|2 +- 10 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 arch/powerpc/include/asm/ppc_page_asm.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e..ca93157 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -413,6 +413,29 @@ config PPC_64K_PAGES while on hardware with such support, it will be used to map normal application pages. +choice + prompt Page size + depends on 44x PPC32 + default PPC32_4K_PAGES + help + The PAGE_SIZE definition. Increasing the page size may + improve the system performance in some dedicated cases. + If unsure, set it to 4 KB. + +config PPC32_4K_PAGES + bool 4k page size + +config PPC32_16K_PAGES + bool 16k page size + +config PPC32_64K_PAGES + bool 64k page size + +config PPC32_256K_PAGES + bool 256k page size + +endchoice + config FORCE_MAX_ZONEORDER int Maximum zone order default 9 if PPC_64K_PAGES diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 5d99b64..1aec96d 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. 
*/ +#if defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC32_256K_PAGES) +#define PKMAP_ORDER(27 - PAGE_SHIFT) +#define LAST_PKMAP (1 PKMAP_ORDER) +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) +#else #define LAST_PKMAP (1 PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) PMD_MASK) +#endif +#define LAST_PKMAP_MASK(LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) PAGE_SHIFT)) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e088545..1de90b4 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -15,12 +15,17 @@ #include asm/types.h /* - * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software + * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages + * on PPC44x). For PPC64 we support either 4K or 64K software * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. 
*/ -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC32_256K_PAGES) +#define PAGE_SHIFT 18 +#elif defined(CONFIG_PPC32_64K_PAGES) || defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 +#elif defined(CONFIG_PPC32_16K_PAGES) +#define PAGE_SHIFT 14 #else #define PAGE_SHIFT 12 #endif @@ -140,11 +145,19 @@ typedef struct { pte_basic_t pte; } pte_t; /* 64k pages additionally define a bigger real PTE type that gathers * the second half part of the PTE for pseudo 64k pages */ +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; #endif +#else +#ifdef CONFIG_PPC32_4K_PAGES +typedef struct { pte_t pte; } real_pte_t; +#else +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#endif +#endif /* !CONFIG_PPC64 */ /* PMD level */ #ifdef CONFIG_PPC64 @@ -180,12 +193,19 @@ typedef pte_basic_t pte_t; #define pte_val(x) (x) #define __pte(x) (x) +#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef unsigned long real_pte_t; #endif - +#else +#ifdef CONFIG_PPC32_4K_PAGES +typedef unsigned long real_pte_t; +#else +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#endif +#endif /* !PPC64 */ #ifdef CONFIG_PPC64 typedef unsigned long pmd_t; diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index ebfae53..d176270 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -20,7 +20,11 @@ */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; +#ifdef CONFIG_PPC32_256K_PAGES
[PATCH] mm: fix ENTRIES_PER_PAGEPAGE overflow with 256KB pages
ENTRIES_PER_PAGEPAGE define in mm/shmem.c becomes zero if page size is 256KB. This patch fixes this. Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- mm/shmem.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 04fb4f1..c603427 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -59,7 +59,7 @@ #define TMPFS_MAGIC0x01021994 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) +#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) @@ -519,7 +519,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) struct shmem_inode_info *info = SHMEM_I(inode); unsigned long idx; unsigned long size; - unsigned long limit; + unsigned long long limit; unsigned long stage; unsigned long diroff; struct page **dir; @@ -535,7 +535,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) int punch_hole; spinlock_t *needs_lock; spinlock_t *punch_lock; - unsigned long upper_limit; + unsigned long long upper_limit; inode-i_ctime = inode-i_mtime = CURRENT_TIME; idx = (start + PAGE_CACHE_SIZE - 1) PAGE_CACHE_SHIFT; -- 1.5.6.1 ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] ppc4xx_pci: necessary fixes for 4GB RAM size
Benjamin Herrenschmidt wrote: 1. total_memory should be phys_addr_t not unsigned long 2. is_power_of_2() works with u32 so I just inlined (size & (size-1)) != 0 instead. Also this patch fixes default initialization: res->end should be 0x7fffffff not 0x80000000. Signed-off-by: Ilya Yanok [EMAIL PROTECTED] Ben, any comments here? Looks right to me. Just one minor comment... The patch should do what I failed to do before, which is to move total_memory declaration to a header :-) Hm... looks like we already have this declaration in arch/powerpc/mm/mmu_decl.h... I'll send a modified patch soon. Regards, Ilya. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] ppc4xx_pci: necessary fixes for 4GB RAM size (updated)
1. declaration of total_memory removed. Now including mm/mmu_decl.h instead. 2. is_power_of_2() works with u32 so I just inlined (size (size-1)) != 0 instead. Also this patch fixes default initialization: res-end should be 0x7fff not 0x8000. Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/sysdev/ppc4xx_pci.c | 13 ++--- 1 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index e1c7df9..e899716 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -30,14 +30,12 @@ #include asm/machdep.h #include asm/dcr.h #include asm/dcr-regs.h +#include mm/mmu_decl.h #include ppc4xx_pci.h static int dma_offset_set; -/* Move that to a useable header */ -extern unsigned long total_memory; - #define U64_TO_U32_LOW(val)((u32)((val) 0xULL)) #define U64_TO_U32_HIGH(val) ((u32)((val) 32)) @@ -105,7 +103,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, /* Default */ res-start = 0; - res-end = size = 0x8000; + size = 0x8000; + res-end = size - 1; res-flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; /* Get dma-ranges property */ @@ -167,13 +166,13 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, */ if (size total_memory) { printk(KERN_ERR %s: dma-ranges too small - (size=%llx total_memory=%lx)\n, - hose-dn-full_name, size, total_memory); + (size=%llx total_memory=%llx)\n, + hose-dn-full_name, size, (u64)total_memory); return -ENXIO; } /* Check we are a power of 2 size and that base is a multiple of size*/ - if (!is_power_of_2(size) || + if ((size (size - 1)) != 0 || (res-start (size - 1)) != 0) { printk(KERN_ERR %s: dma-ranges unaligned\n, hose-dn-full_name); -- 1.5.6.1 ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] ppc4xx_pci: necessary fixes for 4GB RAM size
1. total_memory should be phys_addr_t not unsigned long 2. is_power_of_2() works with u32 so I just inlined (size (size-1)) != 0 instead. Also this patch fixes default initialization: res-end should be 0x7fff not 0x8000. Signed-off-by: Ilya Yanok [EMAIL PROTECTED] --- arch/powerpc/sysdev/ppc4xx_pci.c | 11 ++- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index e1c7df9..645b2c9 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -36,7 +36,7 @@ static int dma_offset_set; /* Move that to a useable header */ -extern unsigned long total_memory; +extern phys_addr_t total_memory; #define U64_TO_U32_LOW(val)((u32)((val) 0xULL)) #define U64_TO_U32_HIGH(val) ((u32)((val) 32)) @@ -105,7 +105,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, /* Default */ res-start = 0; - res-end = size = 0x8000; + size = 0x8000; + res-end = size - 1; res-flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; /* Get dma-ranges property */ @@ -167,13 +168,13 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, */ if (size total_memory) { printk(KERN_ERR %s: dma-ranges too small - (size=%llx total_memory=%lx)\n, - hose-dn-full_name, size, total_memory); + (size=%llx total_memory=%llx)\n, + hose-dn-full_name, size, (u64)total_memory); return -ENXIO; } /* Check we are a power of 2 size and that base is a multiple of size*/ - if (!is_power_of_2(size) || + if ((size (size - 1)) != 0 || (res-start (size - 1)) != 0) { printk(KERN_ERR %s: dma-ranges unaligned\n, hose-dn-full_name); -- 1.5.6.1 ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev