Author: mpagano Date: 2014-05-13 14:17:59 +0000 (Tue, 13 May 2014) New Revision: 2785
Added: genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch Modified: genpatches-2.6/trunk/3.4/0000_README Log: Linux patch 3.4.90 Modified: genpatches-2.6/trunk/3.4/0000_README =================================================================== --- genpatches-2.6/trunk/3.4/0000_README 2014-05-13 13:57:00 UTC (rev 2784) +++ genpatches-2.6/trunk/3.4/0000_README 2014-05-13 14:17:59 UTC (rev 2785) @@ -395,6 +395,10 @@ From: http://www.kernel.org Desc: Linux 3.4.89 +Patch: 1089_linux-3.4.90.patch +From: http://www.kernel.org +Desc: Linux 3.4.90 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. Added: genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch =================================================================== --- genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch (rev 0) +++ genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch 2014-05-13 14:17:59 UTC (rev 2785) @@ -0,0 +1,1388 @@ +diff --git a/Makefile b/Makefile +index 2c2ec2cedd52..aa1001213eb1 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 4 +-SUBLEVEL = 89 ++SUBLEVEL = 90 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S +index f8a751c03282..5bf34ec89669 100644 +--- a/arch/mips/power/hibernate.S ++++ b/arch/mips/power/hibernate.S +@@ -44,6 +44,7 @@ LEAF(swsusp_arch_resume) + bne t1, t3, 1b + PTR_L t0, PBE_NEXT(t0) + bnez t0, 0b ++ jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */ + PTR_LA t0, saved_regs + PTR_L ra, PT_R31(t0) + PTR_L sp, PT_R29(t0) +diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S +index 1eb7f90cb7b9..eb4d2a254b35 100644 +--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S ++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S +@@ -24,10 +24,6 @@ + .align 16 + .Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +-.Lpoly: +- .octa 0xc2000000000000000000000000000001 +-.Ltwo_one: +- .octa 0x00000001000000000000000000000001 + + #define DATA %xmm0 + #define SHASH %xmm1 +@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update) + movups DATA, (%rdi) + .Lupdate_just_ret: + ret +- +-/* +- * void clmul_ghash_setkey(be128 *shash, const u8 *key); +- * +- * Calculate hash_key << 1 mod poly +- */ +-ENTRY(clmul_ghash_setkey) +- movaps .Lbswap_mask, BSWAP +- movups (%rsi), %xmm0 +- PSHUFB_XMM BSWAP %xmm0 +- movaps %xmm0, %xmm1 +- psllq $1, %xmm0 +- psrlq $63, %xmm1 +- movaps %xmm1, %xmm2 +- pslldq $8, %xmm1 +- psrldq $8, %xmm2 +- por %xmm1, %xmm0 +- # reduction +- pshufd $0b00100100, %xmm2, %xmm1 +- pcmpeqd .Ltwo_one, %xmm1 +- pand .Lpoly, %xmm1 +- pxor %xmm1, %xmm0 +- movups %xmm0, (%rdi) +- ret +diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c +index b4bf0a63b520..c07446d17463 100644 +--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c ++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c +@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash); + void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +-void clmul_ghash_setkey(be128 *shash, const u8 *key); +- + struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; + }; +@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) + { + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); ++ be128 *x = (be128 *)key; ++ u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + +- clmul_ghash_setkey(&ctx->shash, key); ++ /* perform multiplication by 'x' in GF(2^128) */ ++ a = be64_to_cpu(x->a); ++ b = be64_to_cpu(x->b); ++ ++ ctx->shash.a = (__be64)((b << 1) | (a >> 63)); ++ ctx->shash.b = (__be64)((a << 1) | (b >> 63)); ++ ++ if (a >> 63) ++ ctx->shash.b ^= cpu_to_be64(0xc2); + + return 0; + } +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +index f2f37171e21a..6e67fdebdada 100644 +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) + static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) + { + struct ata_queued_cmd *qc = NULL; +- unsigned int i; ++ unsigned int i, tag; + + /* no command while frozen */ + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + return NULL; + +- /* the last tag is reserved for internal command. */ +- for (i = 0; i < ATA_MAX_QUEUE - 1; i++) +- if (!test_and_set_bit(i, &ap->qc_allocated)) { +- qc = __ata_qc_from_tag(ap, i); ++ for (i = 0; i < ATA_MAX_QUEUE; i++) { ++ tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE; ++ ++ /* the last tag is reserved for internal command. */ ++ if (tag == ATA_TAG_INTERNAL) ++ continue; ++ ++ if (!test_and_set_bit(tag, &ap->qc_allocated)) { ++ qc = __ata_qc_from_tag(ap, tag); ++ qc->tag = tag; ++ ap->last_tag = tag; + break; + } +- +- if (qc) +- qc->tag = i; ++ } + + return qc; + } +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index c82f06e639f8..2cac6e64b67d 100644 +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -3058,7 +3058,10 @@ static int raw_cmd_copyout(int cmd, void __user *param, + int ret; + + while (ptr) { +- ret = copy_to_user(param, ptr, sizeof(*ptr)); ++ struct floppy_raw_cmd cmd = *ptr; ++ cmd.next = NULL; ++ cmd.kernel_data = NULL; ++ ret = copy_to_user(param, &cmd, sizeof(cmd)); + if (ret) + return -EFAULT; + param += sizeof(struct floppy_raw_cmd); +@@ -3112,10 +3115,11 @@ loop: + return -ENOMEM; + *rcmd = ptr; + ret = copy_from_user(ptr, param, sizeof(*ptr)); +- if (ret) +- return -EFAULT; + ptr->next = NULL; + ptr->buffer_length = 0; ++ ptr->kernel_data = NULL; ++ if (ret) ++ return -EFAULT; + param += sizeof(struct floppy_raw_cmd); + if (ptr->cmd_count > 33) + /* the command may now also take up the space +@@ -3131,7 +3135,6 @@ loop: + for (i = 0; i < 16; i++) + ptr->reply[i] = 0; + ptr->resultcode = 0; +- ptr->kernel_data = NULL; + + if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { + if (ptr->length <= 0) +diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c +index 385c58e8405b..0f8114de0877 100644 +--- a/drivers/gpio/gpio-mxs.c ++++ b/drivers/gpio/gpio-mxs.c +@@ -167,7 +167,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port) + ct->regs.ack = PINCTRL_IRQSTAT(port->id) + MXS_CLR; + ct->regs.mask = PINCTRL_IRQEN(port->id); + +- irq_setup_generic_chip(gc, IRQ_MSK(32), 0, IRQ_NOREQUEST, 0); ++ irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, ++ IRQ_NOREQUEST, 0); + } + + static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset) +diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c +index 342ffb7ec3d2..a83f7acdbe03 100644 +--- a/drivers/gpu/drm/i915/intel_crt.c ++++ b/drivers/gpu/drm/i915/intel_crt.c +@@ -579,6 +579,14 @@ static const struct dmi_system_id intel_no_crt[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"), + }, + }, ++ { ++ .callback = intel_no_crt_dmi_callback, ++ .ident = "DELL XPS 8700", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "XPS 8700"), ++ }, ++ }, + { } + }; + +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index c540dfff1f81..b253744fc3c8 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -1446,9 +1446,9 @@ static void process_deferred_bios(struct pool *pool) + */ + if (ensure_next_mapping(pool)) { + spin_lock_irqsave(&pool->lock, flags); ++ bio_list_add(&pool->deferred_bios, bio); + bio_list_merge(&pool->deferred_bios, &bios); + spin_unlock_irqrestore(&pool->lock, flags); +- + break; + } + +diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c +index 8febe46e1105..9f55d40ec69e 100644 +--- a/drivers/mtd/nand/nuc900_nand.c ++++ b/drivers/mtd/nand/nuc900_nand.c +@@ -250,7 +250,7 @@ static void nuc900_nand_enable(struct nuc900_nand *nand) + val = __raw_readl(nand->reg + REG_FMICSR); + + if (!(val & NAND_EN)) +- __raw_writel(val | NAND_EN, REG_FMICSR); ++ __raw_writel(val | NAND_EN, nand->reg + REG_FMICSR); + + val = __raw_readl(nand->reg + REG_SMCSR); + +diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c +index 9e2dfd517aa5..539835fabe61 100644 +--- a/drivers/mtd/sm_ftl.c ++++ b/drivers/mtd/sm_ftl.c +@@ -59,15 +59,12 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl) + struct attribute_group *attr_group; + struct attribute **attributes; + struct sm_sysfs_attribute *vendor_attribute; ++ char *vendor; + +- int vendor_len = strnlen(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, +- SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET); +- +- char *vendor = kmalloc(vendor_len, GFP_KERNEL); ++ vendor = kstrndup(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, ++ SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET, GFP_KERNEL); + if (!vendor) + goto error1; +- memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len); +- vendor[vendor_len] = 0; + + /* Initialize sysfs attributes */ + vendor_attribute = +@@ -78,7 +75,7 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl) + sysfs_attr_init(&vendor_attribute->dev_attr.attr); + + vendor_attribute->data = vendor; +- vendor_attribute->len = vendor_len; ++ vendor_attribute->len = strlen(vendor); + vendor_attribute->dev_attr.attr.name = "vendor"; + vendor_attribute->dev_attr.attr.mode = S_IRUGO; + vendor_attribute->dev_attr.show = sm_attr_show; +diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c +index 4ce3e1f036cc..547964dff355 100644 +--- a/drivers/net/wireless/b43/phy_n.c ++++ b/drivers/net/wireless/b43/phy_n.c +@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struct b43_wldev *dev, + int ch = new_channel->hw_value; + + u16 old_band_5ghz; +- u32 tmp32; ++ u16 tmp16; + + old_band_5ghz = + b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ; + if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) { +- tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4); ++ tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4); + b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16); + b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ); + } else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) { + b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ); +- tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4); ++ tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4); + b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF); +- b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32); ++ b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16); + } + + b43_chantab_phy_upload(dev, e); +diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c +index 91d2e28db4d7..a4387acbf220 100644 +--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c ++++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c +@@ -985,6 +985,17 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) + struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); + int err = 0; + static bool iqk_initialized; ++ unsigned long flags; ++ ++ /* As this function can take a very long time (up to 350 ms) ++ * and can be called with irqs disabled, reenable the irqs ++ * to let the other devices continue being serviced. ++ * ++ * It is safe doing so since our own interrupts will only be enabled ++ * in a subsequent step. ++ */ ++ local_save_flags(flags); ++ local_irq_enable(); + + rtlhal->hw_type = HARDWARE_TYPE_RTL8192CU; + err = _rtl92cu_init_mac(hw); +@@ -997,7 +1008,7 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) + RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, + "Failed to download FW. Init HW without FW now..\n"); + err = 1; +- return err; ++ goto exit; + } + rtlhal->last_hmeboxnum = 0; /* h2c */ + _rtl92cu_phy_param_tab_init(hw); +@@ -1034,6 +1045,8 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) + _InitPABias(hw); + _update_mac_setting(hw); + rtl92c_dm_init(hw); ++exit: ++ local_irq_restore(flags); + return err; + } + +diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c +index b141c35bf926..f90eb0cd7ae5 100644 +--- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c ++++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c +@@ -922,7 +922,7 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw)); + u8 tmp_byte = 0; +- ++ unsigned long flags; + bool rtstatus = true; + u8 tmp_u1b; + int err = false; +@@ -934,6 +934,16 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + + rtlpci->being_init_adapter = true; + ++ /* As this function can take a very long time (up to 350 ms) ++ * and can be called with irqs disabled, reenable the irqs ++ * to let the other devices continue being serviced. ++ * ++ * It is safe doing so since our own interrupts will only be enabled ++ * in a subsequent step. ++ */ ++ local_save_flags(flags); ++ local_irq_enable(); ++ + rtlpriv->intf_ops->disable_aspm(hw); + + /* 1. MAC Initialize */ +@@ -951,7 +961,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, + "Failed to download FW. Init HW without FW now... " + "Please copy FW into /lib/firmware/rtlwifi\n"); +- return 1; ++ err = 1; ++ goto exit; + } + + /* After FW download, we have to reset MAC register */ +@@ -964,7 +975,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + /* 3. Initialize MAC/PHY Config by MACPHY_reg.txt */ + if (!rtl92s_phy_mac_config(hw)) { + RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "MAC Config failed\n"); +- return rtstatus; ++ err = rtstatus; ++ goto exit; + } + + /* Make sure BB/RF write OK. We should prevent enter IPS. radio off. */ +@@ -974,7 +986,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + /* 4. Initialize BB After MAC Config PHY_reg.txt, AGC_Tab.txt */ + if (!rtl92s_phy_bb_config(hw)) { + RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG, "BB Config failed\n"); +- return rtstatus; ++ err = rtstatus; ++ goto exit; + } + + /* 5. Initiailze RF RAIO_A.txt RF RAIO_B.txt */ +@@ -1010,7 +1023,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + + if (!rtl92s_phy_rf_config(hw)) { + RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "RF Config failed\n"); +- return rtstatus; ++ err = rtstatus; ++ goto exit; + } + + /* After read predefined TXT, we must set BB/MAC/RF +@@ -1084,8 +1098,9 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) + + rtlpriv->cfg->ops->led_control(hw, LED_CTL_POWER_ON); + rtl92s_dm_init(hw); ++exit: ++ local_irq_restore(flags); + rtlpci->being_init_adapter = false; +- + return err; + } + +diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c +index 35a05d1df9cf..9b2fb60d49c4 100644 +--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c ++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c +@@ -8086,7 +8086,6 @@ _scsih_suspend(struct pci_dev *pdev, pm_message_t state) + + mpt2sas_base_free_resources(ioc); + pci_save_state(pdev); +- pci_disable_device(pdev); + pci_set_power_state(pdev, device_state); + return 0; + } +diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c +index 1fbffaa9958e..1da617f1c588 100644 +--- a/drivers/tty/hvc/hvc_console.c ++++ b/drivers/tty/hvc/hvc_console.c +@@ -190,7 +190,7 @@ static struct tty_driver *hvc_console_device(struct console *c, int *index) + return hvc_driver; + } + +-static int __init hvc_console_setup(struct console *co, char *options) ++static int hvc_console_setup(struct console *co, char *options) + { + if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES) + return -ENODEV; +diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c +index e45833ce975b..182bd680141f 100644 +--- a/drivers/video/aty/mach64_accel.c ++++ b/drivers/video/aty/mach64_accel.c +@@ -4,6 +4,7 @@ + */ + + #include <linux/delay.h> ++#include <asm/unaligned.h> + #include <linux/fb.h> + #include <video/mach64.h> + #include "atyfb.h" +@@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) + u32 *pbitmap, dwords = (src_bytes + 3) / 4; + for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) { + wait_for_fifo(1, par); +- aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par); ++ aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); + } + } + +diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c +index 46f72ed53510..4b87318dcf44 100644 +--- a/drivers/video/aty/mach64_cursor.c ++++ b/drivers/video/aty/mach64_cursor.c +@@ -5,6 +5,7 @@ + #include <linux/fb.h> + #include <linux/init.h> + #include <linux/string.h> ++#include "../fb_draw.h" + + #include <asm/io.h> + +@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { ++ u16 l = 0xaaaa; + b = *src++; + m = *msk++; + switch (cursor->rop) { + case ROP_XOR: + // Upper 4 bits of mask data +- fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++); ++ l = cursor_bits_lookup[(b ^ m) >> 4] | + // Lower 4 bits of mask +- fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f], +- dst++); ++ (cursor_bits_lookup[(b ^ m) & 0x0f] << 8); + break; + case ROP_COPY: + // Upper 4 bits of mask data +- fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++); ++ l = cursor_bits_lookup[(b & m) >> 4] | + // Lower 4 bits of mask +- fb_writeb(cursor_bits_lookup[(b & m) & 0x0f], +- dst++); ++ (cursor_bits_lookup[(b & m) & 0x0f] << 8); + break; + } ++ /* ++ * If cursor size is not a multiple of 8 characters ++ * we must pad it with transparent pattern (0xaaaa). ++ */ ++ if ((j + 1) * 8 > cursor->image.width) { ++ l = comp(l, 0xaaaa, ++ (1 << ((cursor->image.width & 7) * 2)) - 1); ++ } ++ fb_writeb(l & 0xff, dst++); ++ fb_writeb(l >> 8, dst++); + } + dst += offset; + } +diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c +index bb5a96b1645d..bcb57235fcc7 100644 +--- a/drivers/video/cfbcopyarea.c ++++ b/drivers/video/cfbcopyarea.c +@@ -43,13 +43,22 @@ + */ + + static void +-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, +- const unsigned long __iomem *src, int src_idx, int bits, ++bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, ++ const unsigned long __iomem *src, unsigned src_idx, int bits, + unsigned n, u32 bswapmask) + { + unsigned long first, last; + int const shift = dst_idx-src_idx; +- int left, right; ++ ++#if 0 ++ /* ++ * If you suspect bug in this function, compare it with this simple ++ * memmove implementation. ++ */ ++ fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, ++ (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); ++ return; ++#endif + + first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask); + last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask); +@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + unsigned long d0, d1; + int m; + +- right = shift & (bits - 1); +- left = -shift & (bits - 1); +- bswapmask &= shift; ++ int const left = shift & (bits - 1); ++ int const right = -shift & (bits - 1); + + if (dst_idx+n <= bits) { + // Single destination word +@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + d0 = fb_rev_pixels_in_long(d0, bswapmask); + if (shift > 0) { + // Single source word +- d0 >>= right; ++ d0 <<= left; + } else if (src_idx+n <= bits) { + // Single source word +- d0 <<= left; ++ d0 >>= right; + } else { + // 2 source words + d1 = FB_READL(src + 1); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0<<left | d1>>right; ++ d0 = d0 >> right | d1 << left; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), first), dst); +@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + if (shift > 0) { + // Single source word + d1 = d0; +- d0 >>= right; +- dst++; ++ d0 <<= left; + n -= bits - dst_idx; + } else { + // 2 source words + d1 = FB_READL(src++); + d1 = fb_rev_pixels_in_long(d1, bswapmask); + +- d0 = d0<<left | d1>>right; +- dst++; ++ d0 = d0 >> right | d1 << left; + n -= bits - dst_idx; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), first), dst); + d0 = d1; ++ dst++; + + // Main chunk + m = n % bits; + n /= bits; + while ((n >= 4) && !bswapmask) { + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + d1 = FB_READL(src++); +- FB_WRITEL(d0 << left | d1 >> right, dst++); ++ FB_WRITEL(d0 >> right | d1 << left, dst++); + d0 = d1; + n -= 4; + } + while (n--) { + d1 = FB_READL(src++); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0 << left | d1 >> right; ++ d0 = d0 >> right | d1 << left; + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(d0, dst++); + d0 = d1; + } + + // Trailing bits +- if (last) { +- if (m <= right) { ++ if (m) { ++ if (m <= bits - right) { + // Single source word +- d0 <<= left; ++ d0 >>= right; + } else { + // 2 source words + d1 = FB_READL(src); + d1 = fb_rev_pixels_in_long(d1, + bswapmask); +- d0 = d0<<left | d1>>right; ++ d0 = d0 >> right | d1 << left; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), last), dst); +@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + */ + + static void +-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, +- const unsigned long __iomem *src, int src_idx, int bits, ++bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, ++ const unsigned long __iomem *src, unsigned src_idx, int bits, + unsigned n, u32 bswapmask) + { + unsigned long first, last; + int shift; + +- dst += (n-1)/bits; +- src += (n-1)/bits; +- if ((n-1) % bits) { +- dst_idx += (n-1) % bits; +- dst += dst_idx >> (ffs(bits) - 1); +- dst_idx &= bits - 1; +- src_idx += (n-1) % bits; +- src += src_idx >> (ffs(bits) - 1); +- src_idx &= bits - 1; +- } ++#if 0 ++ /* ++ * If you suspect bug in this function, compare it with this simple ++ * memmove implementation. ++ */ ++ fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, ++ (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); ++ return; ++#endif ++ ++ dst += (dst_idx + n - 1) / bits; ++ src += (src_idx + n - 1) / bits; ++ dst_idx = (dst_idx + n - 1) % bits; ++ src_idx = (src_idx + n - 1) % bits; + + shift = dst_idx-src_idx; + +- first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask); +- last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits), +- bswapmask); ++ first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask); ++ last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask); + + if (!shift) { + // Same alignment for source and dest + + if ((unsigned long)dst_idx+1 >= n) { + // Single word +- if (last) +- first &= last; +- FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); ++ if (first) ++ last &= first; ++ FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); + } else { + // Multiple destination words + + // Leading bits +- if (first != ~0UL) { ++ if (first) { + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); + dst--; + src--; +@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + FB_WRITEL(FB_READL(src--), dst--); + + // Trailing bits +- if (last) ++ if (last != -1UL) + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); + } + } else { +@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + unsigned long d0, d1; + int m; + +- int const left = -shift & (bits-1); +- int const right = shift & (bits-1); +- bswapmask &= shift; ++ int const left = shift & (bits-1); ++ int const right = -shift & (bits-1); + + if ((unsigned long)dst_idx+1 >= n) { + // Single destination word +- if (last) +- first &= last; ++ if (first) ++ last &= first; + d0 = FB_READL(src); + if (shift < 0) { + // Single source word +- d0 <<= left; ++ d0 >>= right; + } else if (1+(unsigned long)src_idx >= n) { + // Single source word +- d0 >>= right; ++ d0 <<= left; + } else { + // 2 source words + d1 = FB_READL(src - 1); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0>>right | d1<<left; ++ d0 = d0 << left | d1 >> right; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); +- FB_WRITEL(comp(d0, FB_READL(dst), first), dst); ++ FB_WRITEL(comp(d0, FB_READL(dst), last), dst); + } else { + // Multiple destination words + /** We must always remember the last value read, because in case +@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + if (shift < 0) { + // Single source word + d1 = d0; +- d0 <<= left; ++ d0 >>= right; + } else { + // 2 source words + d1 = FB_READL(src--); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0>>right | d1<<left; ++ d0 = d0 << left | d1 >> right; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), first), dst); +@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, + n /= bits; + while ((n >= 4) && !bswapmask) { + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + d1 = FB_READL(src--); +- FB_WRITEL(d0 >> right | d1 << left, dst--); ++ FB_WRITEL(d0 << left | d1 >> right, dst--); + d0 = d1; + n -= 4; + } + while (n--) { + d1 = FB_READL(src--); + d1 = fb_rev_pixels_in_long(d1, bswapmask); +- d0 = d0 >> right | d1 << left; ++ d0 = d0 << left | d1 >> right; + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(d0, dst--); + d0 = d1; + } + + // Trailing bits +- if (last) { +- if (m <= left) { ++ if (m) { ++ if (m <= bits - left) { + // Single source word +- d0 >>= right; ++ d0 <<= left; + } else { + // 2 source words + d1 = FB_READL(src); + d1 = fb_rev_pixels_in_long(d1, + bswapmask); +- d0 = d0>>right | d1<<left; ++ d0 = d0 << left | d1 >> right; + } + d0 = fb_rev_pixels_in_long(d0, bswapmask); + FB_WRITEL(comp(d0, FB_READL(dst), last), dst); +@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) + u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy; + u32 height = area->height, width = area->width; + unsigned long const bits_per_line = p->fix.line_length*8u; +- unsigned long __iomem *dst = NULL, *src = NULL; ++ unsigned long __iomem *base = NULL; + int bits = BITS_PER_LONG, bytes = bits >> 3; +- int dst_idx = 0, src_idx = 0, rev_copy = 0; ++ unsigned dst_idx = 0, src_idx = 0, rev_copy = 0; + u32 bswapmask = fb_compute_bswapmask(p); + + if (p->state != FBINFO_STATE_RUNNING) +@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) + + // split the base of the framebuffer into a long-aligned address and the + // index of the first bit +- dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); ++ base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); + dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1)); + // add offset of source and target area + dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel; +@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) + while (height--) { + dst_idx -= bits_per_line; + src_idx -= bits_per_line; +- dst += dst_idx >> (ffs(bits) - 1); +- dst_idx &= (bytes - 1); +- src += src_idx >> (ffs(bits) - 1); +- src_idx &= (bytes - 1); +- bitcpy_rev(p, dst, dst_idx, src, src_idx, bits, ++ bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits, ++ base + (src_idx / bits), src_idx % bits, bits, + width*p->var.bits_per_pixel, bswapmask); + } + } else { + while (height--) { +- dst += dst_idx >> (ffs(bits) - 1); +- dst_idx &= (bytes - 1); +- src += src_idx >> (ffs(bits) - 1); +- src_idx &= (bytes - 1); +- bitcpy(p, dst, dst_idx, src, src_idx, bits, ++ bitcpy(p, base + (dst_idx / bits), dst_idx % bits, ++ base + (src_idx / bits), src_idx % bits, bits, + width*p->var.bits_per_pixel, bswapmask); + dst_idx += bits_per_line; + src_idx += bits_per_line; +diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c +index 8335a6fe303e..0d5cb85d071a 100644 +--- a/drivers/video/matrox/matroxfb_accel.c ++++ b/drivers/video/matrox/matroxfb_accel.c +@@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_info *minfo) + minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO; + if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC; + minfo->accel.m_opmode = mopmode; ++ minfo->accel.m_access = maccess; ++ minfo->accel.m_pitch = mpitch; + } + + EXPORT_SYMBOL(matrox_cfbX_init); + ++static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo) ++{ ++ mga_outl(M_MACCESS, minfo->accel.m_access); ++ mga_outl(M_PITCH, minfo->accel.m_pitch); ++} ++ + static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, + int sx, int dy, int dx, int height, int width) + { +@@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, + CRITBEGIN + + if ((dy < sy) || ((dy == sy) && (dx <= sx))) { +- mga_fifo(2); ++ mga_fifo(4); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO | + M_DWG_BFCOL | M_DWG_REPLACE); + mga_outl(M_AR5, vxres); +@@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, + start = sy*vxres+sx+curr_ydstorg(minfo); + end = start+width; + } else { +- mga_fifo(3); ++ mga_fifo(5); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE); + mga_outl(M_SGN, 5); + mga_outl(M_AR5, -vxres); +@@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, + start = end+width; + dy += height-1; + } +- mga_fifo(4); ++ mga_fifo(6); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_AR0, end); + mga_outl(M_AR3, start); + mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx); +@@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, + CRITBEGIN + + if ((dy < sy) || ((dy == sy) && (dx <= sx))) { +- mga_fifo(2); ++ mga_fifo(4); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO | + M_DWG_BFCOL | M_DWG_REPLACE); + mga_outl(M_AR5, vxres); +@@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, + start = sy*vxres+sx+curr_ydstorg(minfo); + end = start+width; + } else { +- mga_fifo(3); ++ mga_fifo(5); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE); + mga_outl(M_SGN, 5); + mga_outl(M_AR5, -vxres); +@@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, + start = end+width; + dy += height-1; + } +- mga_fifo(5); ++ mga_fifo(7); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_AR0, end); + mga_outl(M_AR3, start); + mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx); +@@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct matrox_fb_info *minfo, u_int32_t color, + + CRITBEGIN + +- mga_fifo(5); ++ mga_fifo(7); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE); + mga_outl(M_FCOL, color); + mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx); +@@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct matrox_fb_info *minfo, u_int32_t bgx, + width >>= 1; + sx >>= 1; + if (width) { +- mga_fifo(5); ++ mga_fifo(7); ++ matrox_accel_restore_maccess(minfo); + mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2); + mga_outl(M_FCOL, bgx); + mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx); +@@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx, + + CRITBEGIN + +- mga_fifo(3); ++ mga_fifo(5); ++ matrox_accel_restore_maccess(minfo); + if (easy) + mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE); + else +@@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx, + fxbndry = ((xx + width - 1) << 16) | xx; + mmio = minfo->mmio.vbase; + +- mga_fifo(6); ++ mga_fifo(8); ++ matrox_accel_restore_maccess(minfo); + mga_writel(mmio, M_FXBNDRY, fxbndry); + mga_writel(mmio, M_AR0, ar0); + mga_writel(mmio, M_AR3, 0); +diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h +index 11ed57bb704e..556d96ce40bf 100644 +--- a/drivers/video/matrox/matroxfb_base.h ++++ b/drivers/video/matrox/matroxfb_base.h +@@ -307,6 +307,8 @@ struct matrox_accel_data { + #endif + u_int32_t m_dwg_rect; + u_int32_t m_opmode; ++ u_int32_t m_access; ++ u_int32_t m_pitch; + }; + + struct v4l2_queryctrl; +diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c +index aba7686b1a32..ac2cf6dcc598 100644 +--- a/drivers/video/tgafb.c ++++ b/drivers/video/tgafb.c +@@ -1146,222 +1146,57 @@ copyarea_line_32bpp(struct fb_info *info, u32 dy, u32 sy, + __raw_writel(TGA_MODE_SBM_24BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG); + } + +-/* The general case of forward copy in 8bpp mode. */ ++/* The (almost) general case of backward copy in 8bpp mode. */ + static inline void +-copyarea_foreward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, +- u32 height, u32 width, u32 line_length) ++copyarea_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, ++ u32 height, u32 width, u32 line_length, ++ const struct fb_copyarea *area) + { + struct tga_par *par = (struct tga_par *) info->par; +- unsigned long i, copied, left; +- unsigned long dpos, spos, dalign, salign, yincr; +- u32 smask_first, dmask_first, dmask_last; +- int pixel_shift, need_prime, need_second; +- unsigned long n64, n32, xincr_first; ++ unsigned i, yincr; ++ int depos, sepos, backward, last_step, step; ++ u32 mask_last; ++ unsigned n32; + void __iomem *tga_regs; + void __iomem *tga_fb; + +- yincr = line_length; +- if (dy > sy) { +- dy += height - 1; +- sy += height - 1; +- yincr = -yincr; +- } +- +- /* Compute the offsets and alignments in the frame buffer. +- More than anything else, these control how we do copies. */ +- dpos = dy * line_length + dx; +- spos = sy * line_length + sx; +- dalign = dpos & 7; +- salign = spos & 7; +- dpos &= -8; +- spos &= -8; +- +- /* Compute the value for the PIXELSHIFT register. This controls +- both non-co-aligned source and destination and copy direction. */ +- if (dalign >= salign) +- pixel_shift = dalign - salign; +- else +- pixel_shift = 8 - (salign - dalign); +- +- /* Figure out if we need an additional priming step for the +- residue register. */ +- need_prime = (salign > dalign); +- if (need_prime) +- dpos -= 8; +- +- /* Begin by copying the leading unaligned destination. Copy enough +- to make the next destination address 32-byte aligned. */ +- copied = 32 - (dalign + (dpos & 31)); +- if (copied == 32) +- copied = 0; +- xincr_first = (copied + 7) & -8; +- smask_first = dmask_first = (1ul << copied) - 1; +- smask_first <<= salign; +- dmask_first <<= dalign + need_prime*8; +- if (need_prime && copied > 24) +- copied -= 8; +- left = width - copied; +- +- /* Care for small copies. */ +- if (copied > width) { +- u32 t; +- t = (1ul << width) - 1; +- t <<= dalign + need_prime*8; +- dmask_first &= t; +- left = 0; +- } +- +- /* Attempt to use 64-byte copies. This is only possible if the +- source and destination are co-aligned at 64 bytes. */ +- n64 = need_second = 0; +- if ((dpos & 63) == (spos & 63) +- && (height == 1 || line_length % 64 == 0)) { +- /* We may need a 32-byte copy to ensure 64 byte alignment. */ +- need_second = (dpos + xincr_first) & 63; +- if ((need_second & 32) != need_second) +- printk(KERN_ERR "tgafb: need_second wrong\n"); +- if (left >= need_second + 64) { +- left -= need_second; +- n64 = left / 64; +- left %= 64; +- } else +- need_second = 0; +- } +- +- /* Copy trailing full 32-byte sections. This will be the main +- loop if the 64 byte loop can't be used. */ +- n32 = left / 32; +- left %= 32; +- +- /* Copy the trailing unaligned destination. */ +- dmask_last = (1ul << left) - 1; +- +- tga_regs = par->tga_regs_base; +- tga_fb = par->tga_fb_base; +- +- /* Set up the MODE and PIXELSHIFT registers. */ +- __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_COPY, tga_regs+TGA_MODE_REG); +- __raw_writel(pixel_shift, tga_regs+TGA_PIXELSHIFT_REG); +- wmb(); +- +- for (i = 0; i < height; ++i) { +- unsigned long j; +- void __iomem *sfb; +- void __iomem *dfb; +- +- sfb = tga_fb + spos; +- dfb = tga_fb + dpos; +- if (dmask_first) { +- __raw_writel(smask_first, sfb); +- wmb(); +- __raw_writel(dmask_first, dfb); +- wmb(); +- sfb += xincr_first; +- dfb += xincr_first; +- } +- +- if (need_second) { +- __raw_writel(0xffffffff, sfb); +- wmb(); +- __raw_writel(0xffffffff, dfb); +- wmb(); +- sfb += 32; +- dfb += 32; +- } +- +- if (n64 && (((unsigned long)sfb | (unsigned long)dfb) & 63)) +- printk(KERN_ERR +- "tgafb: misaligned copy64 (s:%p, d:%p)\n", +- sfb, dfb); +- +- for (j = 0; j < n64; ++j) { +- __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC); +- wmb(); +- __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST); +- wmb(); +- sfb += 64; +- dfb += 64; +- } +- +- for (j = 0; j < n32; ++j) { +- __raw_writel(0xffffffff, sfb); +- wmb(); +- __raw_writel(0xffffffff, dfb); +- wmb(); +- sfb += 32; +- dfb += 32; +- } +- +- if (dmask_last) { +- __raw_writel(0xffffffff, sfb); +- wmb(); +- __raw_writel(dmask_last, dfb); +- wmb(); +- } +- +- spos += yincr; +- dpos += yincr; ++ /* Do acceleration only if we are aligned on 8 pixels */ ++ if ((dx | sx | width) & 7) { ++ cfb_copyarea(info, area); ++ return; + } + +- /* Reset the MODE register to normal. */ +- __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG); +-} +- +-/* The (almost) general case of backward copy in 8bpp mode. */ +-static inline void +-copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, +- u32 height, u32 width, u32 line_length, +- const struct fb_copyarea *area) +-{ +- struct tga_par *par = (struct tga_par *) info->par; +- unsigned long i, left, yincr; +- unsigned long depos, sepos, dealign, sealign; +- u32 mask_first, mask_last; +- unsigned long n32; +- void __iomem *tga_regs; +- void __iomem *tga_fb; +- + yincr = line_length; + if (dy > sy) { + dy += height - 1; + sy += height - 1; + yincr = -yincr; + } ++ backward = dy == sy && dx > sx && dx < sx + width; + + /* Compute the offsets and alignments in the frame buffer. + More than anything else, these control how we do copies. */ +- depos = dy * line_length + dx + width; +- sepos = sy * line_length + sx + width; +- dealign = depos & 7; +- sealign = sepos & 7; +- +- /* ??? The documentation appears to be incorrect (or very +- misleading) wrt how pixel shifting works in backward copy +- mode, i.e. when PIXELSHIFT is negative. I give up for now. +- Do handle the common case of co-aligned backward copies, +- but frob everything else back on generic code. */ +- if (dealign != sealign) { +- cfb_copyarea(info, area); +- return; +- } +- +- /* We begin the copy with the trailing pixels of the +- unaligned destination. */ +- mask_first = (1ul << dealign) - 1; +- left = width - dealign; +- +- /* Care for small copies. */ +- if (dealign > width) { +- mask_first ^= (1ul << (dealign - width)) - 1; +- left = 0; +- } ++ depos = dy * line_length + dx; ++ sepos = sy * line_length + sx; ++ if (backward) ++ depos += width, sepos += width; + + /* Next copy full words at a time. */ +- n32 = left / 32; +- left %= 32; ++ n32 = width / 32; ++ last_step = width % 32; + + /* Finally copy the unaligned head of the span. */ +- mask_last = -1 << (32 - left); ++ mask_last = (1ul << last_step) - 1; ++ ++ if (!backward) { ++ step = 32; ++ last_step = 32; ++ } else { ++ step = -32; ++ last_step = -last_step; ++ sepos -= 32; ++ depos -= 32; ++ } + + tga_regs = par->tga_regs_base; + tga_fb = par->tga_fb_base; +@@ -1378,25 +1213,33 @@ copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, + + sfb = tga_fb + sepos; + dfb = tga_fb + depos; +- if (mask_first) { +- __raw_writel(mask_first, sfb); +- wmb(); +- __raw_writel(mask_first, dfb); +- wmb(); +- } + +- for (j = 0; j < n32; ++j) { +- sfb -= 32; +- dfb -= 32; ++ for (j = 0; j < n32; j++) { ++ if (j < 2 && j + 1 < n32 && !backward && ++ !(((unsigned long)sfb | (unsigned long)dfb) & 63)) { ++ do { ++ __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC); ++ wmb(); ++ __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST); ++ wmb(); ++ sfb += 64; ++ dfb += 64; ++ j += 2; ++ } while (j + 1 < n32); ++ j--; ++ continue; ++ } + __raw_writel(0xffffffff, sfb); + wmb(); + __raw_writel(0xffffffff, dfb); + wmb(); ++ sfb += step; ++ dfb += step; + } + + if (mask_last) { +- sfb -= 32; +- dfb -= 32; ++ sfb += last_step - step; ++ dfb += last_step - step; + __raw_writel(mask_last, sfb); + wmb(); + __raw_writel(mask_last, dfb); +@@ -1457,14 +1300,9 @@ tgafb_copyarea(struct fb_info *info, const struct fb_copyarea *area) + else if (bpp == 32) + cfb_copyarea(info, area); + +- /* Detect overlapping source and destination that requires +- a backward copy. */ +- else if (dy == sy && dx > sx && dx < sx + width) +- copyarea_backward_8bpp(info, dx, dy, sx, sy, height, +- width, line_length, area); + else +- copyarea_foreward_8bpp(info, dx, dy, sx, sy, height, +- width, line_length); ++ copyarea_8bpp(info, dx, dy, sx, sy, height, ++ width, line_length, area); + } + + +diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c +index 8807fe501d20..436625457311 100644 +--- a/drivers/virtio/virtio_balloon.c ++++ b/drivers/virtio/virtio_balloon.c +@@ -305,6 +305,12 @@ static int balloon(void *_vballoon) + else if (diff < 0) + leak_balloon(vb, -diff); + update_balloon_size(vb); ++ ++ /* ++ * For large balloon changes, we could spend a lot of time ++ * and always have work to do. Be nice if preempt disabled. ++ */ ++ cond_resched(); + } + return 0; + } +diff --git a/fs/locks.c b/fs/locks.c +index fcc50ab71cc6..d4f1d89d9bc6 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -1253,11 +1253,10 @@ int __break_lease(struct inode *inode, unsigned int mode) + + restart: + break_time = flock->fl_break_time; +- if (break_time != 0) { ++ if (break_time != 0) + break_time -= jiffies; +- if (break_time == 0) +- break_time++; +- } ++ if (break_time == 0) ++ break_time++; + locks_insert_block(flock, new_fl); + unlock_flocks(); + error = wait_event_interruptible_timeout(new_fl->fl_wait, +diff --git a/include/linux/libata.h b/include/linux/libata.h +index 7e13eb428cb2..50d7cb1ee947 100644 +--- a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -762,6 +762,7 @@ struct ata_port { + unsigned long qc_allocated; + unsigned int qc_active; + int nr_active_links; /* #links with active qcs */ ++ unsigned int last_tag; /* track next tag hw expects */ + + struct ata_link link; /* host default link */ + struct ata_link *slave_link; /* see ata_slave_link_init() */