Author: mpagano
Date: 2014-05-13 14:17:59 +0000 (Tue, 13 May 2014)
New Revision: 2785

Added:
   genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch
Modified:
   genpatches-2.6/trunk/3.4/0000_README
Log:
Linux patch 3.4.90

Modified: genpatches-2.6/trunk/3.4/0000_README
===================================================================
--- genpatches-2.6/trunk/3.4/0000_README        2014-05-13 13:57:00 UTC (rev 2784)
+++ genpatches-2.6/trunk/3.4/0000_README        2014-05-13 14:17:59 UTC (rev 2785)
@@ -395,6 +395,10 @@
 From:   http://www.kernel.org
 Desc:   Linux 3.4.89
 
+Patch:  1089_linux-3.4.90.patch
+From:   http://www.kernel.org
+Desc:   Linux 3.4.90
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

Added: genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch
===================================================================
--- genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch                            (rev 0)
+++ genpatches-2.6/trunk/3.4/1089_linux-3.4.90.patch    2014-05-13 14:17:59 UTC (rev 2785)
@@ -0,0 +1,1388 @@
+diff --git a/Makefile b/Makefile
+index 2c2ec2cedd52..aa1001213eb1 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 3
+ PATCHLEVEL = 4
+-SUBLEVEL = 89
++SUBLEVEL = 90
+ EXTRAVERSION =
+ NAME = Saber-toothed Squirrel
+ 
+diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
+index f8a751c03282..5bf34ec89669 100644
+--- a/arch/mips/power/hibernate.S
++++ b/arch/mips/power/hibernate.S
+@@ -44,6 +44,7 @@ LEAF(swsusp_arch_resume)
+       bne t1, t3, 1b
+       PTR_L t0, PBE_NEXT(t0)
+       bnez t0, 0b
++      jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */
+       PTR_LA t0, saved_regs
+       PTR_L ra, PT_R31(t0)
+       PTR_L sp, PT_R29(t0)
+diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+index 1eb7f90cb7b9..eb4d2a254b35 100644
+--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
++++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
+@@ -24,10 +24,6 @@
+ .align 16
+ .Lbswap_mask:
+       .octa 0x000102030405060708090a0b0c0d0e0f
+-.Lpoly:
+-      .octa 0xc2000000000000000000000000000001
+-.Ltwo_one:
+-      .octa 0x00000001000000000000000000000001
+ 
+ #define DATA  %xmm0
+ #define SHASH %xmm1
+@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update)
+       movups DATA, (%rdi)
+ .Lupdate_just_ret:
+       ret
+-
+-/*
+- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
+- *
+- * Calculate hash_key << 1 mod poly
+- */
+-ENTRY(clmul_ghash_setkey)
+-      movaps .Lbswap_mask, BSWAP
+-      movups (%rsi), %xmm0
+-      PSHUFB_XMM BSWAP %xmm0
+-      movaps %xmm0, %xmm1
+-      psllq $1, %xmm0
+-      psrlq $63, %xmm1
+-      movaps %xmm1, %xmm2
+-      pslldq $8, %xmm1
+-      psrldq $8, %xmm2
+-      por %xmm1, %xmm0
+-      # reduction
+-      pshufd $0b00100100, %xmm2, %xmm1
+-      pcmpeqd .Ltwo_one, %xmm1
+-      pand .Lpoly, %xmm1
+-      pxor %xmm1, %xmm0
+-      movups %xmm0, (%rdi)
+-      ret
+diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+index b4bf0a63b520..c07446d17463 100644
+--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
++++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
+@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
+ void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+                       const be128 *shash);
+ 
+-void clmul_ghash_setkey(be128 *shash, const u8 *key);
+-
+ struct ghash_async_ctx {
+       struct cryptd_ahash *cryptd_tfm;
+ };
+@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
+                       const u8 *key, unsigned int keylen)
+ {
+       struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
++      be128 *x = (be128 *)key;
++      u64 a, b;
+ 
+       if (keylen != GHASH_BLOCK_SIZE) {
+               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+ 
+-      clmul_ghash_setkey(&ctx->shash, key);
++      /* perform multiplication by 'x' in GF(2^128) */
++      a = be64_to_cpu(x->a);
++      b = be64_to_cpu(x->b);
++
++      ctx->shash.a = (__be64)((b << 1) | (a >> 63));
++      ctx->shash.b = (__be64)((a << 1) | (b >> 63));
++
++      if (a >> 63)
++              ctx->shash.b ^= cpu_to_be64(0xc2);
+ 
+       return 0;
+ }
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index f2f37171e21a..6e67fdebdada 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
+ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+ {
+       struct ata_queued_cmd *qc = NULL;
+-      unsigned int i;
++      unsigned int i, tag;
+ 
+       /* no command while frozen */
+       if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+               return NULL;
+ 
+-      /* the last tag is reserved for internal command. */
+-      for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
+-              if (!test_and_set_bit(i, &ap->qc_allocated)) {
+-                      qc = __ata_qc_from_tag(ap, i);
++      for (i = 0; i < ATA_MAX_QUEUE; i++) {
++              tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE;
++
++              /* the last tag is reserved for internal command. */
++              if (tag == ATA_TAG_INTERNAL)
++                      continue;
++
++              if (!test_and_set_bit(tag, &ap->qc_allocated)) {
++                      qc = __ata_qc_from_tag(ap, tag);
++                      qc->tag = tag;
++                      ap->last_tag = tag;
+                       break;
+               }
+-
+-      if (qc)
+-              qc->tag = i;
++      }
+ 
+       return qc;
+ }
+diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
+index c82f06e639f8..2cac6e64b67d 100644
+--- a/drivers/block/floppy.c
++++ b/drivers/block/floppy.c
+@@ -3058,7 +3058,10 @@ static int raw_cmd_copyout(int cmd, void __user *param,
+       int ret;
+ 
+       while (ptr) {
+-              ret = copy_to_user(param, ptr, sizeof(*ptr));
++              struct floppy_raw_cmd cmd = *ptr;
++              cmd.next = NULL;
++              cmd.kernel_data = NULL;
++              ret = copy_to_user(param, &cmd, sizeof(cmd));
+               if (ret)
+                       return -EFAULT;
+               param += sizeof(struct floppy_raw_cmd);
+@@ -3112,10 +3115,11 @@ loop:
+               return -ENOMEM;
+       *rcmd = ptr;
+       ret = copy_from_user(ptr, param, sizeof(*ptr));
+-      if (ret)
+-              return -EFAULT;
+       ptr->next = NULL;
+       ptr->buffer_length = 0;
++      ptr->kernel_data = NULL;
++      if (ret)
++              return -EFAULT;
+       param += sizeof(struct floppy_raw_cmd);
+       if (ptr->cmd_count > 33)
+                       /* the command may now also take up the space
+@@ -3131,7 +3135,6 @@ loop:
+       for (i = 0; i < 16; i++)
+               ptr->reply[i] = 0;
+       ptr->resultcode = 0;
+-      ptr->kernel_data = NULL;
+ 
+       if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
+               if (ptr->length <= 0)
+diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
+index 385c58e8405b..0f8114de0877 100644
+--- a/drivers/gpio/gpio-mxs.c
++++ b/drivers/gpio/gpio-mxs.c
+@@ -167,7 +167,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port)
+       ct->regs.ack = PINCTRL_IRQSTAT(port->id) + MXS_CLR;
+       ct->regs.mask = PINCTRL_IRQEN(port->id);
+ 
+-      irq_setup_generic_chip(gc, IRQ_MSK(32), 0, IRQ_NOREQUEST, 0);
++      irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK,
++                             IRQ_NOREQUEST, 0);
+ }
+ 
+ static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
+diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
+index 342ffb7ec3d2..a83f7acdbe03 100644
+--- a/drivers/gpu/drm/i915/intel_crt.c
++++ b/drivers/gpu/drm/i915/intel_crt.c
+@@ -579,6 +579,14 @@ static const struct dmi_system_id intel_no_crt[] = {
+                       DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"),
+               },
+       },
++      {
++              .callback = intel_no_crt_dmi_callback,
++              .ident = "DELL XPS 8700",
++              .matches = {
++                      DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++                      DMI_MATCH(DMI_PRODUCT_NAME, "XPS 8700"),
++              },
++      },
+       { }
+ };
+ 
+diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
+index c540dfff1f81..b253744fc3c8 100644
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -1446,9 +1446,9 @@ static void process_deferred_bios(struct pool *pool)
+                */
+               if (ensure_next_mapping(pool)) {
+                       spin_lock_irqsave(&pool->lock, flags);
++                      bio_list_add(&pool->deferred_bios, bio);
+                       bio_list_merge(&pool->deferred_bios, &bios);
+                       spin_unlock_irqrestore(&pool->lock, flags);
+-
+                       break;
+               }
+ 
+diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c
+index 8febe46e1105..9f55d40ec69e 100644
+--- a/drivers/mtd/nand/nuc900_nand.c
++++ b/drivers/mtd/nand/nuc900_nand.c
+@@ -250,7 +250,7 @@ static void nuc900_nand_enable(struct nuc900_nand *nand)
+       val = __raw_readl(nand->reg + REG_FMICSR);
+ 
+       if (!(val & NAND_EN))
+-              __raw_writel(val | NAND_EN, REG_FMICSR);
++              __raw_writel(val | NAND_EN, nand->reg + REG_FMICSR);
+ 
+       val = __raw_readl(nand->reg + REG_SMCSR);
+ 
+diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
+index 9e2dfd517aa5..539835fabe61 100644
+--- a/drivers/mtd/sm_ftl.c
++++ b/drivers/mtd/sm_ftl.c
+@@ -59,15 +59,12 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
+       struct attribute_group *attr_group;
+       struct attribute **attributes;
+       struct sm_sysfs_attribute *vendor_attribute;
++      char *vendor;
+ 
+-      int vendor_len = strnlen(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET,
+-                                      SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET);
+-
+-      char *vendor = kmalloc(vendor_len, GFP_KERNEL);
++      vendor = kstrndup(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET,
++                        SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET, GFP_KERNEL);
+       if (!vendor)
+               goto error1;
+-      memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len);
+-      vendor[vendor_len] = 0;
+ 
+       /* Initialize sysfs attributes */
+       vendor_attribute =
+@@ -78,7 +75,7 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
+       sysfs_attr_init(&vendor_attribute->dev_attr.attr);
+ 
+       vendor_attribute->data = vendor;
+-      vendor_attribute->len = vendor_len;
++      vendor_attribute->len = strlen(vendor);
+       vendor_attribute->dev_attr.attr.name = "vendor";
+       vendor_attribute->dev_attr.attr.mode = S_IRUGO;
+       vendor_attribute->dev_attr.show = sm_attr_show;
+diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c
+index 4ce3e1f036cc..547964dff355 100644
+--- a/drivers/net/wireless/b43/phy_n.c
++++ b/drivers/net/wireless/b43/phy_n.c
+@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struct b43_wldev *dev,
+       int ch = new_channel->hw_value;
+ 
+       u16 old_band_5ghz;
+-      u32 tmp32;
++      u16 tmp16;
+ 
+       old_band_5ghz =
+               b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ;
+       if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) {
+-              tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++              tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+               b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+               b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ);
+       } else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) {
+               b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ);
+-              tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
++              tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
+               b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF);
+-              b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
++              b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
+       }
+ 
+       b43_chantab_phy_upload(dev, e);
+diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+index 91d2e28db4d7..a4387acbf220 100644
+--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
++++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+@@ -985,6 +985,17 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
+       struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
+       int err = 0;
+       static bool iqk_initialized;
++      unsigned long flags;
++
++      /* As this function can take a very long time (up to 350 ms)
++       * and can be called with irqs disabled, reenable the irqs
++       * to let the other devices continue being serviced.
++       *
++       * It is safe doing so since our own interrupts will only be enabled
++       * in a subsequent step.
++       */
++      local_save_flags(flags);
++      local_irq_enable();
+ 
+       rtlhal->hw_type = HARDWARE_TYPE_RTL8192CU;
+       err = _rtl92cu_init_mac(hw);
+@@ -997,7 +1008,7 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
+               RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
+                        "Failed to download FW. Init HW without FW now..\n");
+               err = 1;
+-              return err;
++              goto exit;
+       }
+       rtlhal->last_hmeboxnum = 0; /* h2c */
+       _rtl92cu_phy_param_tab_init(hw);
+@@ -1034,6 +1045,8 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
+       _InitPABias(hw);
+       _update_mac_setting(hw);
+       rtl92c_dm_init(hw);
++exit:
++      local_irq_restore(flags);
+       return err;
+ }
+ 
+diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
+index b141c35bf926..f90eb0cd7ae5 100644
+--- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
++++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
+@@ -922,7 +922,7 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+       struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
+       struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
+       u8 tmp_byte = 0;
+-
++      unsigned long flags;
+       bool rtstatus = true;
+       u8 tmp_u1b;
+       int err = false;
+@@ -934,6 +934,16 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+ 
+       rtlpci->being_init_adapter = true;
+ 
++      /* As this function can take a very long time (up to 350 ms)
++       * and can be called with irqs disabled, reenable the irqs
++       * to let the other devices continue being serviced.
++       *
++       * It is safe doing so since our own interrupts will only be enabled
++       * in a subsequent step.
++       */
++      local_save_flags(flags);
++      local_irq_enable();
++
+       rtlpriv->intf_ops->disable_aspm(hw);
+ 
+       /* 1. MAC Initialize */
+@@ -951,7 +961,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+               RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
+                        "Failed to download FW. Init HW without FW now... "
+                        "Please copy FW into /lib/firmware/rtlwifi\n");
+-              return 1;
++              err = 1;
++              goto exit;
+       }
+ 
+       /* After FW download, we have to reset MAC register */
+@@ -964,7 +975,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+       /* 3. Initialize MAC/PHY Config by MACPHY_reg.txt */
+       if (!rtl92s_phy_mac_config(hw)) {
+               RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "MAC Config failed\n");
+-              return rtstatus;
++              err = rtstatus;
++              goto exit;
+       }
+ 
+       /* Make sure BB/RF write OK. We should prevent enter IPS. radio off. */
+@@ -974,7 +986,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+       /* 4. Initialize BB After MAC Config PHY_reg.txt, AGC_Tab.txt */
+       if (!rtl92s_phy_bb_config(hw)) {
+               RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG, "BB Config failed\n");
+-              return rtstatus;
++              err = rtstatus;
++              goto exit;
+       }
+ 
+       /* 5. Initiailze RF RAIO_A.txt RF RAIO_B.txt */
+@@ -1010,7 +1023,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+ 
+       if (!rtl92s_phy_rf_config(hw)) {
+               RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "RF Config failed\n");
+-              return rtstatus;
++              err = rtstatus;
++              goto exit;
+       }
+ 
+       /* After read predefined TXT, we must set BB/MAC/RF
+@@ -1084,8 +1098,9 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
+ 
+       rtlpriv->cfg->ops->led_control(hw, LED_CTL_POWER_ON);
+       rtl92s_dm_init(hw);
++exit:
++      local_irq_restore(flags);
+       rtlpci->being_init_adapter = false;
+-
+       return err;
+ }
+ 
+diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+index 35a05d1df9cf..9b2fb60d49c4 100644
+--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
++++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+@@ -8086,7 +8086,6 @@ _scsih_suspend(struct pci_dev *pdev, pm_message_t state)
+ 
+       mpt2sas_base_free_resources(ioc);
+       pci_save_state(pdev);
+-      pci_disable_device(pdev);
+       pci_set_power_state(pdev, device_state);
+       return 0;
+ }
+diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
+index 1fbffaa9958e..1da617f1c588 100644
+--- a/drivers/tty/hvc/hvc_console.c
++++ b/drivers/tty/hvc/hvc_console.c
+@@ -190,7 +190,7 @@ static struct tty_driver *hvc_console_device(struct console *c, int *index)
+       return hvc_driver;
+ }
+ 
+-static int __init hvc_console_setup(struct console *co, char *options)
++static int hvc_console_setup(struct console *co, char *options)
+ {     
+       if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES)
+               return -ENODEV;
+diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c
+index e45833ce975b..182bd680141f 100644
+--- a/drivers/video/aty/mach64_accel.c
++++ b/drivers/video/aty/mach64_accel.c
+@@ -4,6 +4,7 @@
+  */
+ 
+ #include <linux/delay.h>
++#include <asm/unaligned.h>
+ #include <linux/fb.h>
+ #include <video/mach64.h>
+ #include "atyfb.h"
+@@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image)
+               u32 *pbitmap, dwords = (src_bytes + 3) / 4;
+              for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) {
+                       wait_for_fifo(1, par);
+-                      aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par);
++                      aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
+               }
+       }
+ 
+diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c
+index 46f72ed53510..4b87318dcf44 100644
+--- a/drivers/video/aty/mach64_cursor.c
++++ b/drivers/video/aty/mach64_cursor.c
+@@ -5,6 +5,7 @@
+ #include <linux/fb.h>
+ #include <linux/init.h>
+ #include <linux/string.h>
++#include "../fb_draw.h"
+ 
+ #include <asm/io.h>
+ 
+@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+ 
+           for (i = 0; i < height; i++) {
+               for (j = 0; j < width; j++) {
++                      u16 l = 0xaaaa;
+                       b = *src++;
+                       m = *msk++;
+                       switch (cursor->rop) {
+                       case ROP_XOR:
+                           // Upper 4 bits of mask data
+-                          fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++);
++                          l = cursor_bits_lookup[(b ^ m) >> 4] |
+                           // Lower 4 bits of mask
+-                          fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f],
+-                                    dst++);
++                                  (cursor_bits_lookup[(b ^ m) & 0x0f] << 8);
+                           break;
+                       case ROP_COPY:
+                           // Upper 4 bits of mask data
+-                          fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++);
++                          l = cursor_bits_lookup[(b & m) >> 4] |
+                           // Lower 4 bits of mask
+-                          fb_writeb(cursor_bits_lookup[(b & m) & 0x0f],
+-                                    dst++);
++                                  (cursor_bits_lookup[(b & m) & 0x0f] << 8);
+                           break;
+                       }
++                      /*
++                       * If cursor size is not a multiple of 8 characters
++                       * we must pad it with transparent pattern (0xaaaa).
++                       */
++                      if ((j + 1) * 8 > cursor->image.width) {
++                              l = comp(l, 0xaaaa,
++                                  (1 << ((cursor->image.width & 7) * 2)) - 1);
++                      }
++                      fb_writeb(l & 0xff, dst++);
++                      fb_writeb(l >> 8, dst++);
+               }
+               dst += offset;
+           }
+diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
+index bb5a96b1645d..bcb57235fcc7 100644
+--- a/drivers/video/cfbcopyarea.c
++++ b/drivers/video/cfbcopyarea.c
+@@ -43,13 +43,22 @@
+      */
+ 
+ static void
+-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+-              const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++              const unsigned long __iomem *src, unsigned src_idx, int bits,
+               unsigned n, u32 bswapmask)
+ {
+       unsigned long first, last;
+       int const shift = dst_idx-src_idx;
+-      int left, right;
++
++#if 0
++      /*
++       * If you suspect bug in this function, compare it with this simple
++       * memmove implementation.
++       */
++      fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++                 (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++      return;
++#endif
+ 
+       first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask);
+       last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask);
+@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+               unsigned long d0, d1;
+               int m;
+ 
+-              right = shift & (bits - 1);
+-              left = -shift & (bits - 1);
+-              bswapmask &= shift;
++              int const left = shift & (bits - 1);
++              int const right = -shift & (bits - 1);
+ 
+               if (dst_idx+n <= bits) {
+                       // Single destination word
+@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       if (shift > 0) {
+                               // Single source word
+-                              d0 >>= right;
++                              d0 <<= left;
+                       } else if (src_idx+n <= bits) {
+                               // Single source word
+-                              d0 <<= left;
++                              d0 >>= right;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src + 1);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0<<left | d1>>right;
++                              d0 = d0 >> right | d1 << left;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+                       if (shift > 0) {
+                               // Single source word
+                               d1 = d0;
+-                              d0 >>= right;
+-                              dst++;
++                              d0 <<= left;
+                               n -= bits - dst_idx;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src++);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+ 
+-                              d0 = d0<<left | d1>>right;
+-                              dst++;
++                              d0 = d0 >> right | d1 << left;
+                               n -= bits - dst_idx;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+                       d0 = d1;
++                      dst++;
+ 
+                       // Main chunk
+                       m = n % bits;
+                       n /= bits;
+                       while ((n >= 4) && !bswapmask) {
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               d1 = FB_READL(src++);
+-                              FB_WRITEL(d0 << left | d1 >> right, dst++);
++                              FB_WRITEL(d0 >> right | d1 << left, dst++);
+                               d0 = d1;
+                               n -= 4;
+                       }
+                       while (n--) {
+                               d1 = FB_READL(src++);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0 << left | d1 >> right;
++                              d0 = d0 >> right | d1 << left;
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(d0, dst++);
+                               d0 = d1;
+                       }
+ 
+                       // Trailing bits
+-                      if (last) {
+-                              if (m <= right) {
++                      if (m) {
++                              if (m <= bits - right) {
+                                       // Single source word
+-                                      d0 <<= left;
++                                      d0 >>= right;
+                               } else {
+                                       // 2 source words
+                                       d1 = FB_READL(src);
+                                       d1 = fb_rev_pixels_in_long(d1,
+                                                               bswapmask);
+-                                      d0 = d0<<left | d1>>right;
++                                      d0 = d0 >> right | d1 << left;
+                               }
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+      */
+ 
+ static void
+-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+-              const unsigned long __iomem *src, int src_idx, int bits,
++bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
++              const unsigned long __iomem *src, unsigned src_idx, int bits,
+               unsigned n, u32 bswapmask)
+ {
+       unsigned long first, last;
+       int shift;
+ 
+-      dst += (n-1)/bits;
+-      src += (n-1)/bits;
+-      if ((n-1) % bits) {
+-              dst_idx += (n-1) % bits;
+-              dst += dst_idx >> (ffs(bits) - 1);
+-              dst_idx &= bits - 1;
+-              src_idx += (n-1) % bits;
+-              src += src_idx >> (ffs(bits) - 1);
+-              src_idx &= bits - 1;
+-      }
++#if 0
++      /*
++       * If you suspect bug in this function, compare it with this simple
++       * memmove implementation.
++       */
++      fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
++                 (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
++      return;
++#endif
++
++      dst += (dst_idx + n - 1) / bits;
++      src += (src_idx + n - 1) / bits;
++      dst_idx = (dst_idx + n - 1) % bits;
++      src_idx = (src_idx + n - 1) % bits;
+ 
+       shift = dst_idx-src_idx;
+ 
+-      first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask);
+-      last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits),
+-                                          bswapmask);
++      first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask);
++      last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask);
+ 
+       if (!shift) {
+               // Same alignment for source and dest
+ 
+               if ((unsigned long)dst_idx+1 >= n) {
+                       // Single word
+-                      if (last)
+-                              first &= last;
+-                      FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
++                      if (first)
++                              last &= first;
++                      FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
+               } else {
+                       // Multiple destination words
+ 
+                       // Leading bits
+-                      if (first != ~0UL) {
++                      if (first) {
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
+                               dst--;
+                               src--;
+@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
+                               FB_WRITEL(FB_READL(src--), dst--);
+ 
+                       // Trailing bits
+-                      if (last)
++                      if (last != -1UL)
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), 
last), dst);
+               }
+       } else {
+@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem 
*dst, int dst_idx,
+               unsigned long d0, d1;
+               int m;
+ 
+-              int const left = -shift & (bits-1);
+-              int const right = shift & (bits-1);
+-              bswapmask &= shift;
++              int const left = shift & (bits-1);
++              int const right = -shift & (bits-1);
+ 
+               if ((unsigned long)dst_idx+1 >= n) {
+                       // Single destination word
+-                      if (last)
+-                              first &= last;
++                      if (first)
++                              last &= first;
+                       d0 = FB_READL(src);
+                       if (shift < 0) {
+                               // Single source word
+-                              d0 <<= left;
++                              d0 >>= right;
+                       } else if (1+(unsigned long)src_idx >= n) {
+                               // Single source word
+-                              d0 >>= right;
++                              d0 <<= left;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src - 1);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0>>right | d1<<left;
++                              d0 = d0 << left | d1 >> right;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+-                      FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
++                      FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+               } else {
+                       // Multiple destination words
+                       /** We must always remember the last value read, 
because in case
+@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem 
*dst, int dst_idx,
+                       if (shift < 0) {
+                               // Single source word
+                               d1 = d0;
+-                              d0 <<= left;
++                              d0 >>= right;
+                       } else {
+                               // 2 source words
+                               d1 = FB_READL(src--);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0>>right | d1<<left;
++                              d0 = d0 << left | d1 >> right;
+                       }
+                       d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                       FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem 
*dst, int dst_idx,
+                       n /= bits;
+                       while ((n >= 4) && !bswapmask) {
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               d1 = FB_READL(src--);
+-                              FB_WRITEL(d0 >> right | d1 << left, dst--);
++                              FB_WRITEL(d0 << left | d1 >> right, dst--);
+                               d0 = d1;
+                               n -= 4;
+                       }
+                       while (n--) {
+                               d1 = FB_READL(src--);
+                               d1 = fb_rev_pixels_in_long(d1, bswapmask);
+-                              d0 = d0 >> right | d1 << left;
++                              d0 = d0 << left | d1 >> right;
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(d0, dst--);
+                               d0 = d1;
+                       }
+ 
+                       // Trailing bits
+-                      if (last) {
+-                              if (m <= left) {
++                      if (m) {
++                              if (m <= bits - left) {
+                                       // Single source word
+-                                      d0 >>= right;
++                                      d0 <<= left;
+                               } else {
+                                       // 2 source words
+                                       d1 = FB_READL(src);
+                                       d1 = fb_rev_pixels_in_long(d1,
+                                                               bswapmask);
+-                                      d0 = d0>>right | d1<<left;
++                                      d0 = d0 << left | d1 >> right;
+                               }
+                               d0 = fb_rev_pixels_in_long(d0, bswapmask);
+                               FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
+@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, const struct 
fb_copyarea *area)
+       u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
+       u32 height = area->height, width = area->width;
+       unsigned long const bits_per_line = p->fix.line_length*8u;
+-      unsigned long __iomem *dst = NULL, *src = NULL;
++      unsigned long __iomem *base = NULL;
+       int bits = BITS_PER_LONG, bytes = bits >> 3;
+-      int dst_idx = 0, src_idx = 0, rev_copy = 0;
++      unsigned dst_idx = 0, src_idx = 0, rev_copy = 0;
+       u32 bswapmask = fb_compute_bswapmask(p);
+ 
+       if (p->state != FBINFO_STATE_RUNNING)
+@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, const struct 
fb_copyarea *area)
+ 
+       // split the base of the framebuffer into a long-aligned address and the
+       // index of the first bit
+-      dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & 
~(bytes-1));
++      base = (unsigned long __iomem *)((unsigned long)p->screen_base & 
~(bytes-1));
+       dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1));
+       // add offset of source and target area
+       dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
+@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, const struct 
fb_copyarea *area)
+               while (height--) {
+                       dst_idx -= bits_per_line;
+                       src_idx -= bits_per_line;
+-                      dst += dst_idx >> (ffs(bits) - 1);
+-                      dst_idx &= (bytes - 1);
+-                      src += src_idx >> (ffs(bits) - 1);
+-                      src_idx &= (bytes - 1);
+-                      bitcpy_rev(p, dst, dst_idx, src, src_idx, bits,
++                      bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits,
++                              base + (src_idx / bits), src_idx % bits, bits,
+                               width*p->var.bits_per_pixel, bswapmask);
+               }
+       } else {
+               while (height--) {
+-                      dst += dst_idx >> (ffs(bits) - 1);
+-                      dst_idx &= (bytes - 1);
+-                      src += src_idx >> (ffs(bits) - 1);
+-                      src_idx &= (bytes - 1);
+-                      bitcpy(p, dst, dst_idx, src, src_idx, bits,
++                      bitcpy(p, base + (dst_idx / bits), dst_idx % bits,
++                              base + (src_idx / bits), src_idx % bits, bits,
+                               width*p->var.bits_per_pixel, bswapmask);
+                       dst_idx += bits_per_line;
+                       src_idx += bits_per_line;
+diff --git a/drivers/video/matrox/matroxfb_accel.c 
b/drivers/video/matrox/matroxfb_accel.c
+index 8335a6fe303e..0d5cb85d071a 100644
+--- a/drivers/video/matrox/matroxfb_accel.c
++++ b/drivers/video/matrox/matroxfb_accel.c
+@@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_info *minfo)
+       minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | 
M_DWG_SGNZERO | M_DWG_SHIFTZERO;
+       if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC;
+       minfo->accel.m_opmode = mopmode;
++      minfo->accel.m_access = maccess;
++      minfo->accel.m_pitch = mpitch;
+ }
+ 
+ EXPORT_SYMBOL(matrox_cfbX_init);
+ 
++static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo)
++{
++      mga_outl(M_MACCESS, minfo->accel.m_access);
++      mga_outl(M_PITCH, minfo->accel.m_pitch);
++}
++
+ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int 
sy,
+                              int sx, int dy, int dx, int height, int width)
+ {
+@@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct matrox_fb_info 
*minfo, int vxres, int sy,
+       CRITBEGIN
+ 
+       if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+-              mga_fifo(2);
++              mga_fifo(4);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | 
M_DWG_SGNZERO |
+                        M_DWG_BFCOL | M_DWG_REPLACE);
+               mga_outl(M_AR5, vxres);
+@@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct matrox_fb_info 
*minfo, int vxres, int sy,
+               start = sy*vxres+sx+curr_ydstorg(minfo);
+               end = start+width;
+       } else {
+-              mga_fifo(3);
++              mga_fifo(5);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL 
| M_DWG_REPLACE);
+               mga_outl(M_SGN, 5);
+               mga_outl(M_AR5, -vxres);
+@@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct matrox_fb_info 
*minfo, int vxres, int sy,
+               start = end+width;
+               dy += height-1;
+       }
+-      mga_fifo(4);
++      mga_fifo(6);
++      matrox_accel_restore_maccess(minfo);
+       mga_outl(M_AR0, end);
+       mga_outl(M_AR3, start);
+       mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info 
*minfo, int vxres,
+       CRITBEGIN
+ 
+       if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
+-              mga_fifo(2);
++              mga_fifo(4);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | 
M_DWG_SGNZERO |
+                       M_DWG_BFCOL | M_DWG_REPLACE);
+               mga_outl(M_AR5, vxres);
+@@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info 
*minfo, int vxres,
+               start = sy*vxres+sx+curr_ydstorg(minfo);
+               end = start+width;
+       } else {
+-              mga_fifo(3);
++              mga_fifo(5);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL 
| M_DWG_REPLACE);
+               mga_outl(M_SGN, 5);
+               mga_outl(M_AR5, -vxres);
+@@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info 
*minfo, int vxres,
+               start = end+width;
+               dy += height-1;
+       }
+-      mga_fifo(5);
++      mga_fifo(7);
++      matrox_accel_restore_maccess(minfo);
+       mga_outl(M_AR0, end);
+       mga_outl(M_AR3, start);
+       mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
+@@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct matrox_fb_info 
*minfo, u_int32_t color,
+ 
+       CRITBEGIN
+ 
+-      mga_fifo(5);
++      mga_fifo(7);
++      matrox_accel_restore_maccess(minfo);
+       mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE);
+       mga_outl(M_FCOL, color);
+       mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct matrox_fb_info 
*minfo, u_int32_t bgx,
+       width >>= 1;
+       sx >>= 1;
+       if (width) {
+-              mga_fifo(5);
++              mga_fifo(7);
++              matrox_accel_restore_maccess(minfo);
+               mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2);
+               mga_outl(M_FCOL, bgx);
+               mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
+@@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info 
*minfo, u_int32_t fgx,
+ 
+       CRITBEGIN
+ 
+-      mga_fifo(3);
++      mga_fifo(5);
++      matrox_accel_restore_maccess(minfo);
+       if (easy)
+               mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | 
M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE);
+       else
+@@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info 
*minfo, u_int32_t fgx,
+       fxbndry = ((xx + width - 1) << 16) | xx;
+       mmio = minfo->mmio.vbase;
+ 
+-      mga_fifo(6);
++      mga_fifo(8);
++      matrox_accel_restore_maccess(minfo);
+       mga_writel(mmio, M_FXBNDRY, fxbndry);
+       mga_writel(mmio, M_AR0, ar0);
+       mga_writel(mmio, M_AR3, 0);
+diff --git a/drivers/video/matrox/matroxfb_base.h 
b/drivers/video/matrox/matroxfb_base.h
+index 11ed57bb704e..556d96ce40bf 100644
+--- a/drivers/video/matrox/matroxfb_base.h
++++ b/drivers/video/matrox/matroxfb_base.h
+@@ -307,6 +307,8 @@ struct matrox_accel_data {
+ #endif
+       u_int32_t       m_dwg_rect;
+       u_int32_t       m_opmode;
++      u_int32_t       m_access;
++      u_int32_t       m_pitch;
+ };
+ 
+ struct v4l2_queryctrl;
+diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c
+index aba7686b1a32..ac2cf6dcc598 100644
+--- a/drivers/video/tgafb.c
++++ b/drivers/video/tgafb.c
+@@ -1146,222 +1146,57 @@ copyarea_line_32bpp(struct fb_info *info, u32 dy, u32 
sy,
+       __raw_writel(TGA_MODE_SBM_24BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG);
+ }
+ 
+-/* The general case of forward copy in 8bpp mode.  */
++/* The (almost) general case of backward copy in 8bpp mode.  */
+ static inline void
+-copyarea_foreward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
+-                     u32 height, u32 width, u32 line_length)
++copyarea_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
++            u32 height, u32 width, u32 line_length,
++            const struct fb_copyarea *area)
+ {
+       struct tga_par *par = (struct tga_par *) info->par;
+-      unsigned long i, copied, left;
+-      unsigned long dpos, spos, dalign, salign, yincr;
+-      u32 smask_first, dmask_first, dmask_last;
+-      int pixel_shift, need_prime, need_second;
+-      unsigned long n64, n32, xincr_first;
++      unsigned i, yincr;
++      int depos, sepos, backward, last_step, step;
++      u32 mask_last;
++      unsigned n32;
+       void __iomem *tga_regs;
+       void __iomem *tga_fb;
+ 
+-      yincr = line_length;
+-      if (dy > sy) {
+-              dy += height - 1;
+-              sy += height - 1;
+-              yincr = -yincr;
+-      }
+-
+-      /* Compute the offsets and alignments in the frame buffer.
+-         More than anything else, these control how we do copies.  */
+-      dpos = dy * line_length + dx;
+-      spos = sy * line_length + sx;
+-      dalign = dpos & 7;
+-      salign = spos & 7;
+-      dpos &= -8;
+-      spos &= -8;
+-
+-      /* Compute the value for the PIXELSHIFT register.  This controls
+-         both non-co-aligned source and destination and copy direction.  */
+-      if (dalign >= salign)
+-              pixel_shift = dalign - salign;
+-      else
+-              pixel_shift = 8 - (salign - dalign);
+-
+-      /* Figure out if we need an additional priming step for the
+-         residue register.  */
+-      need_prime = (salign > dalign);
+-      if (need_prime)
+-              dpos -= 8;
+-
+-      /* Begin by copying the leading unaligned destination.  Copy enough
+-         to make the next destination address 32-byte aligned.  */
+-      copied = 32 - (dalign + (dpos & 31));
+-      if (copied == 32)
+-              copied = 0;
+-      xincr_first = (copied + 7) & -8;
+-      smask_first = dmask_first = (1ul << copied) - 1;
+-      smask_first <<= salign;
+-      dmask_first <<= dalign + need_prime*8;
+-      if (need_prime && copied > 24)
+-              copied -= 8;
+-      left = width - copied;
+-
+-      /* Care for small copies.  */
+-      if (copied > width) {
+-              u32 t;
+-              t = (1ul << width) - 1;
+-              t <<= dalign + need_prime*8;
+-              dmask_first &= t;
+-              left = 0;
+-      }
+-
+-      /* Attempt to use 64-byte copies.  This is only possible if the
+-         source and destination are co-aligned at 64 bytes.  */
+-      n64 = need_second = 0;
+-      if ((dpos & 63) == (spos & 63)
+-          && (height == 1 || line_length % 64 == 0)) {
+-              /* We may need a 32-byte copy to ensure 64 byte alignment.  */
+-              need_second = (dpos + xincr_first) & 63;
+-              if ((need_second & 32) != need_second)
+-                      printk(KERN_ERR "tgafb: need_second wrong\n");
+-              if (left >= need_second + 64) {
+-                      left -= need_second;
+-                      n64 = left / 64;
+-                      left %= 64;
+-              } else
+-                      need_second = 0;
+-      }
+-
+-      /* Copy trailing full 32-byte sections.  This will be the main
+-         loop if the 64 byte loop can't be used.  */
+-      n32 = left / 32;
+-      left %= 32;
+-
+-      /* Copy the trailing unaligned destination.  */
+-      dmask_last = (1ul << left) - 1;
+-
+-      tga_regs = par->tga_regs_base;
+-      tga_fb = par->tga_fb_base;
+-
+-      /* Set up the MODE and PIXELSHIFT registers.  */
+-      __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_COPY, tga_regs+TGA_MODE_REG);
+-      __raw_writel(pixel_shift, tga_regs+TGA_PIXELSHIFT_REG);
+-      wmb();
+-
+-      for (i = 0; i < height; ++i) {
+-              unsigned long j;
+-              void __iomem *sfb;
+-              void __iomem *dfb;
+-
+-              sfb = tga_fb + spos;
+-              dfb = tga_fb + dpos;
+-              if (dmask_first) {
+-                      __raw_writel(smask_first, sfb);
+-                      wmb();
+-                      __raw_writel(dmask_first, dfb);
+-                      wmb();
+-                      sfb += xincr_first;
+-                      dfb += xincr_first;
+-              }
+-
+-              if (need_second) {
+-                      __raw_writel(0xffffffff, sfb);
+-                      wmb();
+-                      __raw_writel(0xffffffff, dfb);
+-                      wmb();
+-                      sfb += 32;
+-                      dfb += 32;
+-              }
+-
+-              if (n64 && (((unsigned long)sfb | (unsigned long)dfb) & 63))
+-                      printk(KERN_ERR
+-                             "tgafb: misaligned copy64 (s:%p, d:%p)\n",
+-                             sfb, dfb);
+-
+-              for (j = 0; j < n64; ++j) {
+-                      __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC);
+-                      wmb();
+-                      __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST);
+-                      wmb();
+-                      sfb += 64;
+-                      dfb += 64;
+-              }
+-
+-              for (j = 0; j < n32; ++j) {
+-                      __raw_writel(0xffffffff, sfb);
+-                      wmb();
+-                      __raw_writel(0xffffffff, dfb);
+-                      wmb();
+-                      sfb += 32;
+-                      dfb += 32;
+-              }
+-
+-              if (dmask_last) {
+-                      __raw_writel(0xffffffff, sfb);
+-                      wmb();
+-                      __raw_writel(dmask_last, dfb);
+-                      wmb();
+-              }
+-
+-              spos += yincr;
+-              dpos += yincr;
++      /* Do acceleration only if we are aligned on 8 pixels */
++      if ((dx | sx | width) & 7) {
++              cfb_copyarea(info, area);
++              return;
+       }
+ 
+-      /* Reset the MODE register to normal.  */
+-      __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG);
+-}
+-
+-/* The (almost) general case of backward copy in 8bpp mode.  */
+-static inline void
+-copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
+-                     u32 height, u32 width, u32 line_length,
+-                     const struct fb_copyarea *area)
+-{
+-      struct tga_par *par = (struct tga_par *) info->par;
+-      unsigned long i, left, yincr;
+-      unsigned long depos, sepos, dealign, sealign;
+-      u32 mask_first, mask_last;
+-      unsigned long n32;
+-      void __iomem *tga_regs;
+-      void __iomem *tga_fb;
+-
+       yincr = line_length;
+       if (dy > sy) {
+               dy += height - 1;
+               sy += height - 1;
+               yincr = -yincr;
+       }
++      backward = dy == sy && dx > sx && dx < sx + width;
+ 
+       /* Compute the offsets and alignments in the frame buffer.
+          More than anything else, these control how we do copies.  */
+-      depos = dy * line_length + dx + width;
+-      sepos = sy * line_length + sx + width;
+-      dealign = depos & 7;
+-      sealign = sepos & 7;
+-
+-      /* ??? The documentation appears to be incorrect (or very
+-         misleading) wrt how pixel shifting works in backward copy
+-         mode, i.e. when PIXELSHIFT is negative.  I give up for now.
+-         Do handle the common case of co-aligned backward copies,
+-         but frob everything else back on generic code.  */
+-      if (dealign != sealign) {
+-              cfb_copyarea(info, area);
+-              return;
+-      }
+-
+-      /* We begin the copy with the trailing pixels of the
+-         unaligned destination.  */
+-      mask_first = (1ul << dealign) - 1;
+-      left = width - dealign;
+-
+-      /* Care for small copies.  */
+-      if (dealign > width) {
+-              mask_first ^= (1ul << (dealign - width)) - 1;
+-              left = 0;
+-      }
++      depos = dy * line_length + dx;
++      sepos = sy * line_length + sx;
++      if (backward)
++              depos += width, sepos += width;
+ 
+       /* Next copy full words at a time.  */
+-      n32 = left / 32;
+-      left %= 32;
++      n32 = width / 32;
++      last_step = width % 32;
+ 
+       /* Finally copy the unaligned head of the span.  */
+-      mask_last = -1 << (32 - left);
++      mask_last = (1ul << last_step) - 1;
++
++      if (!backward) {
++              step = 32;
++              last_step = 32;
++      } else {
++              step = -32;
++              last_step = -last_step;
++              sepos -= 32;
++              depos -= 32;
++      }
+ 
+       tga_regs = par->tga_regs_base;
+       tga_fb = par->tga_fb_base;
+@@ -1378,25 +1213,33 @@ copyarea_backward_8bpp(struct fb_info *info, u32 dx, 
u32 dy, u32 sx, u32 sy,
+ 
+               sfb = tga_fb + sepos;
+               dfb = tga_fb + depos;
+-              if (mask_first) {
+-                      __raw_writel(mask_first, sfb);
+-                      wmb();
+-                      __raw_writel(mask_first, dfb);
+-                      wmb();
+-              }
+ 
+-              for (j = 0; j < n32; ++j) {
+-                      sfb -= 32;
+-                      dfb -= 32;
++              for (j = 0; j < n32; j++) {
++                      if (j < 2 && j + 1 < n32 && !backward &&
++                          !(((unsigned long)sfb | (unsigned long)dfb) & 63)) {
++                              do {
++                                      __raw_writel(sfb - tga_fb, 
tga_regs+TGA_COPY64_SRC);
++                                      wmb();
++                                      __raw_writel(dfb - tga_fb, 
tga_regs+TGA_COPY64_DST);
++                                      wmb();
++                                      sfb += 64;
++                                      dfb += 64;
++                                      j += 2;
++                              } while (j + 1 < n32);
++                              j--;
++                              continue;
++                      }
+                       __raw_writel(0xffffffff, sfb);
+                       wmb();
+                       __raw_writel(0xffffffff, dfb);
+                       wmb();
++                      sfb += step;
++                      dfb += step;
+               }
+ 
+               if (mask_last) {
+-                      sfb -= 32;
+-                      dfb -= 32;
++                      sfb += last_step - step;
++                      dfb += last_step - step;
+                       __raw_writel(mask_last, sfb);
+                       wmb();
+                       __raw_writel(mask_last, dfb);
+@@ -1457,14 +1300,9 @@ tgafb_copyarea(struct fb_info *info, const struct 
fb_copyarea *area)
+       else if (bpp == 32)
+               cfb_copyarea(info, area);
+ 
+-      /* Detect overlapping source and destination that requires
+-         a backward copy.  */
+-      else if (dy == sy && dx > sx && dx < sx + width)
+-              copyarea_backward_8bpp(info, dx, dy, sx, sy, height,
+-                                     width, line_length, area);
+       else
+-              copyarea_foreward_8bpp(info, dx, dy, sx, sy, height,
+-                                     width, line_length);
++              copyarea_8bpp(info, dx, dy, sx, sy, height,
++                            width, line_length, area);
+ }
+ 
+ 
+diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
+index 8807fe501d20..436625457311 100644
+--- a/drivers/virtio/virtio_balloon.c
++++ b/drivers/virtio/virtio_balloon.c
+@@ -305,6 +305,12 @@ static int balloon(void *_vballoon)
+               else if (diff < 0)
+                       leak_balloon(vb, -diff);
+               update_balloon_size(vb);
++
++              /*
++               * For large balloon changes, we could spend a lot of time
++               * and always have work to do.  Be nice if preempt disabled.
++               */
++              cond_resched();
+       }
+       return 0;
+ }
+diff --git a/fs/locks.c b/fs/locks.c
+index fcc50ab71cc6..d4f1d89d9bc6 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -1253,11 +1253,10 @@ int __break_lease(struct inode *inode, unsigned int 
mode)
+ 
+ restart:
+       break_time = flock->fl_break_time;
+-      if (break_time != 0) {
++      if (break_time != 0)
+               break_time -= jiffies;
+-              if (break_time == 0)
+-                      break_time++;
+-      }
++      if (break_time == 0)
++              break_time++;
+       locks_insert_block(flock, new_fl);
+       unlock_flocks();
+       error = wait_event_interruptible_timeout(new_fl->fl_wait,
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index 7e13eb428cb2..50d7cb1ee947 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -762,6 +762,7 @@ struct ata_port {
+       unsigned long           qc_allocated;
+       unsigned int            qc_active;
+       int                     nr_active_links; /* #links with active qcs */
++      unsigned int            last_tag;       /* track next tag hw expects */
+ 
+       struct ata_link         link;           /* host default link */
+       struct ata_link         *slave_link;    /* see ata_slave_link_init() */


Reply via email to