Module Name:    src
Committed By:   macallan
Date:           Wed Jan 16 20:34:10 UTC 2013

Modified Files:
        src/sys/arch/arm/omap: omapfb.c

Log Message:
copy data in 32bit chunks whenever alignment permits
 -> very noticeable speedup


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/arch/arm/omap/omapfb.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/arm/omap/omapfb.c
diff -u src/sys/arch/arm/omap/omapfb.c:1.11 src/sys/arch/arm/omap/omapfb.c:1.12
--- src/sys/arch/arm/omap/omapfb.c:1.11	Wed Jan 16 00:09:27 2013
+++ src/sys/arch/arm/omap/omapfb.c	Wed Jan 16 20:34:10 2013
@@ -1,4 +1,4 @@
-/*	$NetBSD: omapfb.c,v 1.11 2013/01/16 00:09:27 macallan Exp $	*/
+/*	$NetBSD: omapfb.c,v 1.12 2013/01/16 20:34:10 macallan Exp $	*/
 
 /*
  * Copyright (c) 2010 Michael Lorenz
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: omapfb.c,v 1.11 2013/01/16 00:09:27 macallan Exp $");
+__KERNEL_RCSID(0, "$NetBSD: omapfb.c,v 1.12 2013/01/16 20:34:10 macallan Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -642,14 +642,43 @@ static void
 omapfb_rectfill(struct omapfb_softc *sc, int x, int y, int wi, int he,
      uint32_t colour)
 {
-	int width_in_bytes = wi * (sc->sc_depth >> 3);
+	int bpp = sc->sc_depth >> 3;	/* bytes per pixel */
+	int width_in_bytes = wi * bpp;
+	uint32_t daddr;
 
+	daddr = sc->sc_fbhwaddr + sc->sc_stride * y + x * bpp;
 	omapfb_wait_idle(sc);
 
+	/*
+	 * stupid hardware
+	 * in 32bit mode the DMA controller always writes 0 into the upper byte, so we
+	 * can use this mode only if we actually want that
+	 */
+	if (((colour & 0xff00) == 0) &&
+	   (((daddr | width_in_bytes) & 3) == 0)) {
+		/*
+		 * everything is properly aligned so we can copy stuff in
+		 * 32bit chunks instead of pixel by pixel
+		 */
+		wi = wi >> 1;
+
+		/* just in case */
+		colour |= colour << 16;
+
+		omapdma_write_ch_reg(0, OMAPDMAC_CSDPI,
+		    CSDPI_SRC_BURST_64 | CSDPI_DST_BURST_64 |
+		    CSDPI_DST_PACKED | CSDPI_WRITE_POSTED_EXCEPT_LAST |
+		    CSDPI_DATA_TYPE_32);
+	} else {
+		omapdma_write_ch_reg(0, OMAPDMAC_CSDPI,
+		    CSDPI_SRC_BURST_64 | CSDPI_DST_BURST_64 |
+		    CSDPI_DST_PACKED | CSDPI_WRITE_POSTED_EXCEPT_LAST |
+		    CSDPI_DATA_TYPE_16);
+	}
+
 	omapdma_write_ch_reg(0, OMAPDMAC_CEN, wi);
 	omapdma_write_ch_reg(0, OMAPDMAC_CFN, he);
-	omapdma_write_ch_reg(0, OMAPDMAC_CDSA,
-	    sc->sc_fbhwaddr + sc->sc_stride * y + x * (sc->sc_depth >> 3));
+	omapdma_write_ch_reg(0, OMAPDMAC_CDSA, daddr);
 	omapdma_write_ch_reg(0, OMAPDMAC_CCR,
 	    CCR_CONST_FILL_ENABLE | CCR_DST_AMODE_DOUBLE_INDEX |
 	    CCR_SRC_AMODE_CONST_ADDR);
@@ -665,14 +694,19 @@ static void
 omapfb_bitblt(struct omapfb_softc *sc, int xs, int ys, int xd, int yd,
     int wi, int he, int rop)
 {
-	int width_in_bytes = wi * (sc->sc_depth >> 3);
+	int bpp = sc->sc_depth >> 3;	/* bytes per pixel */
+	int width_in_bytes = wi * bpp;
+	
 	int hstep, vstep;
 	uint32_t saddr, daddr;
 
-	omapfb_wait_idle(sc);
-
-	saddr = sc->sc_fbhwaddr + sc->sc_stride * ys + xs * (sc->sc_depth >> 3);
-	daddr = sc->sc_fbhwaddr + sc->sc_stride * yd + xd * (sc->sc_depth >> 3);
+	/*
+	 * TODO:
+	 * - use 32bit transfers if we're properly aligned
+	 */
+	saddr = sc->sc_fbhwaddr + sc->sc_stride * ys + xs * bpp;
+	daddr = sc->sc_fbhwaddr + sc->sc_stride * yd + xd * bpp;
+		
 	if (ys < yd) {
 		/* need to go vertically backwards */
 		vstep = 1 - (sc->sc_stride + width_in_bytes);
@@ -686,12 +720,30 @@ omapfb_bitblt(struct omapfb_softc *sc, i
 		 * and destination pixels are on the same line
 		 */
 		hstep = 1 - (sc->sc_depth >> 2);
-		vstep = sc->sc_stride + (sc->sc_depth >> 3) * (wi - 1) + 1;
-		saddr += (sc->sc_depth >> 3) * (wi - 1);
-		daddr += (sc->sc_depth >> 3) * (wi - 1);
+		vstep = sc->sc_stride + bpp * (wi - 1) + 1;
+		saddr += bpp * (wi - 1);
+		daddr += bpp * (wi - 1);
 	} else
 		hstep = 1;
 
+	omapfb_wait_idle(sc);
+	if (((saddr | daddr | width_in_bytes) & 3) == 0) {
+		/*
+		 * everything is properly aligned so we can copy stuff in
+		 * 32bit chunks instead of pixel by pixel
+		 */
+		wi = wi >> 1;
+		omapdma_write_ch_reg(0, OMAPDMAC_CSDPI,
+		    CSDPI_SRC_BURST_64 | CSDPI_DST_BURST_64 |
+		    CSDPI_DST_PACKED | CSDPI_WRITE_POSTED_EXCEPT_LAST |
+		    CSDPI_DATA_TYPE_32);
+	} else {
+		omapdma_write_ch_reg(0, OMAPDMAC_CSDPI,
+		    CSDPI_SRC_BURST_64 | CSDPI_DST_BURST_64 |
+		    CSDPI_DST_PACKED | CSDPI_WRITE_POSTED_EXCEPT_LAST |
+		    CSDPI_DATA_TYPE_16);
+	}
+
 	omapdma_write_ch_reg(0, OMAPDMAC_CEN, wi);
 	omapdma_write_ch_reg(0, OMAPDMAC_CFN, he);
 	omapdma_write_ch_reg(0, OMAPDMAC_CSSA, saddr);

Reply via email to