Well, I now believe that the problem is in the X initialization. I've modified the driver that R.C. kindly sent us. It didn't work for me because of the IRQ issues I posted about yesterday. As the GUI DMA test doesn't need IRQs, I've taken out that initialization and modified the device_read function to perform the GUI DMA test.
I'm attaching the code here. To compile it, just type:

gcc -O2 -D__KERNEL__ -DMODULE -DLINUX -I/usr/include/linux -I /usr/src/linux/include/ -c ati_mach64gui.c

You must also create the device file (in /dev, since the test below reads /dev/atidma):

# mknod atidma c 240 0

And here are the results. I just rebooted my machine, logged in, and installed the driver:

# insmod ./ati_mach64gui.o
PCI: Found IRQ 11 for device 01:00.0
PCI: Sharing IRQ 11 with 00:02.0
PCI: Sharing IRQ 11 with 00:05.0
Found ATI card at 0xf5000000.
#

Now, WITHOUT STARTING X, test the GUI DMA transfer, for example by typing:

# dd if=/dev/atidma of=atiout.dat bs=512 count=1

It worked; the module printed:

(Before DMA Transfer) PAT_REG0 = 0x11111111
(After DMA Transfer) PAT_REG0 = 0x22222222

Then, after a fresh reboot, I ran the same test, but under X. The results were:

(Before DMA Transfer) PAT_REG0 = 0x11111111
do_wait_for_idle failed! GUI_STAT=0x01ff0001
(After DMA Transfer) PAT_REG0 = 0x11111111

This is exactly the behaviour we are seeing with the mach64 DRI driver. Furthermore, the graphics card hangs: only the cursor works, and I had to log in from another computer and reboot the machine. I have tried to avoid this behaviour by resetting the engine when do_wait_for_idle fails, but with no luck.

So, what next? Does anybody have any idea about the difference between XFree 3.x and XFree 4.x initialization? Could the problem be some bad interlocking between X and the DRI module? What do you think about this?

I'm going to put dump_engine_info into this little module. I think this module is a good way to test GUI DMA bus mastering, by comparing the register values obtained outside X with the ones obtained under X.

Best regards.
-- M. Teira
#include <config.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/pci_ids.h>
#include <linux/compatmac.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm-i386/io.h>

/*
 * Minimal character-device module used to exercise GUI bus-master DMA on
 * an ATI card (PCI ID 1002:4c4d).  Reading from the device node runs one
 * DMA transfer and prints PAT_REG0 before and after it.
 *
 * 1. Find the device. call pci_register_driver()
 * 2. Enable devices: pci_enable_device()
 *    Call pci_set_master() to enable bus master.
 *
 * A field, by default, is 640x240x2 (307200). This corresponds to exactly
 * 75 pages when they are 4K sized (AFAIK the minimum page size).
 * The main restriction is that they must be 4K aligned.
 */

#define SUCCESS 0
#define MAJOR_NUM 240
#define DEVICE_NAME "atidma"
#define DEBUG 1

#define CAP_INT_CNTL 0x908
#define CAP_INT_STATUS 0x90C
#define RADEON_CAP0_VBI0_OFFSET 0x093C
#define RADEON_CAP0_VBI1_OFFSET 0x0940
#define RADEON_CAP0_VBI_V_WINDOW 0x0944
#define RADEON_CAP0_VBI_H_WINDOW 0x0948
#define RADEON_CAP0_CONFIG 0x0958

/*
 * Register accessors.  MMR is an unsigned int pointer, so the offsets
 * below are in 32-bit words, not bytes.  Arguments and expansions are
 * fully parenthesized so the macros are safe inside larger expressions.
 */
#define regw(a, b) (*(MMR + (a)) = (b))
#define regr(a) (*(MMR + (a)))

#define MEM_BASE 0x02480000
#define BUS_CNTL (0x28 + 256)
#define CRTC_INT_CNTL (0x06 + 256)
#define BM_SYSTEM_TABLE 0x6F
#define CAPTURE_BUF0_OFFSET 0x20
#define CAPTURE_BUF1_OFFSET 0x21
#define CAPTURE_DEBUG 0x19
#define SYSTEM_TO_FRAME_BUFFER 0x0
#define FRAME_BUFFER_TO_SYSTEM 0x1
#define BM_ADDR 0x0648
#define SRC_CNTL (0x6D + 256)
#define PAT_REG0 (0xA0 + 256)
#define APERTURE_OFFSET 0x7ff800
#define BM_GUI_TABLE 0x6E
#define BUS_MASTER_DIS (1 << 6)
#define SRC_BM_ENABLE (1 << 8)
#define SRC_BM_SYNC (1 << 9)
#define SRC_BM_OP_FRAME_TO_SYSTEM (0 << 10)
#define SRC_BM_OP_SYSTEM_TO_FRAME (1 << 10)
#define SRC_BM_OP_REG_TO_SYSTEM (2 << 10)
#define SRC_BM_OP_SYSTEM_TO_REG (3 << 10)
#define DST_HEIGHT_WIDTH (0x46 + 256)
#define FIFO_STAT (0xC4 + 256)
#define FIFO_SLOT_MASK 0x0000ffff
#define GUI_STAT (0xCE + 256)
#define GUI_ACTIVE (1 << 0)
#define GUI_ENGINE_ENABLE (1 << 8)
#define GEN_TEST_CNTL (0x34 + 256)
#define LAST_DESCRIPTOR (1U << 31)	/* unsigned: 1 << 31 overflows a signed int */

/* NOTE(review): the value 5 contradicts the "multiple of 2" remark; unused here. */
#define NUM_BUFS 5		/* Should be multiple of 2. */
#define CHAR_BUF_SIZE 128	/* Should be a multiple of 4 */
#define H_SIZE 1560
#define FIELD_SIZE 307200

/* Size, alignment and boundary used for every buffer in the DMA pool. */
#define DMA_POOL_BUF_SIZE 0x4000

/* Register aperture (PCI resource 2), set by ati_probe_pci(). */
static volatile unsigned int *MMR = NULL;
static volatile unsigned int *BUFF0;
static volatile unsigned int *BUFF1;
static volatile unsigned int IRQ;
/* Mapping of PCI resource 0, set by ati_probe_pci(). */
static const volatile unsigned char *ATIFB = NULL;
/* Register values captured in device_open() and restored in device_release(). */
static unsigned int saved_bus_cntl, saved_crtc_cntl;
static volatile unsigned int *full_page = NULL;
static volatile int Device_Open = 0;

int ati_module_init(void);
void ati_module_remove(void);
void ati_bh(unsigned long);
static int ati_probe_pci(struct pci_dev *, const struct pci_device_id *);
static void ati_remove_pci(struct pci_dev *);

/*
 * Poll FIFO_STAT until its low 16 bits are <= (0x8000 >> entries).
 *
 * Polls up to 1,000,000 times with a 1 us delay between reads.
 * Returns 0 on success, -EBUSY (after logging) on timeout.
 */
static int do_wait_for_fifo(int entries)
{
	int slots = 0;
	int i;

	for (i = 0; i < 1000000; i++) {
		slots = regr(FIFO_STAT) & FIFO_SLOT_MASK;
		if (slots <= (0x8000 >> entries))
			return 0;
		udelay(1);
	}

	printk("do_wait_for_fifo failed. Slots=%d Entries=%d\n", slots, entries);
	return -EBUSY;
}

/*
 * Wait for the FIFO to drain (do_wait_for_fifo(16)) and then for the
 * GUI_ACTIVE bit of GUI_STAT to clear.
 *
 * Returns 0 when idle, or -EBUSY (after logging) if either wait times out.
 */
static int do_wait_for_idle(void)
{
	int i, ret;

	ret = do_wait_for_fifo(16);
	if (ret < 0)
		return ret;

	for (i = 0; i < 1000000; i++) {
		if (!(regr(GUI_STAT) & GUI_ACTIVE))
			return 0;
		udelay(1);
	}

	printk("do_wait_for_idle failed! GUI_STAT=0x%08x\n", regr(GUI_STAT));
	return -EBUSY;
}

/*
 * Reset the GUI engine: set BUS_MASTER_DIS, toggle GUI_ENGINE_ENABLE in
 * GEN_TEST_CNTL off and back on, then OR 0x00a00000 into BUS_CNTL.
 * The statement order is kept exactly as in the original sequence.
 */
static void do_engine_reset(void)
{
	unsigned int bus_cntl, gen_test_cntl;

	bus_cntl = regr(BUS_CNTL);
	regw(BUS_CNTL, bus_cntl | BUS_MASTER_DIS);

	gen_test_cntl = regr(GEN_TEST_CNTL);
	regw(GEN_TEST_CNTL, gen_test_cntl & ~GUI_ENGINE_ENABLE);

	gen_test_cntl = regr(GEN_TEST_CNTL);
	regw(GEN_TEST_CNTL, gen_test_cntl | GUI_ENGINE_ENABLE);

	/* NOTE(review): 0x00a00000 presumably clears latched FIFO/host error bits - confirm. */
	bus_cntl = regr(BUS_CNTL);
	regw(BUS_CNTL, bus_cntl | 0x00a00000);
}

/*
 * PCI remove callback: undo ati_probe_pci().
 *
 * Mappings are torn down before the regions are released, and the global
 * pointers are cleared so a later device_open() fails cleanly instead of
 * touching unmapped memory.  The character device is unregistered in
 * ati_module_remove(), not here.
 */
static void __devexit ati_remove_pci(struct pci_dev *pdev)
{
	/* Casts drop the const/volatile qualifiers iounmap() does not accept. */
	if (ATIFB != NULL) {
		iounmap((void *) ATIFB);
		ATIFB = NULL;
	}
	if (MMR != NULL) {
		iounmap((void *) MMR);
		MMR = NULL;
	}
	BUFF0 = NULL;
	BUFF1 = NULL;

	pci_release_regions(pdev);
	pci_disable_device(pdev);

	if (full_page != NULL) {
		free_pages((unsigned long) full_page, 7);
		full_page = NULL;
	}

	printk("Removed ATI card \n");
}

/*
 * open(): allow a single opener, save BUS_CNTL and CRTC_INT_CNTL, and
 * clear BUS_MASTER_DIS (setting bit 0x08000000) in BUS_CNTL.
 *
 * Returns SUCCESS, -ENODEV if no card is mapped, or -EBUSY if the device
 * is already open.
 */
static int device_open(struct inode *inode, struct file *file)
{
	unsigned int temp;

	if (MMR == NULL)
		return -ENODEV;

	/* This is not race proof. */
	if (Device_Open)
		return -EBUSY;
	Device_Open++;

	temp = regr(BUS_CNTL);
	saved_bus_cntl = temp;
	/* enable mm regs and bus mastering. */
	temp = (temp | 0x08000000) & ~BUS_MASTER_DIS;
	regw(BUS_CNTL, temp);

	temp = regr(CRTC_INT_CNTL);
	saved_crtc_cntl = temp;

	return SUCCESS;
}

/*
 * release(): restore the registers saved by device_open() and mark the
 * device as closed.  Always returns 0.
 */
static int device_release(struct inode *inode, struct file *file)
{
	/* Restore registers */
	regw(BUS_CNTL, saved_bus_cntl);
	regw(CRTC_INT_CNTL, saved_crtc_cntl);

	Device_Open--;
	return 0;
}

/*
 * read(): run one GUI bus-master DMA test.
 *
 * PAT_REG0 is set to 0x11111111 by direct register write; a descriptor
 * table and a small data buffer are then handed to the engine, and
 * PAT_REG0 is printed again afterwards.  No data is copied to u_buffer.
 *
 * Returns 0 (the caller sees end-of-file) once the test has run, whether
 * or not the engine went idle, or -ENOMEM if the DMA buffers could not
 * be allocated.
 */
static ssize_t device_read(struct file *file, char *u_buffer,
			   size_t length, loff_t *offset)
{
	struct pci_pool *pool;
	dma_addr_t table_addr, data_addr;
	unsigned int *table, *data;
	ssize_t ret = 0;

	pool = pci_pool_create("mach64", NULL, DMA_POOL_BUF_SIZE,
			       DMA_POOL_BUF_SIZE, DMA_POOL_BUF_SIZE,
			       SLAB_ATOMIC);
	if (!pool) {
		printk("pci_pool_create failed\n");
		return -ENOMEM;
	}

	table = pci_pool_alloc(pool, SLAB_ATOMIC, &table_addr);
	if (!table) {
		printk("table-memory alloc failed\n");
		ret = -ENOMEM;
		goto out_destroy_pool;
	}
	memset(table, 0x0, DMA_POOL_BUF_SIZE);

	data = pci_pool_alloc(pool, SLAB_ATOMIC, &data_addr);
	if (!data) {
		printk("data-memory alloc failed\n");
		ret = -ENOMEM;
		goto out_free_table;
	}

	regw(SRC_CNTL, 0x00000000);
	regw(PAT_REG0, 0x11111111);
	printk("(Before DMA Transfer) PAT_REG0 = 0x%08x\n", regr(PAT_REG0));

	/*
	 * Four (index, value) pairs.  0xa0 and 0x6d match the word indices
	 * used above for PAT_REG0 and SRC_CNTL (without the +256 offset).
	 */
	data[0] = 0x000000a0;
	data[1] = 0x22222222;
	data[2] = 0x000000a0;
	data[3] = 0x22222222;
	data[4] = 0x000000a0;
	data[5] = 0x22222222;
	data[6] = 0x0000006d;
	data[7] = 0x00000000;

	/*
	 * Single descriptor: target address, bus address of the data, byte
	 * count plus flags, and a zero word.
	 * NOTE(review): the meaning of flag 0x40000000 is not visible in this
	 * file - confirm against the chip documentation.
	 */
	table[0] = BM_ADDR + APERTURE_OFFSET;
	table[1] = (unsigned int) data_addr;
	table[2] = 8 * sizeof(unsigned int) | LAST_DESCRIPTOR | 0x40000000;
	table[3] = 0;

	/* Order of these register writes is significant; do not reorder. */
	regw(BM_GUI_TABLE, (unsigned int) table_addr);
	regw(SRC_CNTL, SRC_BM_ENABLE | SRC_BM_SYNC | SRC_BM_OP_SYSTEM_TO_REG);
	regw(DST_HEIGHT_WIDTH, 0);
	regw(SRC_CNTL, 0);

	if (do_wait_for_idle() != 0)
		do_engine_reset();

	/* Look the value in PAT_REG0 */
	printk("(After DMA Transfer) PAT_REG0 = 0x%08x\n", regr(PAT_REG0));

	pci_pool_free(pool, data, data_addr);
out_free_table:
	pci_pool_free(pool, table, table_addr);
out_destroy_pool:
	pci_pool_destroy(pool);
	return ret;
}

static struct file_operations Fops = {
	.release = device_release,
	.open = device_open,
	.read = device_read
};

static struct pci_device_id ati_pci_tbl[] __devinitdata = {
	{0x1002, 0x4c4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0},
	{0,}
};

MODULE_DEVICE_TABLE(pci, ati_pci_tbl);

static struct pci_driver ati_driver = {
	.name = "atidma",
	.id_table = ati_pci_tbl,
	.probe = ati_probe_pci,
	.remove = ati_remove_pci
};

/*
 * PCI probe callback: enable the device and bus mastering, claim its
 * regions, and map resource 0 (ATIFB) and resource 2 (MMR).
 *
 * Returns 0 on success; -EIO, -EBUSY or -ENOMEM on failure, with every
 * step taken so far undone.
 */
static int ati_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int err;

	if ((err = pci_enable_device(pdev))) {
		printk("Cannot enable device!\n");
		return -EIO;
	}
	pci_set_master(pdev);

	printk("Found ATI card at 0x%.8lx.\n", pci_resource_start(pdev, 0));

	if (pci_request_regions(pdev, "atidma") != 0) {
		printk("Could not request IO regions!\n");
		err = -EBUSY;
		goto err_disable;
	}

	ATIFB = __ioremap(pci_resource_start(pdev, 0),
			  pci_resource_len(pdev, 0), 0);
	if (ATIFB == NULL) {
		printk("Could not map frame buffer!\n");
		err = -ENOMEM;
		goto err_release;
	}

	MMR = __ioremap(pci_resource_start(pdev, 2),
			pci_resource_len(pdev, 2), 0);
	if (MMR == NULL) {
		printk("Could not map registers!\n");
		err = -ENOMEM;
		goto err_unmap_fb;
	}

	BUFF0 = MMR + 0x20;
	BUFF1 = MMR + 0x21;
	return 0;

err_unmap_fb:
	iounmap((void *) ATIFB);
	ATIFB = NULL;
err_release:
	pci_release_regions(pdev);
err_disable:
	pci_disable_device(pdev);
	return err;
}

/*
 * Module entry point.  The PCI driver is registered first so that the
 * character device only becomes visible once registration has succeeded,
 * and so that a failed load never leaves major MAJOR_NUM registered.
 *
 * Returns 0 on success or a negative errno.
 */
int __init ati_module_init(void)
{
	int retval;

	retval = pci_module_init(&ati_driver);
	if (retval < 0)
		return retval;

	retval = devfs_register_chrdev(MAJOR_NUM, DEVICE_NAME, &Fops);
	if (retval < 0) {
		pci_unregister_driver(&ati_driver);
		return retval;
	}

	return 0;
}

/*
 * Module exit point: remove the character device first so no new opens
 * can arrive, then unregister the PCI driver (which runs ati_remove_pci()
 * for any bound device).
 */
void __exit ati_module_remove(void)
{
	devfs_unregister_chrdev(MAJOR_NUM, DEVICE_NAME);
	pci_unregister_driver(&ati_driver);
}

module_init(ati_module_init);
module_exit(ati_module_remove);