Andrea Arcangeli wrote: > 2.2.18pre15aa1 is here (I will include in the next aa patchkit): > > >ftp://ftp.us.kernel.org/pub/linux/kernel/people/andrea/patches/v2.2/2.2.18pre15aa1/PIII-3.bz2 > > Such patch is been generated by a mix of PIII 2.2.x patch and the PIII 2.4.x > support plus some additional change. (at the end it's very similar to 2.4.x, > but the 2.2.x version is been very useful too for doing comparison) I've made a few correctness changes to this code. The first item that needed correcting is that the XMM feature bit is an Intel-specific bit that other vendors may use for other things, so you need to test vendor == INTEL as well as the feature bit before enabling the XMM and FXSR features (AMD also has fxsr, but we currently don't do anything with it). I moved mmu_cr4_features into the boot_cpu_data struct instead of keeping it as a global __initdata variable, because it is easier to test mmu_cr4_features for a specific bit (such as XMMEXCEPT) to detect the presence of XMM instructions than it is to test the three items (vendor == INTEL && capabilities & FEATURE_XMM && !nofxsr) to determine things such as whether XMM is allowed. This works since cr4 is currently Intel only and the presence of XMMEXCEPT implies all of the above. Since there are PII CPUs that have FXSR support but no mxcsr register, I rekeyed the mxcsr routines to check for XMM instead of FXSR. I also added Gabriel Paubert's more correct tag word conversion routines. Those are all the changes I made here; they are the changes needed for me to be able to put my P4 patch on top of this one instead of having it undo a lot of the work in this patch. Andrea, if you could, please integrate this into your own stuff so that we can keep a unified direction on this. -- Doug Ledford <[EMAIL PROTECTED]> http://people.redhat.com/dledford Please check my web site for aic7xxx updates/answers before e-mailing me about problems
diff -U 3 -r linux-PIII-3/arch/i386/kernel/head.S linux-PIII-3.1/arch/i386/kernel/head.S --- linux-PIII-3/arch/i386/kernel/head.S Fri Jan 15 01:57:25 1999 +++ linux-PIII-3.1/arch/i386/kernel/head.S Wed Oct 25 14:14:54 2000 @@ -32,7 +32,8 @@ #define X86_HARD_MATH CPU_PARAMS+6 #define X86_CPUID CPU_PARAMS+8 #define X86_CAPABILITY CPU_PARAMS+12 -#define X86_VENDOR_ID CPU_PARAMS+16 +#define X86_MMU_CR4 CPU_PARAMS+16 +#define X86_VENDOR_ID CPU_PARAMS+20 /* * swapper_pg_dir is the main page directory, address 0x00101000 @@ -59,7 +60,7 @@ * NOTE! We have to correct for the fact that we're * not yet offset PAGE_OFFSET.. */ -#define cr4_bits mmu_cr4_features-__PAGE_OFFSET +#define cr4_bits X86_MMU_CR4-__PAGE_OFFSET movl %cr4,%eax # Turn on 4Mb pages orl cr4_bits,%eax movl %eax,%cr4 @@ -214,8 +215,8 @@ movb ready,%al # First CPU if 0 orb %al,%al jz 4f # First CPU skip this stuff - movl %cr4,%eax # Turn on 4Mb pages - orl $16,%eax + movl %cr4,%eax # Set CR4 register to match first CPU + orl X86_MMU_CR4,%eax movl %eax,%cr4 movl %cr3,%eax # Intel specification clarification says movl %eax,%cr3 # to do this. Maybe it makes a difference. diff -U 3 -r linux-PIII-3/arch/i386/kernel/i387.c linux-PIII-3.1/arch/i386/kernel/i387.c --- linux-PIII-3/arch/i386/kernel/i387.c Wed Oct 25 13:24:44 2000 +++ linux-PIII-3.1/arch/i386/kernel/i387.c Wed Oct 25 12:55:22 2000 @@ -18,7 +18,8 @@ #include <asm/ptrace.h> #include <asm/uaccess.h> -#define HAVE_FXSR (cpu_has_fxsr) +#define HAVE_FXSR (boot_cpu_data.mmu_cr4_features & X86_FEATURE_OSXFSR) +#define HAVE_XMM (boot_cpu_data.mmu_cr4_features & X86_FEATURE_OSXMMEXCEPT) #ifdef CONFIG_MATH_EMULATION #define HAVE_HWFP (boot_cpu_data.hard_math) @@ -29,13 +30,13 @@ /* * The _current_ task is using the FPU for the first time * so initialize it and set the mxcsr to its default - * value at reset if we support FXSR and then + * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. 
*/ void init_fpu(void) { __asm__("fninit"); - if ( HAVE_FXSR ) + if ( HAVE_XMM ) load_mxcsr(0x1f80); current->used_math = 1; @@ -75,16 +76,16 @@ static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) { - unsigned short ret = 0; - int i; - - for ( i = 0 ; i < 8 ; i++ ) { - if ( (twd & 0x3) != 0x3 ) { - ret |= (1 << i); - } - twd = twd >> 2; - } - return ret; + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; } static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) @@ -101,8 +102,8 @@ if ( twd & 0x1 ) { st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); - switch ( st->exponent ) { - case 0xffff: + switch ( st->exponent & 0x7fff ) { + case 0x7fff: tag = 2; /* Special */ break; case 0x0000: @@ -165,7 +166,7 @@ unsigned short get_fpu_mxcsr( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( HAVE_XMM ) { return tsk->tss.i387.fxsave.mxcsr; } else { return 0x1f80; @@ -201,7 +202,7 @@ void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ) { - if ( HAVE_FXSR ) { + if ( HAVE_XMM ) { tsk->tss.i387.fxsave.mxcsr = mxcsr; } } diff -U 3 -r linux-PIII-3/arch/i386/kernel/setup.c linux-PIII-3.1/arch/i386/kernel/setup.c --- linux-PIII-3/arch/i386/kernel/setup.c Wed Oct 25 13:24:45 2000 +++ linux-PIII-3.1/arch/i386/kernel/setup.c Wed Oct 25 12:58:28 2000 @@ -71,8 +71,6 @@ char ignore_irq13 = 0; /* set if exception 16 works */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; -unsigned long mmu_cr4_features __initdata = 0; - /* * Bus types .. 
*/ diff -U 3 -r linux-PIII-3/include/asm-i386/processor.h linux-PIII-3.1/include/asm-i386/processor.h --- linux-PIII-3/include/asm-i386/processor.h Wed Oct 25 13:24:44 2000 +++ linux-PIII-3.1/include/asm-i386/processor.h Wed Oct 25 13:18:40 2000 @@ -29,6 +29,7 @@ char rfu; int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ __u32 x86_capability; + __u32 mmu_cr4_features; char x86_vendor_id[16]; char x86_model_id[64]; int x86_cache_size; /* in KB - valid for CPUS which support this @@ -113,9 +114,11 @@ #define cpu_has_vme \ (boot_cpu_data.x86_capability & X86_FEATURE_VME) #define cpu_has_fxsr \ - (boot_cpu_data.x86_capability & X86_FEATURE_FXSR) + ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && \ + (boot_cpu_data.x86_capability & X86_FEATURE_FXSR)) #define cpu_has_xmm \ - (boot_cpu_data.x86_capability & X86_FEATURE_XMM) + ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && \ + (boot_cpu_data.x86_capability & X86_FEATURE_XMM)) extern char ignore_irq13; @@ -158,11 +161,10 @@ * enable), so that any CPU's that boot up * after us can get the correct flags. */ -extern unsigned long mmu_cr4_features; static inline void set_in_cr4 (unsigned long mask) { - mmu_cr4_features |= mask; + boot_cpu_data.mmu_cr4_features |= mask; __asm__("movl %%cr4,%%eax\n\t" "orl %0,%%eax\n\t" "movl %%eax,%%cr4\n" @@ -172,7 +174,7 @@ static inline void clear_in_cr4 (unsigned long mask) { - mmu_cr4_features &= ~mask; + boot_cpu_data.mmu_cr4_features &= ~mask; __asm__("movl %%cr4,%%eax\n\t" "andl %0,%%eax\n\t" "movl %%eax,%%cr4\n"