On Fri, Jul 13, 2012 at 03:06:34PM +0200, Mark Kettenis wrote:
> > Date: Fri, 13 Jul 2012 14:57:11 +0200
> > From: "Christiano F. Haesbaert" <haesba...@openbsd.org>
> > 
> > Ok, so here is the version with #ifndef SMALL_KERNEL. The only question
> > that remains is: do we keep the printf in dmesg, or shall I take it
> > out?
> > 
> > I'd like to keep it so we can tell whether the detection is correct just
> > by looking at sent dmesgs.
> 
> Can you shelve this until you:
> 
> a) Have the equivalent code for i386.

Sure, that should actually be the same code; I just need to make the
identifycpu() stuff run on each cpu on i386, as it does on amd64.

> b) Have something that actually uses this?

That won't be so simple, but ok :)

Let me explain why. When I started all this, I wanted to favor migrating
procs within the same core first, and then within the same package. So you
would pay a penalty to cross cores and a double penalty to cross packages.

But this is naive and stupid: sometimes you want procs to go as far
away as possible. Think of 2 procs that thrash the cachelines. Brett and
I found a good metric in a paper by Alexandra Fedorova; it involves
calculating a "pain" parameter, but we're far away from making that
possible and viable, and we have easier/bigger gains right now doing other
stuff.

I'll hold onto it, at least it's on the mailing lists so users can play
with it :).

Cheers 

> 
> Cheers,
> 
> Mark
> 
> > Index: arch/amd64/amd64/identcpu.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
> > retrieving revision 1.36
> > diff -d -u -p -r1.36 identcpu.c
> > --- arch/amd64/amd64/identcpu.c     22 Apr 2012 19:36:09 -0000      1.36
> > +++ arch/amd64/amd64/identcpu.c     13 Jul 2012 11:45:58 -0000
> > @@ -446,4 +446,126 @@ identifycpu(struct cpu_info *ci)
> >             sensordev_install(&ci->ci_sensordev);
> >  #endif
> >     }
> > +#ifndef SMALL_KERNEL
> > +   cpu_topology(ci);
> > +#endif
> > +}
> > +
> > +#ifndef SMALL_KERNEL
> > +/*
> > + * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
> > + */
> > +static int
> > +log2(unsigned int i)
> > +{
> > +   int ret = 0;
> > +
> > +   while (i >>= 1)
> > +           ret++;
> > +
> > +   return (ret);
> > +}
> > +
> > +static int
> > +mask_width(u_int x)
> > +{
> > +   int bit;
> > +   int mask;
> > +   int powerof2;
> > +
> > +   powerof2 = ((x - 1) & x) == 0;
> > +   mask = (x << (1 - powerof2)) - 1;
> > +
> > +   /* fls */
> > +   if (mask == 0)
> > +           return (0);
> > +   for (bit = 1; mask != 1; bit++)
> > +           mask = (unsigned int)mask >> 1;
> > +
> > +   return (bit);
> > +}
> > +
> > +/*
> > + * Build up cpu topology for given cpu, must run on the core itself.
> > + */
> > +void
> > +cpu_topology(struct cpu_info *ci)
> > +{
> > +   u_int32_t eax, ebx, ecx, edx;
> > +   u_int32_t apicid, max_apicid, max_coreid;
> > +   u_int32_t smt_bits, core_bits, pkg_bits;
> > +   u_int32_t smt_mask, core_mask, pkg_mask;
> > +   
> > +   /* We need at least apicid at CPUID 1 */
> > +   CPUID(0, eax, ebx, ecx, edx);
> > +   if (eax < 1)
> > +           goto no_topology;
> > +   
> > +   /* Initial apicid */
> > +   CPUID(1, eax, ebx, ecx, edx);
> > +   apicid = (ebx >> 24) & 0xff;
> > +   
> > +   if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
> > +           /* We need at least apicid at CPUID 0x80000008 */
> > +           CPUID(0x80000000, eax, ebx, ecx, edx);
> > +           if (eax < 0x80000008)
> > +                   goto no_topology;
> > +           
> > +           CPUID(0x80000008, eax, ebx, ecx, edx);
> > +           core_bits = (ecx >> 12) & 0xf;
> > +           if (core_bits == 0)
> > +                   goto no_topology;
> > +           /* So coreidsize 2 gives 3, 3 gives 7... */
> > +           core_mask = (1 << core_bits) - 1;
> > +           /* Core id is the least significant considering mask */
> > +           ci->ci_core_id = apicid & core_mask;
> > +           /* Pkg id is the upper remaining bits */
> > +           ci->ci_pkg_id = apicid & ~core_mask;
> > +           ci->ci_pkg_id >>= core_bits;
> > +   } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
> > +           /* We only support leaf 1/4 detection */
> > +           CPUID(0, eax, ebx, ecx, edx);
> > +           if (eax < 4)
> > +                   goto no_topology;
> > +           /* Get max_apicid */
> > +           CPUID(1, eax, ebx, ecx, edx);
> > +           max_apicid = (ebx >> 16) & 0xff;
> > +           /* Get max_coreid */
> > +           CPUID2(4, 0, eax, ebx, ecx, edx);
> > +           max_coreid = ((eax >> 26) & 0x3f) + 1;
> > +           /* SMT */
> > +           smt_bits = mask_width(max_apicid / max_coreid);
> > +           smt_mask = (1 << smt_bits) - 1;
> > +           /* Core */
> > +           core_bits = log2(max_coreid);
> > +           core_mask = (1 << (core_bits + smt_bits)) - 1;
> > +           core_mask ^= smt_mask;
> > +           /* Pkg */
> > +           pkg_bits = core_bits + smt_bits;
> > +           pkg_mask = -1 << core_bits;
> > +            
> > +           ci->ci_smt_id = apicid & smt_mask;
> > +           ci->ci_core_id = (apicid & core_mask) >> smt_bits;
> > +           ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
> > +   } else
> > +           goto no_topology;
> > +#ifdef DEBUG
> > +   printf("cpu%d: smt %u, core %u, pkg %u "
> > +       "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, 
> > smt_mask 0x%x, "
> > +       "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
> > +       ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
> > +       apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
> > +       core_mask, pkg_bits, pkg_mask);
> > +#else
> > +   printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
> > +       ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
> > +               
> > +#endif
> > +   return;
> > +   /* We can't map, so consider ci_core_id as ci_cpuid */
> > +no_topology:
> > +   ci->ci_smt_id  = 0;
> > +   ci->ci_core_id = ci->ci_cpuid;
> > +   ci->ci_pkg_id  = 0;
> >  }
> > +#endif     /* SMALL_KERNEL */
> > Index: arch/amd64/include/cpu.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> > retrieving revision 1.73
> > diff -d -u -p -r1.73 cpu.h
> > --- arch/amd64/include/cpu.h        17 Apr 2012 16:02:33 -0000      1.73
> > +++ arch/amd64/include/cpu.h        13 Jul 2012 11:56:34 -0000
> > @@ -100,7 +100,11 @@ struct cpu_info {
> >     u_int32_t       ci_model;
> >     u_int32_t       ci_cflushsz;
> >     u_int64_t       ci_tsc_freq;
> > -
> > +#ifndef SMALL_KERNEL
> > +   u_int32_t       ci_smt_id; 
> > +   u_int32_t       ci_core_id;
> > +   u_int32_t       ci_pkg_id;
> > +#endif     /* SMALL_KERNEL */
> >     struct cpu_functions *ci_func;
> >     void (*cpu_setup)(struct cpu_info *);
> >     void (*ci_info)(struct cpu_info *);
> > @@ -266,6 +270,9 @@ extern int cpuspeed;
> >  /* identcpu.c */
> >  void       identifycpu(struct cpu_info *);
> >  int        cpu_amd64speed(int *);
> > +#ifndef SMALL_KERNEL
> > +void       cpu_topology(struct cpu_info *);
> > +#endif     /* SMALL_KERNEL */
> >  
> >  /* machdep.c */
> >  void       dumpconf(void);
> > Index: arch/amd64/include/specialreg.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
> > retrieving revision 1.21
> > diff -d -u -p -r1.21 specialreg.h
> > --- arch/amd64/include/specialreg.h 27 Mar 2012 05:59:46 -0000      1.21
> > +++ arch/amd64/include/specialreg.h 13 Jul 2012 11:26:24 -0000
> > @@ -187,11 +187,13 @@
> >  #define CPUID2MODEL(cpuid) (((cpuid) >> 4) & 15)
> >  #define CPUID2STEPPING(cpuid)      ((cpuid) & 15)
> >  
> > -#define CPUID(code, eax, ebx, ecx, edx)                         \
> > +#define CPUID2(eax_code, ecx_code, eax, ebx, ecx, edx)             \
> >     __asm("cpuid"                                           \
> >         : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)    \
> > -       : "a" (code));
> > +       : "a" (eax_code), "c" (ecx_code));
> >  
> > +#define CPUID(code, eax, ebx, ecx, edx)                            \
> > +   CPUID2(code, 0, eax, ebx, ecx, edx)
> >  
> >  /*
> >   * Model-specific registers for the i386 family

Reply via email to