Re: agtimer(4/arm64): simplify agtimer_delay()

2023-08-08 Thread Dale Rahn
Switching the computation of cycles/delaycnt to proper 64-bit math
instead of the '32 bit safe' complex math is likely a good idea.
However, I am not completely convinced that switching to 'yield' (the
current CPU_BUSY_CYCLE implementation) for every iteration of the wait
loop of a 'short wait' makes sense. In a hypervisor environment, this
could cause a very short wait between register writes to become very
long. In a non-hypervisor environment there is essentially no
improvement, because yield has no real benefit outside a hypervisor.
To my current understanding, there is no useful 'wait a short period'
instruction on arm cores.
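
To make the overflow concern concrete, here is a quick standalone check
(a sketch; the 24 MHz frequency is a representative assumption, not a
value from the diff). At 24 MHz the 32-bit product usecs * frequency
wraps once usecs exceeds about 178, which is why the old code split the
computation into centisecond and microsecond parts; the 64-bit product
stays exact for any realistic delay.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t freq = 24000000;	/* assumed 24 MHz timer frequency */
	uint32_t usecs = 1000;		/* a 1 ms delay */

	/* 32-bit product: wraps modulo 2^32 once it exceeds 0xffffffff */
	uint32_t wrapped = usecs * freq;
	/* 64-bit product: exact, as in the proposed agtimer_delay() */
	uint64_t cycles = (uint64_t)usecs * freq / 1000000;

	printf("32-bit product (wrapped): %u\n", wrapped);
	printf("64-bit cycle count: %llu\n", (unsigned long long)cycles);
	return 0;
}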

On Mon, Aug 7, 2023 at 10:32 PM Scott Cheloha 
wrote:

> The agtimer(4/arm64) delay(9) implementation is quite complicated.
> This patch simplifies it.
>
> Am I missing something here?  There's no reason to implement the
> busy-loop like this, right?
>
> ok?
>
> Index: agtimer.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/dev/agtimer.c,v
> retrieving revision 1.23
> diff -u -p -r1.23 agtimer.c
> --- agtimer.c   25 Jul 2023 18:16:19 -  1.23
> +++ agtimer.c   8 Aug 2023 02:24:57 -
> @@ -323,32 +323,12 @@ agtimer_cpu_initclocks(void)
>  void
>  agtimer_delay(u_int usecs)
>  {
> -   uint64_tclock, oclock, delta, delaycnt;
> -   uint64_tcsec, usec;
> -   volatile intj;
> +   uint64_t cycles, start;
>
> -   if (usecs > (0x80000000 / agtimer_frequency)) {
> -   csec = usecs / 10000;
> -   usec = usecs % 10000;
> -
> -   delaycnt = (agtimer_frequency / 100) * csec +
> -   (agtimer_frequency / 100) * usec / 10000;
> -   } else {
> -   delaycnt = agtimer_frequency * usecs / 1000000;
> -   }
> -   if (delaycnt <= 1)
> -   for (j = 100; j > 0; j--)
> -   ;
> -
> -   oclock = agtimer_readcnt64();
> -   while (1) {
> -   for (j = 100; j > 0; j--)
> -   ;
> -   clock = agtimer_readcnt64();
> -   delta = clock - oclock;
> -   if (delta > delaycnt)
> -   break;
> -   }
> +   start = agtimer_readcnt64();
> +   cycles = (uint64_t)usecs * agtimer_frequency / 1000000;
> +   while (agtimer_readcnt64() - start < cycles)
> +   CPU_BUSY_CYCLE();
>  }
>
>  void
>
>


Re: macppc bsd.mp pmap's hash lock

2021-05-11 Thread Dale Rahn
This structure really should be cache-line aligned, which should prevent it
from spilling across a page boundary.

The powerpc pmap was originally designed to have 8 'way' locks so that
only a single way would get locked; thus (as long as one doesn't have way
more than 8 cores) any core should be able to choose some way to replace
and get the work done. This also would have allowed limited recursion.
However, at some point those were taken out. Note that before locking one of
the ways of the hashtable, it would hold the lock on the pmap that it was
inserting the mapping from (which could be the kernel). I am not certain
whether the kernel pmap lock is recursive or not.
The point was to allow multiple cores to access different regions of the
hashtable at the same time (minimal contention). However, it would also
allow a core to reenter the lock_try on a different way, where it should be
able to succeed (as long as the recursion doesn't go 8 deep?).
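
As a rough illustration of the way-lock idea (kernel-style pseudocode;
all names here are invented, this is not the code that was removed), an
inserting core just keeps scanning the 8 ways until it can lock one:

/* Hypothetical sketch of per-way PTEG locks. */
#define NWAYS	8

struct mutex way_lock[NWAYS];

void
pteg_insert_some_way(struct pte_desc *pted)
{
	int w;

	for (;;) {
		/* any way we manage to lock is a candidate for replacement */
		for (w = 0; w < NWAYS; w++) {
			if (mtx_enter_try(&way_lock[w])) {
				pteg_replace_way(pted, w); /* hypothetical helper */
				mtx_leave(&way_lock[w]);
				return;
			}
		}
		/* all 8 ways held; with at most 8 cores this cannot persist */
	}
}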

On Tue, May 11, 2021 at 12:42 AM George Koehler  wrote:

> I made a mistake in my last diff by deleting the memory barriers.
> Here is a diff that keeps membar_enter() and membar_exit().
>
> With my last diff, my G5 froze after 15 hours of uptime, and again
> after 10 hours of uptime.  I'm not sure whether the missing memory
> barriers caused the freeze, but I will be running this diff and hoping
> for fewer freezes.
>
> On Sat, 8 May 2021 18:59:35 +0200 (CEST)
> Mark Kettenis  wrote:
>
> > Good find!  On powerpc64 I avoid the issue because I guarantee that
> > the kernel mappings are never evicted from the hash.  But doing so on
> > powerpc would require more serious development.  I'm not sure we
> > really need a ticket lock for this, but since you already did the
> > work, let's stick with it for now.
>
> __ppc_lock (before and after this diff) doesn't give tickets like
> __mp_lock does, but __ppc_lock and __mp_lock are both recursive locks.
> I don't know an easy way to avoid the recursion, if some of the kernel
> mappings might not be in the hash.  My __ppc_lock diff should be easy
> if I don't make mistakes like wrong memory barriers.
>
> --George
>
> Index: arch/powerpc/include/mplock.h
> ===
> RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v
> retrieving revision 1.4
> diff -u -p -r1.4 mplock.h
> --- arch/powerpc/include/mplock.h   15 Apr 2020 08:09:00 -  1.4
> +++ arch/powerpc/include/mplock.h   10 May 2021 23:33:00 -
> @@ -30,13 +30,13 @@
>  #define __USE_MI_MPLOCK
>
>  /*
> + * __ppc_lock exists because pte_spill_r() can't use __mp_lock.
>   * Really simple spinlock implementation with recursive capabilities.
>   * Correctness is paramount, no fancyness allowed.
>   */
>
>  struct __ppc_lock {
> -   volatile struct cpu_info *mpl_cpu;
> -   volatile long   mpl_count;
> +   volatile unsigned int   mpl_bolt;
>  };
>
>  #ifndef _LOCORE
> @@ -44,10 +44,6 @@ struct __ppc_lock {
>  void __ppc_lock_init(struct __ppc_lock *);
>  void __ppc_lock(struct __ppc_lock *);
>  void __ppc_unlock(struct __ppc_lock *);
> -int __ppc_release_all(struct __ppc_lock *);
> -int __ppc_release_all_but_one(struct __ppc_lock *);
> -void __ppc_acquire_count(struct __ppc_lock *, int);
> -int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *);
>
>  #endif
>
> Index: arch/powerpc/powerpc/lock_machdep.c
> ===
> RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 lock_machdep.c
> --- arch/powerpc/powerpc/lock_machdep.c 15 Apr 2020 08:09:00 -  1.9
> +++ arch/powerpc/powerpc/lock_machdep.c 10 May 2021 23:33:00 -
> @@ -1,6 +1,7 @@
>  /* $OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $*/
>
>  /*
> + * Copyright (c) 2021 George Koehler 
>   * Copyright (c) 2007 Artur Grabowski 
>   *
>   * Permission to use, copy, modify, and distribute this software for any
> @@ -22,15 +23,29 @@
>  #include 
>
>  #include 
> -#include 
>
>  #include 
>
> +/*
> + * If __ppc_lock() crosses a page boundary in the kernel text, then it
> + * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r()
> + * would recursively call __ppc_lock().  The lock must be in a valid
> + * state when the page fault happens.
> + *
> + * This lock has 2 fields, "cpuid" and "count", packed in a 32-bit
> + * "bolt", so we can use 32-bit atomic ops to ensure that our lock is
> + * always in a valid state.
> + */
> +#define BOLT(cpuid, count) ((cpuid) << 24 | (count) & 0x00ffffff)
> +#define BOLT_CPUID(bolt)   ((bolt) >> 24)
> +#define BOLT_COUNT(bolt)   ((bolt) & 0x00ffffff)
> +#define BOLT_LOCKED(bolt)  (BOLT_COUNT(bolt) != 0)
> +#define BOLT_UNLOCKED(bolt)(BOLT_COUNT(bolt) == 0)
> +
>  void
>  __ppc_lock_init(struct __ppc_lock *lock)
>  {
> -   lock->mpl_cpu = NULL;
> -   lock->mpl_count = 0;
> +   lock->mpl_bolt = 0;
>  }
>
>  #if 
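
For illustration, here is a minimal sketch of how acquire/release on the
packed bolt could work while keeping the lock word valid at every instant.
This is my reading of the design, using OpenBSD's atomic_cas_uint(),
membar_enter()/membar_exit() and cpu_number(); it is not the remainder of
the diff, which is truncated above.

/* Sketch only: a recursive spinlock on the packed 32-bit "bolt". */
void
__ppc_lock(struct __ppc_lock *lock)
{
	unsigned int cpuid = cpu_number();
	unsigned int bolt;

	for (;;) {
		bolt = lock->mpl_bolt;
		if (BOLT_LOCKED(bolt) && BOLT_CPUID(bolt) == cpuid) {
			/* we already own it; a plain store stays valid */
			lock->mpl_bolt = BOLT(cpuid, BOLT_COUNT(bolt) + 1);
			return;
		}
		if (BOLT_UNLOCKED(bolt) &&
		    atomic_cas_uint(&lock->mpl_bolt, bolt,
		    BOLT(cpuid, 1)) == bolt) {
			membar_enter();
			return;
		}
		CPU_BUSY_CYCLE();
	}
}

void
__ppc_unlock(struct __ppc_lock *lock)
{
	unsigned int bolt = lock->mpl_bolt;

	membar_exit();
	/* dropping the count to 0 unlocks; stale cpuid bits are harmless */
	lock->mpl_bolt = BOLT(BOLT_CPUID(bolt), BOLT_COUNT(bolt) - 1);
}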

Re: all platforms: isolate hardclock(9) from statclock()

2021-01-14 Thread Dale Rahn
On Wed, Jan 13, 2021 at 9:38 PM Scott Cheloha 
wrote:
snip

> Do we need to properly set stathz for each platform in *this* diff?
> Or can it wait?
>
> I was hoping to do a sweep of the tree in a later patch and ensure
> that stathz is non-zero everywhere and simultaneously remove code like
> this:
>
> int realstathz = stathz ? stathz : hz;
>
> Near as I can tell, stathz doesn't need to be nonzero for statclock()
> to work correctly.  Also, setstatclockrate() doesn't even run unless
> stathz is non-zero, so there are no issues with the profiling clock
> stuff yet.
>
> > This isn't to say that removing the stathz== 0 magic should not be done,
> > but if done, make certain that stathz and profhz are properly
> > updated/configured.
>

Every driver which is being modified to call both hardclock() and
statclock() will need to be modified again to set stathz, and then you will
need to verify against the rest of the system that there are no other places
where the stathz init was previously forgotten. Therefore, why not correctly
modify each driver now?
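
The per-driver change would be small; something along these lines in each
driver's initclocks routine (a sketch of the pattern, not a specific
committed diff):

void
agtimer_cpu_initclocks(void)
{
	/* sketch: advertise the real statclock rate instead of leaving 0 */
	stathz = hz;
	profhz = stathz * 10;	/* profiling runs the clock 10x faster */

	/* ... existing timer setup continues unchanged ... */
}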


Re: all platforms: isolate hardclock(9) from statclock()

2021-01-09 Thread Dale Rahn
The 'magic' here was that MD code could choose to implement statclock (and
set stathz appropriately), or MD code could not care about the multiple
statclock/hardclock interfaces into the scheduler. Also some clock drivers
on a platform could enable split hardclock/statclock where others did not.
Back near the beginning of OpenBSD, platforms existed that had no
higher-precision clock than the timer interrupt, and nothing existed to even
approximate higher precision (e.g. cycle counters or instruction counters).

Some clock drivers have a separate timer/interrupt or separate 'event'
tracked to schedule the statclock() event. These platforms may also
(dynamically) change the stathz clock rate when profiling is
enabled/disabled. This is implemented on arm64 in agtimer_setstatclockrate().
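
That dynamic-rate case is the classic BSD randomized statclock: pick a
power-of-two variance below the nominal interval and schedule each tick at
the minimum plus a random offset. A sketch of the traditional pattern
(generic names, not any specific driver):

uint32_t statmin;	/* minimum ticks between statclock events */
uint32_t statvar;	/* power-of-two variance window */

/* called when profiling toggles between stathz and profhz */
void
setstatclockrate_sketch(uint32_t timer_frequency, int newhz)
{
	uint32_t statint = timer_frequency / newhz;
	uint32_t var;

	/* largest power of two no bigger than half the interval */
	for (var = 1; var <= statint / 2; var <<= 1)
		continue;
	statvar = var;
	statmin = statint - (statvar >> 1);
}

/* in the timer interrupt: when should the next statclock fire? */
uint64_t
next_statclock_sketch(uint64_t now, uint32_t rnd)
{
	return now + statmin + (rnd & (statvar - 1));
}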

Any clock driver that directly calls statclock() should make certain to set
stathz (and profhz) appropriately, as no assumptions about its
rate/frequency should be made.

This isn't to say that removing the stathz== 0 magic should not be done,
but if done, make certain that stathz and profhz are properly
updated/configured.

Dale

On Sat, Jan 9, 2021 at 5:57 AM Visa Hankala  wrote:

> On Fri, Jan 08, 2021 at 10:18:27AM -0600, Scott Cheloha wrote:
> > On Thu, Jan 07, 2021 at 08:12:10PM -0600, Scott Cheloha wrote:
> > > On Thu, Jan 07, 2021 at 09:37:58PM +0100, Mark Kettenis wrote:
> > > > > Date: Thu, 7 Jan 2021 11:15:41 -0600
> > > > > From: Scott Cheloha 
> > > > >
> > > > > Hi,
> > > > >
> > > > > I want to isolate statclock() from hardclock(9).  This will
> simplify
> > > > > the logic in my WIP dynamic clock interrupt framework.
> > > > >
> > > > > Currently, if stathz is zero, we call statclock() from within
> > > > > hardclock(9).  It looks like this (see sys/kern/kern_clock.c):
> > > > >
> > > > > void
> > > > > hardclock(struct clockframe *frame)
> > > > > {
> > > > > /* [...] */
> > > > >
> > > > > if (stathz == 0)
> > > > > statclock(frame);
> > > > >
> > > > > /* [...] */
> > > > >
> > > > > This is the case on alpha, amd64 (w/ lapic), hppa, i386 (w/ lapic),
> > > > > loongson, luna88k, mips64, and sh.
> > > > >
> > > > > (We seem to do it on sgi, too.  I was under the impression that sgi
> > > > > *was* a mips64 platform, yet sgi seems to it have its own clock
> > > > > interrupt code distinct from mips64's general clock interrupt code
> > > > > which is used by e.g. octeon).
> > > > >
> > > > > However, if stathz is not zero we call statclock() separately.
> This
> > > > > is the case on armv7, arm, arm64, macppc, powerpc64, and sparc64.
> > > > >
> > > > > (The situation for the general powerpc code and socppc in
> particular
> > > > > is a mystery to me.)
> > > > >
> > > > > If we could remove this MD distinction it would make my MI
> framework
> > > > > simpler.  Instead of checking stathz and conditionally starting a
> > > > > statclock event I will be able to unconditionally start a statclock
> > > > > event on all platforms on every CPU.
> > > > >
> > > > > In general I don't think the "is stathz zero?" variance between
> > > > > platforms is useful:
> > > > >
> > > > > - The difference is invisible to userspace, as we hide the fact
> that
> > > > >   stathz is zero from e.g. the kern.clockrate sysctl.
> > > > >
> > > > > - We run statclock() at some point, regardless of whether stathz is
> > > > >   zero.  If statclock() is run from hardclock(9) then isn't stathz
> > > > >   effectively equal to hz?
> > > > >
> > > > > - Because stathz might be zero we need to add a bunch of safety
> checks
> > > > >   to our MI code to ensure we don't accidentally divide by zero.
> > > > >
> > > > > Maybe we can ensure stathz is non-zero in a later diff...
> > > > >
> > > > > --
> > > > >
> > > > > Anyway, I don't think I have missed any platforms.  However, if
> > > > > platform experts could weigh in here to verify my changes (and test
> > > > > them!) I'd really appreciate it.
> > > > >
> > > > > In particular, I'm confused about how clock interrupts work on
> > > > > powerpc, socppc, and sgi.
> > > > >
> > > > > --
> > > > >
> > > > > Thoughts?  Platform-specific OKs?
> > > >
> > > > I wouldn't be opposed to doing this.  It is less magic!
> > > >
> > > > But yes, this needs to be tested on the platforms that you change.
> > >
> > > I guess I'll CC all the platform-specific people I'm aware of.
> > >
> > > > Note that many platforms don't have have separate schedclock and
> > > > statclock.  But on many platforms where we use a one-shot timer as
> the
> > > > clock we have a randomized statclock.  I'm sure Theo would love to
> > > > tell you about the cpuhog story...
> > >
> > > I am familiar with cpuhog.  It's the one thing everybody mentions when
> > > I talk about clock interrupts and/or statclock().
> > >
> > > Related:
> > >
> > > I wonder if we could avoid the cpuhog problem entirely by implementing
> > > some kind of MI cycle counting clock API that we use to timestamp
> > > whenever we 

Re: Enable arm64 PAN feature

2020-08-17 Thread Dale Rahn
Could we check whether there is an ESR value that indicates a PAN
violation, instead of using 'instruction recognition'?

That seems like it would be more reliable.
Thanks
Dale

On Mon, Aug 17, 2020 at 1:30 AM Jonathan Gray  wrote:

> On Sat, Aug 15, 2020 at 01:54:34PM +0200, Mark Kettenis wrote:
> > > Date: Sat, 15 Aug 2020 20:21:09 +1000
> > > From: Jonathan Gray 
> > >
> > > On Fri, Aug 14, 2020 at 11:06:59PM +0200, Mark Kettenis wrote:
> > > > > Date: Fri, 14 Aug 2020 14:40:23 +0200 (CEST)
> > > > > From: Mark Kettenis 
> > > > >
> > > > > I suppose a way to test this properly is to pick a system call and
> > > > > replace a copyin() with a direct access?  That will succeed without
> > > > > PAN but should fail with PAN enabled right?
> > > >
> > > > So that does indeed work.  However, the result is a hard hang.  So
> > > > here as an additional diff that makes sure we panic instead.  The
> idea
> > > > is that all user-space access from the kernel should be done by the
> > > > special unprivileged load/store instructions.
> > >
> > > Would disabling PSTATE.PAN in copyin/copyout along the lines of how
> > > stac/clac is done for SMAP avoid having to test the instruction type
> > > entirely?
> >
> > No.  The problem is that we need to catch permission faults caused by
> > PAN.  But since the faulting address may be valid in the sense that
> > UVM has a mapping for them that allows the requested access.  In that
> > case we end up looping since uvm_fault() returns 0 and we retry the
> > faulting instruction.
> >
> > > Is it faulting on valid copyin/copyout the way you have it at the
> > > moment?  I don't quite follow what is going on.
> >
> > The copyin/copyout functions use the unprivileged load/store
> > instructions (LDTR/STTR) which bypass PAN.  But we may still fault
> > because the memory hasn't been faulted in or because the memory has
> > been marked read-only for CoW or for MOD/REF emulation.  And some of
> > those faults manifest themselves as permission faults as well.
> >
> > Currently (without the diff quoted below) those faults will be handled
> > just fine.  The diff below needs to make sure this continues to be the
> > case.  The easiest way to do that is to check the instruction.
> >
> > Note that this check is in the "slow path".  In most cases the address
> > touched by copyin/copyout will already be in the page tables since
> > userland touched it already.
> >
> > Does that clarify things?
>
> Yes, thanks.  I'm fine with both of these diffs going in but still think
> you should change the mask.
>
> >
> >
> > > > Index: arch/arm64/arm64/trap.c
> > > > ===
> > > > RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
> > > > retrieving revision 1.27
> > > > diff -u -p -r1.27 trap.c
> > > > --- arch/arm64/arm64/trap.c   6 Jan 2020 12:37:30 -
>  1.27
> > > > +++ arch/arm64/arm64/trap.c   14 Aug 2020 21:05:54 -
> > > > @@ -65,6 +65,14 @@ void do_el0_error(struct trapframe *);
> > > >
> > > >  void dumpregs(struct trapframe*);
> > > >
> > > > +/* Check whether we're executing an unprivileged load/store
> instruction. */
> > > > +static inline int
> > > > +is_unpriv_ldst(uint64_t elr)
> > > > +{
> > > > + uint32_t insn = *(uint32_t *)elr;
> > > > + return ((insn & 0x3f200c00) == 0x38000800);
> > >
> > > The value of op1 (bit 26) is not used according to the table in the Arm
> > > ARM.  The mask would be better as 0x3b200c00
> > >
> > >
> > > > +}
> > > > +
> > > >  static void
> > > >  data_abort(struct trapframe *frame, uint64_t esr, uint64_t far,
> > > >  int lower, int exe)
> > > > @@ -104,8 +112,18 @@ data_abort(struct trapframe *frame, uint
> > > >   /* The top bit tells us which range to use */
> > > >   if ((far >> 63) == 1)
> > > >   map = kernel_map;
> > > > - else
> > > > + else {
> > > > + /*
> > > > +  * Only allow user-space access using
> > > > +  * unprivileged load/store instructions.
> > > > +  */
> > > > + if (!is_unpriv_ldst(frame->tf_elr)) {
> > > > + panic("attempt to access user address"
> > > > +   " 0x%llx from EL1", far);
> > > > + }
> > > > +
> > > > +   map = &p->p_vmspace->vm_map;
> > > > + }
> > > >   }
> > > >
> > > >   if (exe)
> > > >
> > > >
> > >
> >
> >
>
>
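
As a side note on the mask being discussed: both masks select the
"load/store register (unprivileged)" encoding class (bits 29:27 = 0b111,
bits 25:24 = 0b00, bit 21 = 0, bits 11:10 = 0b10); jsg's version simply
stops testing bit 26. A quick self-contained check against one sample
encoding (the LDTRB encoding below is my own worked example, not taken
from the diff):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	/* LDTRB W0, [X1]: size=00, 111, V=0, 00, opc=01, imm9=0, 10, Rn=1, Rt=0 */
	uint32_t insn = 0x38400820;

	/* the mask from the diff (also tests bit 26) */
	assert((insn & 0x3f200c00) == 0x38000800);
	/* jsg's suggested mask with bit 26 dropped */
	assert((insn & 0x3b200c00) == 0x38000800);
	return 0;
}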


Re: Enable arm64 PAN feature

2020-08-15 Thread Dale Rahn
Enabling PAN is a great idea. I have only skimmed this diff at this point,
but it looks reasonable, with the additional check to catch the PAN
violation in the data abort handler.

Dale

On Sat, Aug 15, 2020 at 6:56 AM Mark Kettenis 
wrote:

> > Date: Sat, 15 Aug 2020 20:21:09 +1000
> > From: Jonathan Gray 
> >
> > On Fri, Aug 14, 2020 at 11:06:59PM +0200, Mark Kettenis wrote:
> > > > Date: Fri, 14 Aug 2020 14:40:23 +0200 (CEST)
> > > > From: Mark Kettenis 
> > > >
> > > > I suppose a way to test this properly is to pick a system call and
> > > > replace a copyin() with a direct access?  That will succeed without
> > > > PAN but should fail with PAN enabled right?
> > >
> > > So that does indeed work.  However, the result is a hard hang.  So
> > > here as an additional diff that makes sure we panic instead.  The idea
> > > is that all user-space access from the kernel should be done by the
> > > special unprivileged load/store instructions.
> >
> > Would disabling PSTATE.PAN in copyin/copyout along the lines of how
> > stac/clac is done for SMAP avoid having to test the instruction type
> > entirely?
>
> No.  The problem is that we need to catch permission faults caused by
> PAN.  But since the faulting address may be valid in the sense that
> UVM has a mapping for them that allows the requested access.  In that
> case we end up looping since uvm_fault() returns 0 and we retry the
> faulting instruction.
>
> > Is it faulting on valid copyin/copyout the way you have it at the
> > moment?  I don't quite follow what is going on.
>
> The copyin/copyout functions use the unprivileged load/store
> instructions (LDTR/STTR) which bypass PAN.  But we may still fault
> because the memory hasn't been faulted in or because the memory has
> been marked read-only for CoW or for MOD/REF emulation.  And some of
> those faults manifest themselves as permission faults as well.
>
> Currently (without the diff quoted below) those faults will be handled
> just fine.  The diff below needs to make sure this continues to be the
> case.  The easiest way to do that is to check the instruction.
>
> Note that this check is in the "slow path".  In most cases the address
> touched by copyin/copyout will already be in the page tables since
> userland touched it already.
>
> Does that clarify things?
>
>
> > > Index: arch/arm64/arm64/trap.c
> > > ===
> > > RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
> > > retrieving revision 1.27
> > > diff -u -p -r1.27 trap.c
> > > --- arch/arm64/arm64/trap.c 6 Jan 2020 12:37:30 -   1.27
> > > +++ arch/arm64/arm64/trap.c 14 Aug 2020 21:05:54 -
> > > @@ -65,6 +65,14 @@ void do_el0_error(struct trapframe *);
> > >
> > >  void dumpregs(struct trapframe*);
> > >
> > > +/* Check whether we're executing an unprivileged load/store
> instruction. */
> > > +static inline int
> > > +is_unpriv_ldst(uint64_t elr)
> > > +{
> > > +   uint32_t insn = *(uint32_t *)elr;
> > > +   return ((insn & 0x3f200c00) == 0x38000800);
> >
> > The value of op1 (bit 26) is not used according to the table in the Arm
> > ARM.  The mask would be better as 0x3b200c00
> >
> >
> > > +}
> > > +
> > >  static void
> > >  data_abort(struct trapframe *frame, uint64_t esr, uint64_t far,
> > >  int lower, int exe)
> > > @@ -104,8 +112,18 @@ data_abort(struct trapframe *frame, uint
> > > /* The top bit tells us which range to use */
> > > if ((far >> 63) == 1)
> > > map = kernel_map;
> > > -   else
> > > +   else {
> > > +   /*
> > > +* Only allow user-space access using
> > > +* unprivileged load/store instructions.
> > > +*/
> > > +   if (!is_unpriv_ldst(frame->tf_elr)) {
> > > +   panic("attempt to access user address"
> > > + " 0x%llx from EL1", far);
> > > +   }
> > > +
> > > map = &p->p_vmspace->vm_map;
> > > +   }
> > > }
> > >
> > > if (exe)
> > >
> > >
> >
>
>


Re: powerpc64: Target Info in clang for __OpenBSD__

2020-05-19 Thread Dale Rahn
On Tue, May 19, 2020 at 11:36:23PM +0200, Patrick Wildt wrote:
> Hi,
> 
> drahn@ was complaining to me that his cross-compiler wasn't defining
> __OpenBSD__ or __ELF__, and I think the fix is pretty simple.  We're
> just missing a case in a switch-case.
> 
> The .cpp file itself still compiles, but I haven't built a full clang
> with it.  Please give it a go and report back.
> 
> I'll already ask for OKs though, but will only commit once I got
> positive feedback.  :)
> 
> ok?

$ powerpc64-unknown-openbsd6.7-clang -dM -E -
...
#define __OpenBSD__ 1
...
#define __ELF__ 1
...

ok drahn@

> 
> Patrick
> 
> diff --git a/gnu/llvm/tools/clang/lib/Basic/Targets.cpp 
> b/gnu/llvm/tools/clang/lib/Basic/Targets.cpp
> index 3c139d72479..5bff08ad70d 100644
> --- a/gnu/llvm/tools/clang/lib/Basic/Targets.cpp
> +++ b/gnu/llvm/tools/clang/lib/Basic/Targets.cpp
> @@ -349,6 +349,8 @@ TargetInfo *AllocateTarget(const llvm::Triple ,
>return new FreeBSDTargetInfo(Triple, Opts);
>  case llvm::Triple::NetBSD:
>return new NetBSDTargetInfo(Triple, Opts);
> +case llvm::Triple::OpenBSD:
> +  return new OpenBSDTargetInfo(Triple, Opts);
>  default:
>return new PPC64TargetInfo(Triple, Opts);
>  }
> @@ -359,6 +361,8 @@ TargetInfo *AllocateTarget(const llvm::Triple ,
>return new LinuxTargetInfo(Triple, Opts);
>  case llvm::Triple::NetBSD:
>return new NetBSDTargetInfo(Triple, Opts);
> +case llvm::Triple::OpenBSD:
> +  return new OpenBSDTargetInfo(Triple, Opts);
>  default:
>return new PPC64TargetInfo(Triple, Opts);
>  }
> 
Dale Rahn   dr...@dalerahn.com



Re: arm64/disksubr.c

2018-05-28 Thread Dale Rahn
ok drahn@
On Mon, May 28, 2018 at 03:57:58PM +0200, Mark Kettenis wrote:
> This has a hand-rolled readdisksector.  Replace it with a function
> call like we do on other architectures.  Also remove an include that
> isn't needed and isn't present on other architectures.
> 
> ok?
> 
> 
> Index: arch/arm64/arm64/disksubr.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/disksubr.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 disksubr.c
> --- arch/arm64/arm64/disksubr.c   28 Feb 2017 10:49:37 -  1.2
> +++ arch/arm64/arm64/disksubr.c   28 May 2018 13:55:31 -
> @@ -32,7 +32,6 @@
>   */
>  
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -110,15 +109,11 @@ writedisklabel(dev_t dev, void (*strat)(
>   goto done;
>  
>   /* Read it in, slap the new label in, and write it back out */
> - bp->b_blkno = DL_BLKTOSEC(lp, partoff + DOS_LABELSECTOR) *
> - DL_BLKSPERSEC(lp);
> - offset = DL_BLKOFFSET(lp, partoff + DOS_LABELSECTOR);
> - bp->b_bcount = lp->d_secsize;
> - CLR(bp->b_flags, B_READ | B_WRITE | B_DONE);
> - SET(bp->b_flags, B_BUSY | B_READ | B_RAW);
> - (*strat)(bp);
> - if ((error = biowait(bp)) != 0)
> + error = readdisksector(bp, strat, lp, DL_BLKTOSEC(lp, partoff +
> + DOS_LABELSECTOR));
> + if (error)
>   goto done;
> + offset = DL_BLKOFFSET(lp, partoff + DOS_LABELSECTOR);
>  
>   dlp = (struct disklabel *)(bp->b_data + offset);
>   *dlp = *lp;
> 
Dale Rahn   dr...@dalerahn.com



arm64 pagezero_cache, page zero performance improvement

2017-08-27 Thread Dale Rahn
Performance: use the cache zeroing function in pmap_zero_page.

This improves page zeroing (a rather common occurrence) by over 8x.

The only restriction is that pagezero_cache must operate on cached pages,
but pmap_zero_page always uses cached pages.
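
pagezero_cache itself is implemented elsewhere (this diff only declares
and calls it); the core idea, expressed as a C sketch with inline
assembly, is to let 'dc zva' zero a whole block per iteration instead of
storing word by word. The 64-byte block size is an assumption here; real
code should take the block size from DCZID_EL0.

/* Sketch only: zero one page with DC ZVA, assuming 64-byte ZVA blocks. */
void
pagezero_cache(vaddr_t va)
{
	vaddr_t end = va + PAGE_SIZE;

	for (; va < end; va += 64)
		__asm volatile("dc zva, %0" :: "r" (va) : "memory");
	__asm volatile("dsb ish");	/* complete the zeroing stores */
}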


Index: arm64/pmap.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
retrieving revision 1.37
diff -u -p -r1.37 pmap.c
--- arm64/pmap.c9 Aug 2017 05:53:11 -   1.37
+++ arm64/pmap.c27 Aug 2017 18:25:28 -
@@ -769,8 +769,7 @@ pmap_zero_page(struct vm_page *pg)
 
pmap_kenter_pa(zero_page, pa, PROT_READ|PROT_WRITE);
 
-   /* XXX use better zero operation? */
-   bzero((void *)zero_page, PAGE_SIZE);
+   pagezero_cache(zero_page);
 
pmap_kremove_pg(zero_page);
 }
Index: include/pmap.h
===
RCS file: /cvs/src/sys/arch/arm64/include/pmap.h,v
retrieving revision 1.5
diff -u -p -r1.5 pmap.h
--- include/pmap.h  10 May 2017 21:58:55 -  1.5
+++ include/pmap.h  27 Aug 2017 18:25:28 -
@@ -59,6 +59,8 @@ extern paddr_t zero_page;
 extern paddr_t copy_src_page;
 extern paddr_t copy_dst_page;
 
+void pagezero_cache(vaddr_t);
+
 /*
  * Pmap stuff
  */
Dale Rahn   dr...@dalerahn.com



Re: armv7 small XXX fix

2017-07-12 Thread Dale Rahn
On Wed, Jul 12, 2017 at 11:06:23PM +0300, Artturi Alm wrote:
> On Wed, Jul 12, 2017 at 06:12:34PM +0200, Mark Kettenis wrote:
> > > Date: Mon, 10 Jul 2017 23:18:59 +0300
> > > From: Artturi Alm <artturi@gmail.com>
> > > 
> > > Hi,
> > > 
> > > this does clutter my diffs, and the XXX comment is correct,
> > 
> > It probably isn't. None of the other architectures have those macros
> > in their .
> > 
> 
> Ok, didn't consider that, you're right and the diff withdrawn, but anyway,
> what i was imprecisely after was that i'd prefer
> having something like RODATA(name) found from sparc64/include/asm.h,
> instead of the useless _C_LABEL() to use in armv7 md .S assembly, and
> just because of the weird name i didn't find intuitive enough, i didn't
> even suggest the arm64 EENTRY()..
> Don't get me wrong, i'm more than happy to drop all the labeling
> macros out of my diffs, and choose my self what is global and what is not,
> while it's against the minimalism i'm now aiming at to even get the diffs
> read when freetime does meet:)

_C_LABEL() is a holdover from the pre-ELF days. At one time (in NetBSD)
this code still compiled using an a.out toolchain. In that era, it was
defined as _C_LABEL(x) _/**/x (this was also pre-ansi), or _ ## x
post-ansi. On ELF, _C_LABEL evaluates to just the symbol. It cannot have
any association with either text or data, because proper use of it can
refer to either text or data, e.g.: bl _C_LABEL(c_func).

Basically it is a relic from the past; it might make sense to just remove
all references to _C_LABEL() at this point and use the symbol itself.
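
A sketch of the two historical definitions side by side (reconstructed
from the description above, not copied from any particular asm.h):

#ifndef __ELF__
/* a.out: every C symbol gets a leading underscore at the asm level */
#define _C_LABEL(x)	_ ## x		/* C "c_func" -> asm "_c_func" */
#else
/* ELF: C and asm symbol names are identical, so the macro is a no-op */
#define _C_LABEL(x)	x
#endif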

Dale Rahn   dr...@dalerahn.com



Re: arm64 lock: no userland progress, several procs in wchan "vp"

2017-05-01 Thread Dale Rahn
  479844  1  0  3 0x80  mfsidl mount_mfs
> >  58334  330646  0  0  3 0x14200  pgzero zerothread
> >  15557  331142  0  0  3 0x14200  aiodoned  aiodoned  
> >  34557  432814  0  0  3 0x14200  syncer update  
> >  82663  208419  0  0  3 0x14200  cleaner   cleaner
> >  51853  347618  0  0  3 0x14200  reaper reaper 
> >  18753  499821  0  0  3 0x14200  pgdaemon  pagedaemon
> >  59831  415568  0  0  3 0x14200  bored crynlk
> >  29354  478337  0  0  3 0x14200  bored crypto
> >  338986377  0  0  3 0x14200  pftm  pfpurge
> >  33736  115197  0  0  3 0x14200  usbtsk usbtask
> >  54044   43212  0  0  3 0x14200  usbatsk   usbatsk
> >  57344  273215  0  0  3 0x14200  bored softnet
> >  88556   58351  0  0  3 0x14200  bored systqmp
> >  69342  477649  0  0  3 0x14200  bored systq  
> >  25767  494373  0  0  3  0x40014200  bored softclock
> > *43785   51419  0  0  7  0x40014200  idle0
> >   9892  420912  0  0  3 0x14200  kmalloc   kmthread
> >  1   54365  0  0  3 0x82  wait  init
> >  0   0 -1  0  3 0x10200  scheduler swapper
> 
> The diff below might fix thise.  Or it might actually turn this into a
> hard hang...
> 
> Nevertheless, could you try running with it?

I have effectively the identical change on my SMP branch, so if you
want to commit this, ok drahn@

It just pushes the allocation failure into the CANFAIL case where it
should be handled almost gracefully. On SMP it appeared necessary
because of nested mutex/locks.

> 
> 
> Index: pmap.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.33
> diff -u -p -r1.33 pmap.c
> --- pmap.c15 Apr 2017 11:15:02 -  1.33
> +++ pmap.c1 May 2017 20:16:44 -
> @@ -322,17 +322,10 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, str
>   struct pmapvp2 *vp2;
>   struct pmapvp3 *vp3;
>  
> - int vp_pool_flags;
> - if (pm == pmap_kernel()) {
> - vp_pool_flags  = PR_NOWAIT;
> - } else {
> - vp_pool_flags  = PR_WAITOK |PR_ZERO;
> - }
> -
>   if (pm->have_4_level_pt) {
>   vp1 = pm->pm_vp.l0->vp[VP_IDX0(va)];
>   if (vp1 == NULL) {
> - vp1 = pool_get(&pmap_vp_pool, vp_pool_flags);
> + vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
>   if (vp1 == NULL) {
>   if ((flags & PMAP_CANFAIL) == 0)
>   panic("%s: unable to allocate L1",
> @@ -347,7 +340,7 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, str
>  
>   vp2 = vp1->vp[VP_IDX1(va)];
>   if (vp2 == NULL) {
> - vp2 = pool_get(&pmap_vp_pool, vp_pool_flags);
> + vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
>   if (vp2 == NULL) {
>   if ((flags & PMAP_CANFAIL) == 0)
>   panic("%s: unable to allocate L2", __func__);
> @@ -358,7 +351,7 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, str
>  
>   vp3 = vp2->vp[VP_IDX2(va)];
>   if (vp3 == NULL) {
> - vp3 = pool_get(&pmap_vp_pool, vp_pool_flags);
> + vp3 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
>   if (vp3 == NULL) {
>   if ((flags & PMAP_CANFAIL) == 0)
>   panic("%s: unable to allocate L3", __func__);
> 
Dale Rahn   dr...@dalerahn.com



Re: ARM64: attach CPUs for identification purposes.

2017-04-24 Thread Dale Rahn
On Mon, Apr 24, 2017 at 10:18:08PM +0200, Mark Kettenis wrote:
> > Date: Mon, 24 Apr 2017 16:01:39 -0400
> > From: Dale Rahn <dr...@dalerahn.com>
> > 
> > The below code is set up to attach and identify processors for arm64
> > currently it only identifies model, not cache size, but that can be added
> > later. It is set up to attach secondary processors later (for when SMP
> > is present).
> 
> I don't think this cpubus(4) device makes much sense.  We don't have
> something like that on other FDT/OpenFirmware architectures.
> 

I am not aware of another way to have the fdt bus probe force a recursive
search of the tree other than this; as previously said, the '/cpus' node
does not have a compatible field to do a normal attach.
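
For reference, a typical /cpus container looks like this in device tree
source; the container itself carries no compatible property, only the cpu
subnodes do (an illustrative excerpt, not from any specific board):

cpus {
	#address-cells = <2>;
	#size-cells = <0>;

	cpu@0 {
		device_type = "cpu";
		compatible = "arm,cortex-a53";
		reg = <0x0 0x0>;
	};
};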

Anyway, here is an updated diff which ignores nodes under /cpus other than 
'cpu' nodes.

diff --git sys/arch/arm64/arm64/cpu.c sys/arch/arm64/arm64/cpu.c
new file mode 100644
index 000..7535522d1ed
--- /dev/null
+++ sys/arch/arm64/arm64/cpu.c
@@ -0,0 +1,138 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 Dale Rahn <dr...@dalerahn.com>
+ * Copyright (c) 1997-2004 Opsycon AB (www.opsycon.se)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+int	cpumatch(struct device *, void *, void *);
+void   cpuattach(struct device *, struct device *, void *);
+
+struct cpu_cores {
+   int  id;
+   char*name;
+};
+
+struct cpu_cores cpu_cores_none[] = {
+   { 0x0, "Unknown" },
+};
+
+struct cpu_cores cpu_cores_arm[] = {
+   { CPU_PART_CORTEX_A53, "Cortex-A53" },
+   { CPU_PART_CORTEX_A57, "Cortex-A57" },
+   { CPU_PART_CORTEX_A72, "Cortex-A72" },
+   { 0x0, "Unknown" },
+};
+
+/* arm cores makers */
+const struct implementers {
+   int   id;
+   char *name;
+   struct cpu_cores*corelist;
+} cpu_implementers[] = {
+   { CPU_IMPL_ARM, "ARM",  cpu_cores_arm },
+   { 0,"", NULL },
+};
+
+
+void
+cpu_identify(struct cpu_info *ci)
+{
+   uint64_t midr, impl, part;
+   char *impl_name = "Unknown";
+   char *part_name = "Unknown";
+   struct cpu_cores *coreselecter = NULL;
+
+   int i;
+
+   midr = READ_SPECIALREG(midr_el1);
+
+   impl = CPU_IMPL(midr);
+   part = CPU_PART(midr);
+
+   for (i = 0; cpu_implementers[i].id != 0; i++) {
+   if (cpu_implementers[i].id == impl) {
+   impl_name = cpu_implementers[i].name;
+   coreselecter =  cpu_implementers[i].corelist;
+   break;
+   }
+   }
+
+   if (impl_name != NULL) {
+   for (i = 0; coreselecter[i].id != 0; i++) {
+   if (part == coreselecter[i].id) {
+   part_name = coreselecter[i].name; 
+   }
+   }
+   printf(" %s %s r%dp%d", impl_name, part_name, CPU_VAR(midr),
+   CPU_REV(midr));
+   } else {
+   printf (" unknown implementer");
+   
+   }
+}
+
+void
+cpuattach(struct device *parent, struct device *dev, void *aux)
+{
+   struct cpu_info *ci;
+   int cpuno = dev->dv_unit;
+
+   if (cpuno == 0) {
+   ci = &cpu_info_primary;
+   ci->ci_cpuid = cpuno;
+   ci->ci_dev = dev;
+
+   printf

ARM64: attach CPUs for identification purposes.

2017-04-24 Thread Dale Rahn
The below code is set up to attach and identify processors for arm64
currently it only identifies model, not cache size, but that can be added
later. It is set up to attach secondary processors later (for when SMP
is present).

diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c
new file mode 100644
index 000..7535522d1ed
--- /dev/null
+++ b/sys/arch/arm64/arm64/cpu.c
@@ -0,0 +1,138 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 Dale Rahn <dr...@dalerahn.com>
+ * Copyright (c) 1997-2004 Opsycon AB (www.opsycon.se)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+int	cpumatch(struct device *, void *, void *);
+void   cpuattach(struct device *, struct device *, void *);
+
+struct cpu_cores {
+   int  id;
+   char*name;
+};
+
+struct cpu_cores cpu_cores_none[] = {
+   { 0x0, "Unknown" },
+};
+
+struct cpu_cores cpu_cores_arm[] = {
+   { CPU_PART_CORTEX_A53, "Cortex-A53" },
+   { CPU_PART_CORTEX_A57, "Cortex-A57" },
+   { CPU_PART_CORTEX_A72, "Cortex-A72" },
+   { 0x0, "Unknown" },
+};
+
+/* arm cores makers */
+const struct implementers {
+   int   id;
+   char *name;
+   struct cpu_cores*corelist;
+} cpu_implementers[] = {
+   { CPU_IMPL_ARM, "ARM",  cpu_cores_arm },
+   { 0,"", NULL },
+};
+
+
+void
+cpu_identify(struct cpu_info *ci)
+{
+   uint64_t midr, impl, part;
+   char *impl_name = "Unknown";
+   char *part_name = "Unknown";
+   struct cpu_cores *coreselecter = NULL;
+
+   int i;
+
+   midr = READ_SPECIALREG(midr_el1);
+
+   impl = CPU_IMPL(midr);
+   part = CPU_PART(midr);
+
+   for (i = 0; cpu_implementers[i].id != 0; i++) {
+   if (cpu_implementers[i].id == impl) {
+   impl_name = cpu_implementers[i].name;
+   coreselecter =  cpu_implementers[i].corelist;
+   break;
+   }
+   }
+
+   if (impl_name != NULL) {
+   for (i = 0; coreselecter[i].id != 0; i++) {
+   if (part == coreselecter[i].id) {
+   part_name = coreselecter[i].name; 
+   }
+   }
+   printf(" %s %s r%dp%d", impl_name, part_name, CPU_VAR(midr),
+   CPU_REV(midr));
+   } else {
+   printf (" unknown implementer");
+   
+   }
+}
+
+void
+cpuattach(struct device *parent, struct device *dev, void *aux)
+{
+   struct cpu_info *ci;
+   int cpuno = dev->dv_unit;
+
+   if (cpuno == 0) {
+   ci = &cpu_info_primary;
+   ci->ci_cpuid = cpuno;
+   ci->ci_dev = dev;
+
+   printf(":");
+   cpu_identify(ci);
+   } else {
+   printf(": cpu not attached");
+   }
+
+   printf("\n");
+}
+
diff --git a/sys/arch/arm64/conf/GENERIC b/sys/arch/arm64/conf/GENERIC
index 44ab4f3e39f..3a0f9433491 100644
--- a/sys/arch/arm64/conf/GENERIC
+++ b/sys/arch/arm64/conf/GENERIC
@@ -51,6 +51,9 @@ ahci* at fdt?
 pciecam*   at fdt?
 pci*   at pciecam?
 
+cpubus0at mainbus?
+cpu0   at cpubus?
+
 # NS16550 compatible serial ports
 com*   at fdt?
 
diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64

Re: Another arm64 pmap.c cleanup

2017-04-04 Thread Dale Rahn
> @@ -1853,10 +1746,6 @@ int
>  pmap_clear_reference(struct vm_page *pg)
>  {
>   struct pte_desc *pted;
> -
> - //printf("%s\n", __func__);
> -
> - // XXX locks
>   int s;
>  
>   s = splvm();
> @@ -1885,8 +1774,6 @@ pmap_unwire(pmap_t pm, vaddr_t va)
>  {
>   struct pte_desc *pted;
>  
> - //printf("%s\n", __func__);
> -
>   pted = pmap_vp_lookup(pm, va, NULL);
>   if ((pted != NULL) && (pted->pted_va & PTED_VA_WIRED_M)) {
>   pm->pm_stats.wired_count--;
> @@ -1926,7 +1813,7 @@ pmap_setup_avail(uint64_t ram_start, uin
>   pmap_avail[0].start = ram_start;
>   pmap_avail[0].size = ram_end-ram_start;
>  
> - // XXX - support more than one region
> + /* XXX - support more than one region */
>   pmap_memregions[0].start = ram_start;
>   pmap_memregions[0].end = ram_end;
>   pmap_memcount = 1;
> @@ -2056,7 +1943,6 @@ pmap_steal_avail(size_t size, int align,
>   struct mem_region *mp;
>   long start;
>   long remsize;
> - arm_kvm_stolen += size; // debug only
>  
>   for (mp = pmap_avail; mp->size; mp++) {
>   if (mp->size > size) {
> @@ -2074,7 +1960,6 @@ pmap_steal_avail(size_t size, int align,
>   }
>   panic ("unable to allocate region with size %x align %x",
>   size, align);
> - return 0; // XXX - only here because of ifdef
>  }
>  
>  vaddr_t
> @@ -2142,49 +2027,45 @@ pmap_show_mapping(uint64_t va)
>   struct pmapvp2 *vp2;
>   struct pmapvp3 *vp3;
>   struct pte_desc *pted;
> - printf("showing mapping of %llx\n", va);
>   struct pmap *pm;
> - if (va & 1ULL << 63) {
> + uint64_t ttbr0, tcr;
> +
> + printf("showing mapping of %llx\n", va);
> +
> + if (va & 1ULL << 63)
>   pm = pmap_kernel();
> - } else {
> + else
>   pm = curproc->p_vmspace->vm_map.pmap;
> - }
>  
>   if (pm->have_4_level_pt) {
>   printf("  vp0 = %llx off %x\n",  pm->pm_vp.l0, VP_IDX0(va)*8);
>   vp1 = pm->pm_vp.l0->vp[VP_IDX0(va)];
> - if (vp1 == NULL) {
> + if (vp1 == NULL)
>   return;
> - }
>   } else {
>   vp1 = pm->pm_vp.l1;
>   }
> - uint64_t ttbr0, tcr;
> +
>   __asm volatile ("mrs %x0, ttbr0_el1" : "=r"(ttbr0));
>   __asm volatile ("mrs %x0, tcr_el1" : "=r"(tcr));
> -
>   printf("  ttbr0 %llx %llx tcr %llx\n", ttbr0, pm->pm_pt0pa, tcr);
>   printf("  vp1 = %llx\n", vp1);
>  
>   vp2 = vp1->vp[VP_IDX1(va)];
>   printf("  vp2 = %llx lp2 = %llx idx1 off %x\n",
>   vp2, vp1->l1[VP_IDX1(va)], VP_IDX1(va)*8);
> - if (vp2 == NULL) {
> + if (vp2 == NULL)
>   return;
> - }
>  
>   vp3 = vp2->vp[VP_IDX2(va)];
>   printf("  vp3 = %llx lp3 = %llx idx2 off %x\n",
>   vp3, vp2->l2[VP_IDX2(va)], VP_IDX2(va)*8);
> - if (vp3 == NULL) {
> + if (vp3 == NULL)
>   return;
> - }
>  
>   pted = vp3->vp[VP_IDX3(va)];
>   printf("  pted = %p lp3 = %llx idx3 off  %x\n",
>   pted, vp3->l3[VP_IDX3(va)], VP_IDX3(va)*8);
> -
> - return;
>  }
>  
>  #define NUM_ASID (1 << 16)
> 
Dale Rahn   dr...@dalerahn.com



Re: Another arm64 pmap cleanup diff

2017-04-01 Thread Dale Rahn
On Sat, Apr 01, 2017 at 06:08:23PM +1100, Jonathan Gray wrote:
> On Fri, Mar 31, 2017 at 02:03:37PM +0200, Mark Kettenis wrote:
> > On ARMv8, the translation table walk is fully coherent so there is no
> > reason to explicitly flush the cache before invalidating the TLB.  The
> > barrier that is included in out TLB flushing code should be enough to
> > guarantee that the TLB walking hardware sees the updated page table
> > contents, so the explicit barriers can go as well.  Diff also
> > sanitizes the code immediately around the removed bits of code to drop
> > redundant curly braces, avoid C++ style comments and removes some
> > blank lines that aren't particular useful.
> > 
> > Can somebody give this a spin on hardware with a Cortex-A57 and verify
> > that this doesn't make things more unstable?
> 
> I didn't encounter any problems running a make build with this change.
> It also slightly improved the build time:
> 
> 291m24.74s real   257m59.41s user16m05.09s system
> 286m32.07s real   255m44.31s user15m22.44s system
> 
I wanted to see this tested, but given this testing, ok drahn@
Dale Rahn   dr...@dalerahn.com



Re: amd64 struct reg

2017-03-21 Thread Dale Rahn
On Tue, Mar 21, 2017 at 04:45:01PM +0100, Mark Kettenis wrote:
> > Date: Tue, 21 Mar 2017 15:57:46 +0100 (CET)
> > From: Mark Kettenis <mark.kette...@xs4all.nl>
> > 
> > > Date: Mon, 20 Mar 2017 12:26:28 -0400
> > > From: Dale Rahn <dr...@dalerahn.com>
> > > 
> > > On Mon, Mar 20, 2017 at 11:54:03AM +0100, Mark Kettenis wrote:
> > > > > Date: Mon, 20 Mar 2017 13:31:32 +1100
> > > > > From: Jonathan Gray <j...@jsg.id.au>
> > > > > Cc: tech@openbsd.org
> > > > > 
> > > > > On Mon, Mar 20, 2017 at 01:35:04AM +0100, Mark Kettenis wrote:
> > > > > > It turns out that pretty much all relevant aarch64 OSes use the same
> > > > > > layout for transferring registers in their debug interfaces.  Except
> > > > > > for us.  That doesn't make sense and would mean I'd have to do
> > > > > > additional work in my lldb porting efforts.
> > > > > > 
> > > > > > Diff below revises "struct reg" for amd64 to be compatible with what
> > > > > > NetBSD provides.  That just matches our naming conventions for 
> > > > > > struct
> > > > > > reg better.  The actual names are largely irrelevant as debuggers
> > > > > > hardcode the layouts anyway to support cross-debugging.
> > > > > > 
> > > > > > This struct isn't actually used yet, so these changes don't really
> > > > > > have any ABI consequences.  But once this is in, I'm planning on
> > > > > > making core dumps and ptrace actually work on arm64.
> > > > > > 
> > > > > > ok?
> > > > > 
> > > > > It looks like NetBSD just has a cross compiled toolchain
> > > > > nothing that runs on actual hardware.
> > > > > 
> > > > > Given FreeBSD does I'd consider that more relevant, and it
> > > > > has:
> > > > > 
> > > > > struct reg {
> > > > >   uint64_t x[30];
> > > > >   uint64_t lr;
> > > > >   uint64_t sp;
> > > > >   uint64_t elr;
> > > > >   uint32_t spsr;
> > > > > };
> > > > > 
> > > > > struct fpreg {
> > > > >   __uint128_t fp_q[32];
> > > > >   uint32_tfp_sr;
> > > > >   uint32_tfp_cr;
> > > > > };
> > > > 
> > > > The FreeBSD definition is equivalent to the NetBSD one:
> > > > 
> > > > lr  == x30
> > > > elr == pc
> > > > 
> > > > It's just that the names assigned by NetBSD make a little bit more
> > > > sense and follow the pattern we use on many other architectures.
> > > > 
> > > > NetBSD also includes the userland per-thread-register which I think is
> > > > a good idea.
> > > > 
> > > 
> > > Naming and order realistically doesn't matter that much, If it is easier 
> > > to
> > > move registers around to match a userland structure in process_*regs(),
> > > that is reasonable. That said: the more I have followed NetBSD's lead in 
> > > the
> > > past, the more I have regretted it later.
> > 
> > Here is an update diff that actually implementes process_read_regs().
> > Changed the struct reg definition slightly to make r_rl explicit to
> > match our struct trapframe a bit closer.  That makes the
> > process_read_regs() implementation more obvious.
> > 
> > This is enough to make core dumps useful, so I'd like to commit this
> > partial implementation.  Did exp

Re: amd64 struct reg

2017-03-20 Thread Dale Rahn
On Mon, Mar 20, 2017 at 09:15:09AM -0700, Philip Guenther wrote:
> On Mon, 20 Mar 2017, Dale Rahn wrote:
> ...
> > Including the thread pointer would seem to make sense, but there is there
> > a proc vs process issue there (thread vs p
> 
> Uh, the registers are _all_ per-thread!
> 
> 
> Philip
> 

Oops? I thought I had deleted that line of comment in the email. I had
looked it up and realized my mistake already.

Dale Rahn   dr...@dalerahn.com



Re: amd64 struct reg

2017-03-20 Thread Dale Rahn
On Mon, Mar 20, 2017 at 11:54:03AM +0100, Mark Kettenis wrote:
> > Date: Mon, 20 Mar 2017 13:31:32 +1100
> > From: Jonathan Gray <j...@jsg.id.au>
> > Cc: tech@openbsd.org
> > 
> > On Mon, Mar 20, 2017 at 01:35:04AM +0100, Mark Kettenis wrote:
> > > It turns out that pretty much all relevant aarch64 OSes use the same
> > > layout for transferring registers in their debug interfaces.  Except
> > > for us.  That doesn't make sense and would mean I'd have to do
> > > additional work in my lldb porting efforts.
> > > 
> > > Diff below revises "struct reg" for amd64 to be compatible with what
> > > NetBSD provides.  That just matches our naming conventions for struct
> > > reg better.  The actual names are largely irrelevant as debuggers
> > > hardcode the layouts anyway to support cross-debugging.
> > > 
> > > This struct isn't actually used yet, so these changes don't really
> > > have any ABI consequences.  But once this is in, I'm planning on
> > > making core dumps and ptrace actually work on arm64.
> > > 
> > > ok?
> > 
> > It looks like NetBSD just has a cross compiled toolchain
> > nothing that runs on actual hardware.
> > 
> > Given FreeBSD does I'd consider that more relevant, and it
> > has:
> > 
> > struct reg {
> > uint64_t x[30];
> > uint64_t lr;
> > uint64_t sp;
> > uint64_t elr;
> > uint32_t spsr;
> > };
> > 
> > struct fpreg {
> > __uint128_t fp_q[32];
> > uint32_tfp_sr;
> > uint32_tfp_cr;
> > };
> 
> The FreeBSD definition is equivalent to the NetBSD one:
> 
> lr  == x30
> elr == pc
> 
> It's just that the names assigned by NetBSD make a little bit more
> sense and follow the pattern we use on many other architectures.
> 
> NetBSD also includes the userland per-thread-register which I think is
> a good idea.
> 

Naming and order realistically don't matter that much. If it is easier to
move registers around to match a userland structure in process_*regs(),
that is reasonable. That said: the more I have followed NetBSD's lead in the
past, the more I have regretted it later.

Including the thread pointer would seem to make sense, but there is
a proc vs process issue there (thread vs p

Using __uint128_t for the FPU registers is a lot better than the
uint64_t[64] that is there now.

Dale Rahn   dr...@dalerahn.com



arm64: allocate an empty user map for kernel mode.

2017-03-17 Thread Dale Rahn
When the kernel switches to the kernel pmap, the pmap had previously been
leaving ttbr0 unmodified, as the kernel pmap space is all accessed through
ttbr1. Well, at one point ttbr0 was updated with the ttbr1 content, but
that caused MASSIVE failures.

This diff add an allocation of a zeroed/empty L0/L1 page table to put into
ttbr0 when switching to the kernel pmap. A zeroed top level buffer will
indicate there are no valid L1/L2 entries and cause any accesses to
low addresses to cause a data abort.

This table is also used when a core is about to switch to a new asid, if
the pmap determines that it needs to reuse an asid, this will prevent
speculative accesses on the old ttbr0 after tlb invalidation.

Also, another minor change: an invalid asid in a pmap is always flagged as
'pm_asid == -1'; checking for 'pm_asid < 0' could cause confusion, so the
check was changed to match the invalidation value.

A possible improvement in the code below is to allocate and zero a buffer
using a size define different from Lx_TABLE_ALIGN, but there appears to be
no appropriate define present.

diff --git a/sys/arch/arm64/arm64/pmap.c b/sys/arch/arm64/arm64/pmap.c
index d1fa358b85f..18aff664beb 100644
--- a/sys/arch/arm64/arm64/pmap.c
+++ b/sys/arch/arm64/arm64/pmap.c
@@ -1112,6 +1112,7 @@ CTASSERT(sizeof(struct pmapvp0) == 8192);
 
 int mappings_allocated = 0;
 int pted_allocated = 0;
+paddr_t pted_kernel_ttbr0;
 
 vaddr_t
 pmap_bootstrap(long kvo, paddr_t lpt1,  long kernelstart, long kernelend,
@@ -1214,7 +1215,12 @@ pmap_bootstrap(long kvo, paddr_t lpt1,  long 
kernelstart, long kernelend,
}
}
 
-   // XXX should this extend the l2 bootstrap mappings for kernel entries?
+   /* allocate an empty ttbr0 for when we are switched to kernel pmap */
+   pted_kernel_ttbr0 = pmap_steal_avail(Lx_TABLE_ALIGN, Lx_TABLE_ALIGN,
+   &pa);
+   /* zero by pa, va may not be mapped */
+   bzero((void *)pa, Lx_TABLE_ALIGN);
+
 
/* now that we have mapping space for everything, lets map it */
/* all of these mappings are ram -> kernel va */
@@ -2311,7 +2317,12 @@ pmap_setttb(struct proc *p, paddr_t pagedir, struct pcb 
*pcb)
//int oasid = pm->pm_asid;
 
if (pm != pmap_kernel()) {
-   if (pm->pm_asid < 0) {
+   if (pm->pm_asid == -1) {
+   /*
+* We are running in kernel, about to switch to an
+* unknown asid, move to an unused ttbr0 entry
+*/
+   cpu_setttb(pted_kernel_ttbr0);
pmap_allocate_asid(pm);
pcb->pcb_pagedir = ((uint64_t)pm->pm_asid << 48) |
pm->pm_pt0pa;
@@ -2326,6 +2337,7 @@ pmap_setttb(struct proc *p, paddr_t pagedir, struct pcb 
*pcb)
cpu_tlb_flush();
}
} else {
-   // XXX what to do if switching to kernel pmap !?!?
+   /* switch userland to empty mapping page */
+   cpu_setttb(pted_kernel_ttbr0);
}
 }
Dale Rahn   dr...@dalerahn.com



arm64: Remove early ASID allocation

2017-03-17 Thread Dale Rahn
Do not preallocate the asid; wait until it is allocated in pmap_setttb().
This simplifies the code by having only one location that does the asid
allocation.

There is no need to allocate the ASID directly in pmap_activate(), as
the code in pmap_setttb() will do that allocation, so this code is just
duplication that could potentially cause a race in the future.

diff --git a/sys/arch/arm64/arm64/pmap.c b/sys/arch/arm64/arm64/pmap.c
index 4d0c58c3663..d1fa358b85f 100644
--- a/sys/arch/arm64/arm64/pmap.c
+++ b/sys/arch/arm64/arm64/pmap.c
@@ -1403,14 +1403,6 @@ pmap_activate(struct proc *p)
pcb = &p->p_addr->u_pcb;
 
// printf("%s: called on proc %p %p\n", __func__, p,  pcb->pcb_pagedir);
-   if (pm != pmap_kernel() && pm->pm_asid == -1) {
-   // this userland process has no asid, allocate one.
-   pmap_allocate_asid(pm);
-   }
-
-   if (pm != pmap_kernel())
-   pcb->pcb_pagedir = ((uint64_t)pm->pm_asid << 48) | pm->pm_pt0pa;
-
psw = disable_interrupts();
if (p == curproc && pm != pmap_kernel() && pm != curcpu()->ci_curpm) {
// clean up old process
Dale Rahn   dr...@dalerahn.com



arm64 setjmp/longjmp once again.

2017-03-09 Thread Dale Rahn
-#define _JB_REG_R11	21
-#define _JB_REG_R12	22
-#define _JB_REG_R13	23
-#define _JB_REG_R14	24
+#define _JB_REG_SP	1
+#define _JB_REG_X19	2
+#define _JB_REG_X20	3
+#define _JB_REG_X21	4
+#define _JB_REG_X22	5
+#define _JB_REG_X23	6
+#define _JB_REG_X24	7
+#define _JB_REG_X25	8
+#define _JB_REG_X26	9
+#define _JB_REG_X27	10
+#define _JB_REG_X28	11
+#define _JB_REG_X29	12
+#define _JB_REG_X30	13
+#define _JB_REG_v8	14
+#define _JB_REG_v9	16
+#define _JB_REG_v10	18
+#define _JB_REG_v11	20
+#define _JB_REG_v12	22
+#define _JB_REG_v13	24
+#define _JB_REG_v14	26
+#define _JB_REG_v15	28
 
 /* Only valid with the _JB_MAGIC_SETJMP magic */
 
-#define _JB_SIGMASK	25
+#define _JB_SIGMASK	30
Index: lib/libc/arch/aarch64/gen/_setjmp.S
===
RCS file: /cvs/src/lib/libc/arch/aarch64/gen/_setjmp.S,v
retrieving revision 1.1
diff -u -p -r1.1 _setjmp.S
--- lib/libc/arch/aarch64/gen/_setjmp.S 11 Jan 2017 18:09:24 -  1.1
+++ lib/libc/arch/aarch64/gen/_setjmp.S 10 Mar 2017 06:41:15 -
@@ -49,10 +49,10 @@ ENTRY(_setjmp)
 
 #ifndef _STANDALONE
/* Store the vfp registers */
-   stp d8, d9, [x0], #16
-   stp d10, d11, [x0], #16
-   stp d12, d13, [x0], #16
-   stp d14, d15, [x0]
+   stp q8, q9, [x0], #32
+   stp q10, q11, [x0], #32
+   stp q12, q13, [x0], #32
+   stp q14, q15, [x0]
 #endif
 
/* Return value */
@@ -84,14 +84,15 @@ ENTRY(_longjmp)
 
 #ifndef _STANDALONE
/* Restore the vfp registers */
-   ldp d8, d9, [x0], #16
-   ldp d10, d11, [x0], #16
-   ldp d12, d13, [x0], #16
-   ldp d14, d15, [x0]
+   ldp q8, q9, [x0], #32
+   ldp q10, q11, [x0], #32
+   ldp q12, q13, [x0], #32
+   ldp q14, q15, [x0]
 #endif
 
/* Load the return value */
-   mov x0, x1
+   cmp w1, #0
+   csinc   w0, w1, wzr, ne
ret
 
 botch:
Index: lib/libc/arch/aarch64/gen/setjmp.S
===
RCS file: /cvs/src/lib/libc/arch/aarch64/gen/setjmp.S,v
retrieving revision 1.2
diff -u -p -r1.2 setjmp.S
--- lib/libc/arch/aarch64/gen/setjmp.S  8 Mar 2017 06:28:12 -   1.2
+++ lib/libc/arch/aarch64/gen/setjmp.S  10 Mar 2017 06:41:15 -
@@ -34,7 +34,6 @@
 #include 
 
 ENTRY(setjmp)
-
mov x2, x0  /* save jmpbuf in x2 */
/* Store the signal mask */
mov w1, #0  /* set */
@@ -57,10 +56,10 @@ ENTRY(setjmp)
stp x29, x30, [x0], #16
 
/* Store the vfp registers */
-   stp d8, d9, [x0], #16
-   stp d10, d11, [x0], #16
-   stp d12, d13, [x0], #16
-   stp d14, d15, [x0]
+   stp q8, q9, [x0], #32
+   stp q10, q11, [x0], #32
+   stp q12, q13, [x0], #32
+   stp q14, q15, [x0]
 
/* Return value */
mov x0, #0
@@ -100,10 +99,10 @@ ENTRY(longjmp)
ldp x29, x30, [x0], #16
 
/* Restore the vfp registers */
-   ldp d8, d9, [x0], #16
-   ldp d10, d11, [x0], #16
-   ldp d12, d13, [x0], #16
-   ldp d14, d15, [x0]
+   ldp q8, q9, [x0], #32
+   ldp q10, q11, [x0], #32
+   ldp q12, q13, [x0], #32
+   ldp q14, q15, [x0]
 
/* Load the return value */
cmp w3, #0
Index: regress/lib/libc/setjmp-signal/setjmp-signal.c
===
RCS file: /cvs/src/regress/lib/libc/setjmp-signal/setjmp-signal.c,v
retrieving revision 1.3
diff -u -p -r1.3 setjmp-signal.c
--- regress/lib/libc/setjmp-signal/setjmp-signal.c  3 Jan 2003 20:46:05 -   1.3
+++ regress/lib/libc/setjmp-signal/setjmp-signal.c  10 Mar 2017 06:41:15 -
@@ -19,7 +19,7 @@ main()
 {
signal(SIGSEGV, segv_handler);
if (setjmp(jb) == 0) {
-   *((int *)0L) = 0;
+   *((volatile int *)0L) = 0;
return (1);
}
return (0);

Dale Rahn   dr...@dalerahn.com



Re: arm64 SMP support, diff #3/5

2017-03-06 Thread Dale Rahn
On Thu, Feb 23, 2017 at 01:03:57PM +1100, Jonathan Gray wrote:
> On Wed, Feb 22, 2017 at 06:45:52PM -0500, Dale Rahn wrote:
> > Add psci 2.0 features to spin up/down/suspend processors.
> > 
> > This change uses extern weak symbols to determine if the platform supports
> > the cpu_on/cpu_off/cpu_suspend features. If available, it saves the fdt
> > provided argument values.
> > 
> > Some PSCI calls will return an int status, so the callfn has been updated
> > to return that status (eg cpu_on fail)
> > 
> > diff --git a/sys/dev/fdt/psci.c b/sys/dev/fdt/psci.c
> > index fceafd0e9ba..3177ff06beb 100644
> > --- a/sys/dev/fdt/psci.c
> > +++ b/sys/dev/fdt/psci.c
> > @@ -29,12 +29,19 @@
> >  extern void (*cpuresetfn)(void);
> >  extern void (*powerdownfn)(void);
> >  
> > +extern int (*cpu_suspend_fn)(void) __attribute__((weak)) ;
> > +extern int (*cpu_off_fn)(void) __attribute__((weak)) ;
> > +extern int (*cpu_on_fn)(uint64_t, uint64_t) __attribute__((weak)) ;
> > +
> >  #define SYSTEM_OFF 0x8408
> >  #define SYSTEM_RESET   0x8409
> >  
> >  struct psci_softc {
> > struct device sc_dev;
> > -   void (*callfn)(uint32_t, uint32_t, uint32_t, uint32_t);
> > +   int  (*callfn)(uint32_t, uint32_t, uint32_t, uint32_t);
> > +   int sc_cpu_on;
> > +   int sc_cpu_off;
> > +   int sc_cpu_suspend;
> >  };
> 
> spacing is out here
> 
> >  
> >  struct psci_softc *psci_sc;
> > @@ -43,9 +50,12 @@ int  psci_match(struct device *, void *, void *);
> >  void   psci_attach(struct device *, struct device *, void *);
> >  void   psci_reset(void);
> >  void   psci_powerdown(void);
> > +int    psci_cpu_suspend(void);
> > +int    psci_cpu_off(void);
> > +int    psci_cpu_on(uint64_t, uint64_t);
> >  
> > -extern void hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > -extern void smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > +extern int hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > +extern int smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> >  
> >  struct cfattach psci_ca = {
> > sizeof(struct psci_softc), psci_match, psci_attach
> > @@ -83,6 +93,24 @@ psci_attach(struct device *parent, struct device *self, void *aux)
> > psci_sc = sc;
> > cpuresetfn = psci_reset;
> > powerdownfn = psci_powerdown;
> > +
> > +   if (&cpu_suspend_fn != NULL) {
> > +   sc->sc_cpu_suspend = OF_getpropint(faa->fa_node, "cpu_suspend", 0);
> > +   if (sc->sc_cpu_suspend != 0)
> > +   cpu_suspend_fn = psci_cpu_suspend;
> > +   }
> > +
> > +   if (&cpu_on_fn != NULL) {
> > +   sc->sc_cpu_on = OF_getpropint(faa->fa_node, "cpu_on", 0);
> > +   if (sc->sc_cpu_on != 0)
> > +   cpu_on_fn = psci_cpu_on;
> > +   }
> > +
> > +   if (&cpu_off_fn != NULL) {
> > +   sc->sc_cpu_off = OF_getpropint(faa->fa_node, "cpu_off", 0);
> > +   if (sc->sc_cpu_off != 0)
> > +   cpu_off_fn = psci_cpu_off;
> > +   }
> >  }
> 
> Can these nodes be relied on as being present?
> 
> Outside of qemu -M virt
> 
> psci {
> migrate = <0x8405>;
> cpu_on = <0x8403>;
> cpu_off = <0x8402>;
> cpu_suspend = <0x8401>;
> method = "hvc";
> compatible = "arm,psci-0.2", "arm,psci";
> };
> 
> I can only find
> 
> arm/boot/dts/artpec6.dtsi:  cpu_on = <0x8403>;
> arm/boot/dts/ecx-common.dtsi:   cpu_on  = <0x8406>;
> arm/boot/dts/keystone.dtsi: cpu_on  = <0x8403>;
> arm/boot/dts/xenvm-4.2.dts: cpu_on  = <2>;
> arm64/boot/dts/al/alpine-v2.dtsi:   cpu_on = <0x8403>;
> arm64/boot/dts/exynos/exynos5433.dtsi:  cpu_on = <0xC403>;
> arm64/boot/dts/lg/lg1312.dtsi:  cpu_on = <0x8403>;
> arm64/boot/dts/lg/lg1313.dtsi:  cpu_on = <0x8403>;
> arm64/boot/dts/mediatek/mt8173.dtsi:cpu_on= <0x8403>;
> arm64/boot/dts/sprd/sc9836.dtsi:cpu_on  = 
> <0xc403>;
> 
> 0x8403 is SMC32, 0xC403 is SMC64 and there is actually
> a different calling convention for these...
> 
> The existing SYSTEM_OFF and SYSTEM_RESET u

arm64 setjmp/longjmp signal mask storage error

2017-03-06 Thread Dale Rahn
setjmp/longjmp were converted from calling sigprocmask() to invoking the
sigprocmask syscall directly. The ABI for the function call and the syscall
are not the same, and the register manipulation code was not updated
in the change.

This diff moves the jmpbuf to x2 for the duration of the sigprocmask syscall
and loads x0/x1 with the appropriate values and saves the returned x0
as the signal mask.

Other than storing x0 and x30 (lr) on the stack, this should be equivalent
to calling sigprocmask ('bl sigprocmask') instead of 'SYSTRAP(sigprocmask)'

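For reference, here is a userland sketch of the function-call side of that
ABI (illustrative only, not the libc internals). The function writes the old
mask through the oset pointer, while the raw syscall returns the old mask in
x0 itself, which is what the 'str w0' in the diff below has to capture:

#include <signal.h>
#include <stdio.h>

int
main(void)
{
	sigset_t old;

	/* Function-call ABI: the old mask is written through the
	 * oset pointer; the return value only reports success or
	 * failure.  The raw syscall instead hands the old mask
	 * back in x0 itself. */
	if (sigprocmask(SIG_BLOCK, NULL, &old) == -1)
		return 1;
	printf("SIGINT blocked: %d\n", sigismember(&old, SIGINT));
	return 0;
}
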
diff --git a/lib/libc/arch/aarch64/gen/setjmp.S 
b/lib/libc/arch/aarch64/gen/setjmp.S
index ba4010be7ff..76c1be5b9b5 100644
--- a/lib/libc/arch/aarch64/gen/setjmp.S
+++ b/lib/libc/arch/aarch64/gen/setjmp.S
@@ -34,16 +34,15 @@
 #include 
 
 ENTRY(setjmp)
-   stp x0, x30, [sp, #-16]!
 
+   mov x2, x0  /* save jmpbuf in x2 */
/* Store the signal mask */
-   add x2, x0, #(_JB_SIGMASK * 8)  /* oset */
-   mov x1, #0  /* set */
+   mov w1, #0  /* set */
mov x0, #1  /* SIG_BLOCK */
SYSTRAP(sigprocmask)
+   str w0, [x2, #(_JB_SIGMASK * 8)]/* oset */
 
-   ldp x0, x30, [sp], #16
-
+   mov x0, x2
/* Store the magic value and stack pointer */
ldr x8, .Lmagic
mov x9, sp
@@ -73,18 +72,15 @@ ENTRY(setjmp)
 END_STRONG(setjmp)
 
 ENTRY(longjmp)
-   stp x0, x1, [sp, #-32]!
-   str x30, [sp, #24]
+   mov x2, x0  /* move jmpbuf */
+   mov x3, x1  /* final return value */
 
/* Restore the signal mask */
-   mov x2, #0  /* oset */
-   add x1, x0, #(_JB_SIGMASK * 8)  /* set */
+   ldr x1, [x2, #(_JB_SIGMASK * 8)]/* set */
mov x0, #3  /* SIG_SETMASK */
SYSTRAP(sigprocmask)
 
-   ldr x30, [sp, #24]
-   ldp x0, x1, [sp], #32
-
+   mov x0, x2
/* Check the magic value */
ldr x8, [x0], #8
ldr x9, .Lmagic
@@ -110,7 +106,7 @@ ENTRY(longjmp)
ldp d14, d15, [x0]
 
/* Load the return value */
-   mov x0, x1
+   mov x0, x3
ret
 
 botch:
Dale Rahn   dr...@dalerahn.com



Switch agtimer from physical timer to virtual timer.

2017-02-26 Thread Dale Rahn
Switch agtimer from physical timer to virtual timer.

This diff makes the arm generic timer for arm64 use the virtual
timer instead of the physical timer.

Linux uses the virtual timer in the kernel unless it is operating in
hypervisor mode.

The virtual timer is set up so that a hypervisor can run an operating system
under it. This allows the hypervisor to adjust time, e.g. to make it look
like the time spent in a higher-level exception didn't occur.

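For reference, the architectural relation between the two counters is simply

	CNTVCT_EL0 = CNTPCT_EL0 - CNTVOFF_EL2

so a hypervisor hides the time spent at a higher exception level by advancing
CNTVOFF_EL2 before returning to the guest.
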
Access to the physical timer can be disabled from a higher exception level,
making it necessary for the operating system to use the virtual timer.

The use of the virtual timer vs. the physical timer came up in a discussion
with Patrick; I decided it was better to just switch the timer now.

diff --git a/sys/arch/arm64/dev/agtimer.c b/sys/arch/arm64/dev/agtimer.c
index 278ad9a0e19..3c9abc6011a 100644
--- a/sys/arch/arm64/dev/agtimer.c
+++ b/sys/arch/arm64/dev/agtimer.c
@@ -33,9 +33,9 @@
 #include 
 
 /* registers */
-#define GTIMER_CNTP_CTL_ENABLE (1 << 0)
-#define GTIMER_CNTP_CTL_IMASK  (1 << 1)
-#define GTIMER_CNTP_CTL_ISTATUS(1 << 2)
+#define GTIMER_CNTV_CTL_ENABLE (1 << 0)
+#define GTIMER_CNTV_CTL_IMASK  (1 << 1)
+#define GTIMER_CNTV_CTL_ISTATUS(1 << 2)
 
 #define TIMER_FREQUENCY    24 * 1000 * 1000 /* ARM core clock */
 int32_t agtimer_frequency = TIMER_FREQUENCY;
@@ -96,7 +96,7 @@ agtimer_readcnt64(void)
uint64_t val;
 
__asm volatile("isb" : : : "memory");
-   __asm volatile("MRS %x0, CNTPCT_EL0" : "=r" (val));
+   __asm volatile("MRS %x0, CNTVCT_EL0" : "=r" (val));
 
return (val);
 }
@@ -116,7 +116,7 @@ agtimer_get_ctrl(void)
 {
uint32_t val;
 
-   __asm volatile("MRS %x0, CNTP_CTL_EL0" : "=r" (val));
+   __asm volatile("MRS %x0, CNTV_CTL_EL0" : "=r" (val));
 
return (val);
 }
@@ -124,7 +124,7 @@ agtimer_get_ctrl(void)
 static inline int
 agtimer_set_ctrl(uint32_t val)
 {
-   __asm volatile("MSR CNTP_CTL_EL0, %x0" : : "r" (val));
+   __asm volatile("MSR CNTV_CTL_EL0, %x0" : : "r" (val));
__asm volatile("isb" : : : "memory");
 
return (0);
@@ -133,7 +133,7 @@ agtimer_set_ctrl(uint32_t val)
 static inline int
 agtimer_set_tval(uint32_t val)
 {
-   __asm volatile("MSR CNTP_TVAL_EL0, %x0" : : "r" (val));
+   __asm volatile("MSR CNTV_TVAL_EL0, %x0" : : "r" (val));
__asm volatile("isb" : : : "memory");
 
return (0);
@@ -296,18 +296,16 @@ agtimer_cpu_initclocks()
sc->sc_ticks_err_cnt = sc->sc_ticks_per_second % hz;
pc->pc_ticks_err_sum = 0;
 
-   /* Setup secure and non-secure timer IRQs. */
-   arm_intr_establish_fdt_idx(sc->sc_node, 0, IPL_CLOCK,
-   agtimer_intr, NULL, "tick");
-   arm_intr_establish_fdt_idx(sc->sc_node, 1, IPL_CLOCK,
+   /* configure virtual timer interrupt */
+   arm_intr_establish_fdt_idx(sc->sc_node, 2, IPL_CLOCK,
agtimer_intr, NULL, "tick");
 
next = agtimer_readcnt64() + sc->sc_ticks_per_intr;
pc->pc_nexttickevent = pc->pc_nextstatevent = next;
 
reg = agtimer_get_ctrl();
-   reg &= ~GTIMER_CNTP_CTL_IMASK;
-   reg |= GTIMER_CNTP_CTL_ENABLE;
+   reg &= ~GTIMER_CNTV_CTL_IMASK;
+   reg |= GTIMER_CNTV_CTL_ENABLE;
agtimer_set_tval(sc->sc_ticks_per_second);
agtimer_set_ctrl(reg);
 }
@@ -381,8 +379,8 @@ agtimer_startclock(void)
pc->pc_nexttickevent = pc->pc_nextstatevent = nextevent;
 
reg = agtimer_get_ctrl();
-   reg &= ~GTIMER_CNTP_CTL_IMASK;
-   reg |= GTIMER_CNTP_CTL_ENABLE;
+   reg &= ~GTIMER_CNTV_CTL_IMASK;
+   reg |= GTIMER_CNTV_CTL_ENABLE;
agtimer_set_tval(sc->sc_ticks_per_second);
agtimer_set_ctrl(reg);
 }
Dale Rahn   dr...@dalerahn.com



Re: arm64 SMP support, diff #3/5

2017-02-22 Thread Dale Rahn
On Thu, Feb 23, 2017 at 01:03:57PM +1100, Jonathan Gray wrote:
> On Wed, Feb 22, 2017 at 06:45:52PM -0500, Dale Rahn wrote:
> > Add psci 2.0 features to spin up/down/suspend processors.
> > 
> > This change uses extern weak symbols to determine if the platform supports
> > the cpu_on/cpu_off/cpu_suspend features. If available, it saves the fdt
> > provided argument values.
> > 
> > Some PSCI calls will return an int status, so the callfn has been updated
> > to return that status (eg cpu_on fail)
> > 
> > diff --git a/sys/dev/fdt/psci.c b/sys/dev/fdt/psci.c
> > index fceafd0e9ba..3177ff06beb 100644
> > --- a/sys/dev/fdt/psci.c
> > +++ b/sys/dev/fdt/psci.c
> > @@ -29,12 +29,19 @@
> >  extern void (*cpuresetfn)(void);
> >  extern void (*powerdownfn)(void);
> >  
> > +extern int (*cpu_suspend_fn)(void) __attribute__((weak)) ;
> > +extern int (*cpu_off_fn)(void) __attribute__((weak)) ;
> > +extern int (*cpu_on_fn)(uint64_t, uint64_t) __attribute__((weak)) ;
> > +
> >  #define SYSTEM_OFF 0x8408
> >  #define SYSTEM_RESET   0x8409
> >  
> >  struct psci_softc {
> > struct device sc_dev;
> > -   void (*callfn)(uint32_t, uint32_t, uint32_t, uint32_t);
> > +   int  (*callfn)(uint32_t, uint32_t, uint32_t, uint32_t);
> > +   int sc_cpu_on;
> > +   int sc_cpu_off;
> > +   int sc_cpu_suspend;
> >  };
> 
> spacing is out here
> 
sure.
> >  
> >  struct psci_softc *psci_sc;
> > @@ -43,9 +50,12 @@ int  psci_match(struct device *, void *, void *);
> >  void   psci_attach(struct device *, struct device *, void *);
> >  void   psci_reset(void);
> >  void   psci_powerdown(void);
> > +int    psci_cpu_suspend(void);
> > +int    psci_cpu_off(void);
> > +int    psci_cpu_on(uint64_t, uint64_t);
> >  
> > -extern void hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > -extern void smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > +extern int hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > +extern int smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> >  
> >  struct cfattach psci_ca = {
> > sizeof(struct psci_softc), psci_match, psci_attach
> > @@ -83,6 +93,24 @@ psci_attach(struct device *parent, struct device *self, void *aux)
> > psci_sc = sc;
> > cpuresetfn = psci_reset;
> > powerdownfn = psci_powerdown;
> > +
> > +   if (&cpu_suspend_fn != NULL) {
> > +   sc->sc_cpu_suspend = OF_getpropint(faa->fa_node, "cpu_suspend", 0);
> > +   if (sc->sc_cpu_suspend != 0)
> > +   cpu_suspend_fn = psci_cpu_suspend;
> > +   }
> > +
> > +   if (&cpu_on_fn != NULL) {
> > +   sc->sc_cpu_on = OF_getpropint(faa->fa_node, "cpu_on", 0);
> > +   if (sc->sc_cpu_on != 0)
> > +   cpu_on_fn = psci_cpu_on;
> > +   }
> > +
> > +   if (&cpu_off_fn != NULL) {
> > +   sc->sc_cpu_off = OF_getpropint(faa->fa_node, "cpu_off", 0);
> > +   if (sc->sc_cpu_off != 0)
> > +   cpu_off_fn = psci_cpu_off;
> > +   }
> >  }
> 
> Can these nodes be relied on as being present?
> 
> Outside of qemu -M virt
> 
> psci {
> migrate = <0x8405>;
> cpu_on = <0x8403>;
> cpu_off = <0x8402>;
> cpu_suspend = <0x8401>;
> method = "hvc";
> compatible = "arm,psci-0.2", "arm,psci";
> };
> 
> I can only find
> 
> arm/boot/dts/artpec6.dtsi:  cpu_on = <0x8403>;
> arm/boot/dts/ecx-common.dtsi:   cpu_on  = <0x8406>;
> arm/boot/dts/keystone.dtsi: cpu_on  = <0x8403>;
> arm/boot/dts/xenvm-4.2.dts: cpu_on  = <2>;
> arm64/boot/dts/al/alpine-v2.dtsi:   cpu_on = <0x8403>;
> arm64/boot/dts/exynos/exynos5433.dtsi:  cpu_on = <0xC403>;
> arm64/boot/dts/lg/lg1312.dtsi:  cpu_on = <0x8403>;
> arm64/boot/dts/lg/lg1313.dtsi:  cpu_on = <0x8403>;
> arm64/boot/dts/mediatek/mt8173.dtsi:cpu_on= <0x8403>;
> arm64/boot/dts/sprd/sc9836.dtsi:cpu_on  = 
> <0xc403>;
> 
> 0x8403 is SMC32, 0xC403 is SMC64 and there is actually
> a different calling convention for these...
> 
> The existing SYSTEM_OFF and SYSTEM_RES

arm64 SMP support, diff #5/5

2017-02-22 Thread Dale Rahn
ARM64 SMP support

This is the rest of the pieces of SMP support for arm64, major changes:

switch to ticket lock instead of simple lock (a minimal sketch of the idea follows below)
add code to print core type (printed from each core as it spins up)

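A minimal ticket lock sketch (illustrative only, not the kernel
implementation): each CPU takes the next ticket and spins until the
"now serving" counter reaches it, so the lock is handed out in FIFO
order instead of letting one CPU starve the others.

struct ticket_lock {
	volatile unsigned int next;	/* next ticket to hand out */
	volatile unsigned int owner;	/* ticket currently being served */
};

static inline void
ticket_lock(struct ticket_lock *l)
{
	unsigned int t = __sync_fetch_and_add(&l->next, 1);

	while (l->owner != t)
		__asm volatile("yield");	/* arm64; or a wfe/sev pairing */
}

static inline void
ticket_unlock(struct ticket_lock *l)
{
	l->owner++;	/* hand the lock to the next waiter */
}
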
IPI support is not yet added with this diff, however this should already be
reasonably functional. FP context is saved on entry, and when runnable
tasks are made available 'sev' will wake sleeping processors.
The mips64_ipi_init() comment below is a reminder to implement IPI.

cpbus and late attach of the processor was added so that the device
to spin up the cores would be attached and available.


diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c
new file mode 100644
index 000..6a6f10a9727
--- /dev/null
+++ b/sys/arch/arm64/arm64/cpu.c
@@ -0,0 +1,336 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 Dale Rahn <dr...@dalerahn.com>
+ * Copyright (c) 1997-2004 Opsycon AB (www.opsycon.se)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+int    cpumatch(struct device *, void *, void *);
+void   cpuattach(struct device *, struct device *, void *);
+
+#ifdef MULTIPROCESSOR
+extern struct cpu_info cpu_info_primary;
+struct cpu_info *cpu_info_list = &cpu_info_primary;
+struct cpuset cpus_running;
+struct cpu_info *cpu_info_secondaries[MAXCPUS];
+void cpu_boot_secondary(struct cpu_info *ci);
+void cpu_hatch_secondary(struct cpu_info *ci);
+
+#endif
+
+struct cpu_cores {
+   int  id;
+   char*name;
+};
+
+struct cpu_cores cpu_cores_none[] = {
+   { 0x0, "Unknown" },
+};
+
+struct cpu_cores cpu_cores_arm[] = {
+   { CPU_PART_CORTEX_A53, "Cortex-A53" },
+   { CPU_PART_CORTEX_A57, "Cortex-A57" },
+   { 0x0, "Unknown" },
+};
+
+/* arm cores makers */
+const struct implementers {
+   int   id;
+   char *name;
+   struct cpu_cores *corelist;
+} cpu_implementers[] = {
+   { CPU_IMPL_ARM, "ARM",  cpu_cores_arm },
+   { CPU_IMPL_BROADCOM,"Broadcom", cpu_cores_none },
+   { 0,"", NULL },
+};
+
+
+void
+cpu_identify(struct cpu_info *ci)
+{
+   uint64_t midr, impl, part;
+   char *impl_name = "Unknown";
+   char *part_name = "Unknown";
+   struct cpu_cores *coreselecter = NULL;
+
+   int i;
+
+   midr = READ_SPECIALREG(midr_el1);
+
+   impl = CPU_IMPL(midr);
+   part = CPU_PART(midr);
+
+   for (i = 0; cpu_implementers[i].id != 0; i++) {
+   if (cpu_implementers[i].id == impl) {
+   impl_name = cpu_implementers[i].name;
+   coreselecter =  cpu_implementers[i].corelist;
+   break;
+   }
+   }
+
+   if (impl_name != NULL) {
+   for (i = 0; coreselecter[i].id != 0; i++) {
+   if (part == coreselecter[i].id) {
+   part_name = coreselecter[i].name; 
+   }
+   }
+   printf(" %s %s r%dp%d", impl_name, part_name, CPU_VAR(midr),
+   CPU_REV(midr));
+   } else {
+   printf (" unknown implementer");
+   
+   }
+}
+
+void
+cpuattach(struct device *parent, struct device *dev, void *aux)
+{
+   struct fdt_attach_args *faa = aux;
+   struct cpu_info *ci;
+   int cpuno = dev->dv_unit;
+
+   if (cpuno == 0) {
+   ci = _info_primary;
+#ifdef MULTIPROCES

arm64 SMP support, diff #4/5

2017-02-22 Thread Dale Rahn
Introduce KERNEL_LOCK into arm64 code.

arm_do_pending_intr was changed to check whether any pending soft interrupt
would be serviced before entering the loop, to avoid obtaining KERNEL_LOCK;
otherwise KERNEL_LOCK would be acquired even if the previous IPL level was
> IPL_SOFTTTY. That would unnecessarily grab the kernel lock, as well as
possibly introduce a deadlock if KERNEL_LOCK were always obtained in the
code at that location.

This prevents grabbing the kernel lock if the interrupt was taken above
IPL_SOFTTTY.

diff --git a/sys/arch/arm64/arm64/intr.c b/sys/arch/arm64/arm64/intr.c
index f41cc8fbda9..7b26ccb5457 100644
--- a/sys/arch/arm64/arm64/intr.c
+++ b/sys/arch/arm64/arm64/intr.c
@@ -446,12 +446,17 @@ arm_do_pending_intr(int pcpl)
oldirqstate = disable_interrupts(); \
}
 
-   do {
+   while (ci->ci_ipending & arm_smask[pcpl]) {
+   KERNEL_LOCK();
+
DO_SOFTINT(SIR_TTY, IPL_SOFTTTY);
DO_SOFTINT(SIR_NET, IPL_SOFTNET);
DO_SOFTINT(SIR_CLOCK, IPL_SOFTCLOCK);
DO_SOFTINT(SIR_SOFT, IPL_SOFT);
-   } while (ci->ci_ipending & arm_smask[pcpl]);
+
+   KERNEL_UNLOCK();
+   }
+
 
/* Don't use splx... we are here already! */
arm_intr_func.setipl(pcpl);
diff --git a/sys/arch/arm64/arm64/syscall.c b/sys/arch/arm64/arm64/syscall.c
index a497cd74e74..39ab96133c1 100644
--- a/sys/arch/arm64/arm64/syscall.c
+++ b/sys/arch/arm64/arm64/syscall.c
@@ -130,5 +130,7 @@ child_return(arg)
frame->tf_x[1] = 1;
frame->tf_spsr &= ~PSR_C;   /* carry bit */
 
+   KERNEL_UNLOCK();
+
mi_child_return(p);
 }
diff --git a/sys/arch/arm64/arm64/trap.c b/sys/arch/arm64/arm64/trap.c
index 1dc151fe143..b1a4664af8b 100644
--- a/sys/arch/arm64/arm64/trap.c
+++ b/sys/arch/arm64/arm64/trap.c
@@ -173,7 +173,9 @@ data_abort(struct trapframe *frame, uint64_t esr, int lower, int exe)
 
/* Fault in the user page: */
if (!pmap_fault_fixup(map->pmap, va, access_type, 1)) {
+   KERNEL_LOCK();
error = uvm_fault(map, va, ftype, access_type);
+   KERNEL_UNLOCK();
}
 
//PROC_LOCK(p);
@@ -185,7 +187,9 @@ data_abort(struct trapframe *frame, uint64_t esr, int lower, int exe)
 * kernel.
 */
if (!pmap_fault_fixup(map->pmap, va, access_type, 0)) {
+   KERNEL_LOCK();
error = uvm_fault(map, va, ftype, access_type);
+   KERNEL_UNLOCK();
}
}
 
@@ -198,7 +202,9 @@ data_abort(struct trapframe *frame, uint64_t esr, int lower, int exe)
sv.sival_ptr = (u_int64_t *)far;
dumpregs(frame);
 
+   KERNEL_LOCK();
trapsignal(p, sig, 0, SEGV_ACCERR, sv);
+   KERNEL_UNLOCK();
} else {
if (curcpu()->ci_idepth == 0 &&
    pcb->pcb_onfault != 0) {

Dale Rahn   dr...@dalerahn.com



arm64 SMP support, diff #3/5

2017-02-22 Thread Dale Rahn
Add psci 2.0 features to spin up/down/suspend processors.

This change uses extern weak symbols to determine if the platform supports
the cpu_on/cpu_off/cpu_suspend features. If available, it saves the fdt
provided argument values.

Some PSCI calls will return an int status, so the callfn has been updated
to return that status (e.g. a cpu_on failure).

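For reference, a sketch of the weak-symbol pattern this relies on (assumed
shape, not the exact kernel declarations): a weak extern resolves to address
0 when no other object file defines it, so taking its address doubles as a
"does this platform provide the hook?" test.

extern int (*cpu_on_fn)(uint64_t, uint64_t) __attribute__((weak));

int
try_cpu_on(uint64_t mpidr, uint64_t pc)
{
	/* Symbol absent, or present but not yet filled in. */
	if (&cpu_on_fn == NULL || cpu_on_fn == NULL)
		return -1;
	return cpu_on_fn(mpidr, pc);
}
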
diff --git a/sys/dev/fdt/psci.c b/sys/dev/fdt/psci.c
index fceafd0e9ba..3177ff06beb 100644
--- a/sys/dev/fdt/psci.c
+++ b/sys/dev/fdt/psci.c
@@ -29,12 +29,19 @@
 extern void (*cpuresetfn)(void);
 extern void (*powerdownfn)(void);
 
+extern int (*cpu_suspend_fn)(void) __attribute__((weak)) ;
+extern int (*cpu_off_fn)(void) __attribute__((weak)) ;
+extern int (*cpu_on_fn)(uint64_t, uint64_t) __attribute__((weak)) ;
+
 #define SYSTEM_OFF 0x8408
 #define SYSTEM_RESET   0x8409
 
 struct psci_softc {
	struct device sc_dev;
-	void (*callfn)(uint32_t, uint32_t, uint32_t, uint32_t);
+	int  (*callfn)(uint32_t, uint32_t, uint32_t, uint32_t);
+   int sc_cpu_on;
+   int sc_cpu_off;
+   int sc_cpu_suspend;
 };
 
 struct psci_softc *psci_sc;
@@ -43,9 +50,12 @@ int  psci_match(struct device *, void *, void *);
 void   psci_attach(struct device *, struct device *, void *);
 void   psci_reset(void);
 void   psci_powerdown(void);
+int    psci_cpu_suspend(void);
+int    psci_cpu_off(void);
+int    psci_cpu_on(uint64_t, uint64_t);
 
-extern void hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
-extern void smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
+extern int hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
+extern int smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
 
 struct cfattach psci_ca = {
sizeof(struct psci_softc), psci_match, psci_attach
@@ -83,6 +93,24 @@ psci_attach(struct device *parent, struct device *self, void *aux)
psci_sc = sc;
cpuresetfn = psci_reset;
powerdownfn = psci_powerdown;
+
+   if (&cpu_suspend_fn != NULL) {
+   sc->sc_cpu_suspend = OF_getpropint(faa->fa_node, "cpu_suspend", 0);
+   if (sc->sc_cpu_suspend != 0)
+   cpu_suspend_fn = psci_cpu_suspend;
+   }
+
+   if (&cpu_on_fn != NULL) {
+   sc->sc_cpu_on = OF_getpropint(faa->fa_node, "cpu_on", 0);
+   if (sc->sc_cpu_on != 0)
+   cpu_on_fn = psci_cpu_on;
+   }
+
+   if (&cpu_off_fn != NULL) {
+   sc->sc_cpu_off = OF_getpropint(faa->fa_node, "cpu_off", 0);
+   if (sc->sc_cpu_off != 0)
+   cpu_off_fn = psci_cpu_off;
+   }
 }
 
 void
@@ -100,3 +128,31 @@ psci_powerdown(void)
if (sc->callfn)
(*sc->callfn)(SYSTEM_OFF, 0, 0, 0);
 }
+
+int
+psci_cpu_suspend()
+{
+   struct psci_softc *sc = psci_sc;
+   if (sc->callfn)
+   return (*sc->callfn)(sc->sc_cpu_suspend, 0, 0, 0);
+   return -1;
+}
+
+int
+psci_cpu_off()
+{
+   struct psci_softc *sc = psci_sc;
+   if (sc->callfn)
+   return (*sc->callfn)(sc->sc_cpu_off, 0, 0, 0);
+   return -1;
+
+}
+
+int
+psci_cpu_on(uint64_t mpidr, uint64_t pc)
+{
+   struct psci_softc *sc = psci_sc;
+   if (sc->callfn)
+   return (*sc->callfn)(sc->sc_cpu_on, mpidr, pc, 0);
+   return -1;
+}

Dale Rahn   dr...@dalerahn.com



arm64 SMP support, diff #1/5

2017-02-22 Thread Dale Rahn
   mtx_enter(&pg->mdpage.pv_mtx);
+   while (!LIST_EMPTY(&(pg->mdpage.pv_list))) {
+   pted = LIST_FIRST(&(pg->mdpage.pv_list));
+   pmap_t pm = pted->pted_pmap;
+   mtx_leave(&pg->mdpage.pv_mtx);
+   mtx_enter(&pm->pm_mtx);
+   if (pted != LIST_FIRST(&(pg->mdpage.pv_list))) {
+   // race lost, retry
+   mtx_leave(&pm->pm_mtx);
+   mtx_enter(&pg->mdpage.pv_mtx);
+   continue;
+   }
 
-   LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
-   pmap_page_ro(pted->pted_pmap, pted->pted_va, prot);
+   pmap_remove_pted(pted->pted_pmap, pted);
+   mtx_leave(&pm->pm_mtx);
+   mtx_enter(&pg->mdpage.pv_mtx);
 }
+
+   /* page is being reclaimed, sync icache next use - XXX cull? */
+   atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
+
+   mtx_leave(&pg->mdpage.pv_mtx);
splx(s);
+   return;
 }
 
 
@@ -1910,6 +1942,7 @@ int pmap_clear_modify(struct vm_page *pg)
int s;
 
s = splvm();
+   mtx_enter(&pg->mdpage.pv_mtx);
 
pg->pg_flags &= ~PG_PMAP_MOD;
 
@@ -1921,6 +1954,7 @@ int pmap_clear_modify(struct vm_page *pg)
 
ttlb_flush(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
}
+   mtx_leave(&pg->mdpage.pv_mtx);
splx(s);
 
return 0;
@@ -1940,6 +1974,7 @@ int pmap_clear_reference(struct vm_page *pg)
int s;
 
s = splvm();
+   mtx_enter(&pg->mdpage.pv_mtx);
 
pg->pg_flags &= ~PG_PMAP_REF;
 
@@ -1948,6 +1983,7 @@ int pmap_clear_reference(struct vm_page *pg)
pmap_pte_insert(pted);
ttlb_flush(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
}
+   mtx_leave(&pg->mdpage.pv_mtx);
splx(s);
 
return 0;
diff --git a/sys/arch/arm64/include/pmap.h b/sys/arch/arm64/include/pmap.h
index 4f0044cd65a..1825316a217 100644
--- a/sys/arch/arm64/include/pmap.h
+++ b/sys/arch/arm64/include/pmap.h
@@ -73,6 +73,7 @@ struct pmap {
int pm_asid;
int pm_active;
int pm_refs;/* ref count */
+   struct mutex pm_mtx;
struct pmap_statistics  pm_stats;   /* pmap statistics */
 };
 
@@ -125,12 +126,14 @@ extern vaddr_tpmap_curmaxkvaddr;
 #define __HAVE_VM_PAGE_MD
 struct vm_page_md {
LIST_HEAD(,pte_desc) pv_list;
+   struct mutex pv_mtx;
int pvh_attrs;  /* page attributes */
 };
 
-#define VM_MDPAGE_INIT(pg) do {\
-LIST_INIT(&((pg)->mdpage.pv_list)); \
-   (pg)->mdpage.pvh_attrs = 0; \
+#define VM_MDPAGE_INIT(pg) do {\
+LIST_INIT(&((pg)->mdpage.pv_list));\
+   mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \
+   (pg)->mdpage.pvh_attrs = 0; \
 } while (0)
 
 #endif /* _LOCORE */

Dale Rahn   dr...@dalerahn.com



arm64 SMP support, diff #2/5

2017-02-22 Thread Dale Rahn
   ih->ih_count.ec_count++;
 
+#ifdef MULTIPROCESSOR
+   if (need_lock)
+   KERNEL_UNLOCK();
+#endif
}
ampintc_eoi(iack_val);
 
+
+
ampintc_splx(s);
 }
 
@@ -556,7 +588,8 @@ ampintc_intr_establish(int irqno, int level, int (*func)(void *),
ih = malloc(sizeof(*ih), M_DEVBUF, M_WAITOK);
ih->ih_func = func;
ih->ih_arg = arg;
-   ih->ih_ipl = level;
+   ih->ih_ipl = level &  IPL_IRQMASK;
+   ih->ih_flags = level & IPL_FLAGMASK;
ih->ih_irq = irqno;
ih->ih_name = name;
 
@@ -572,7 +605,7 @@ ampintc_intr_establish(int irqno, int level, int (*func)(void *),
name);
 #endif
ampintc_calc_mask();
-   
+
restore_interrupts(psw);
return (ih);
 }
@@ -594,6 +627,10 @@ ampintc_intr_disestablish(void *cookie)
	TAILQ_REMOVE(&sc->sc_ampintc_handler[ih->ih_irq].iq_list, ih, ih_list);
	if (ih->ih_name != NULL)
		evcount_detach(&ih->ih_count);
+
+	if (TAILQ_EMPTY(&sc->sc_ampintc_handler[ih->ih_irq].iq_list))
+   sc->sc_ampintc_handler[ih->ih_irq].iq_route = 0;
+
free(ih, M_DEVBUF, sizeof(*ih));
 
ampintc_calc_mask();
diff --git a/sys/arch/arm64/dev/bcm2836_intr.c b/sys/arch/arm64/dev/bcm2836_intr.c
index 16d6e1daa50..09fe633ee73 100644
--- a/sys/arch/arm64/dev/bcm2836_intr.c
+++ b/sys/arch/arm64/dev/bcm2836_intr.c
@@ -80,6 +80,7 @@ struct intrhand {
void *ih_arg;   /* arg for handler */
int ih_ipl; /* IPL_* */
int ih_irq; /* IRQ number */
+   int ih_flags;   /* flags, eg MPSAFE */
struct evcount ih_count;/* interrupt counter */
char *ih_name;  /* device name */
 };
@@ -204,7 +205,7 @@ bcm_intc_attach(struct device *parent, struct device *self, void *aux)
arm_set_intr_handler(bcm_intc_splraise, bcm_intc_spllower,
bcm_intc_splx, bcm_intc_setipl,
bcm_intc_intr_establish, bcm_intc_intr_disestablish,
-   bcm_intc_irq_handler);
+   bcm_intc_irq_handler, NULL);
 
sc->sc_intc.ic_node = faa->fa_node;
sc->sc_intc.ic_cookie = sc;
@@ -522,7 +523,8 @@ bcm_intc_intr_establish(int irqno, int level, int (*func)(void *),
ih = malloc(sizeof *ih, M_DEVBUF, M_WAITOK);
ih->ih_fun = func;
ih->ih_arg = arg;
-   ih->ih_ipl = level;
+   ih->ih_ipl = level &  IPL_IRQMASK;
+   ih->ih_flags = level & IPL_FLAGMASK;
ih->ih_irq = irqno;
ih->ih_name = name;
 
diff --git a/sys/arch/arm64/include/intr.h b/sys/arch/arm64/include/intr.h
index 10cbe793a93..85c0335255c 100644
--- a/sys/arch/arm64/include/intr.h
+++ b/sys/arch/arm64/include/intr.h
@@ -60,7 +60,9 @@
 #defineNIPL13  /* number of levels */
 
 /* Interrupt priority 'flags'. */
-#defineIPL_MPSAFE  0   /* no "mpsafe" interrupts */
+#defineIPL_IRQMASK 0x0f/* priority only */
+#defineIPL_FLAGMASK0xf00   /* flags only */
+#defineIPL_MPSAFE  0x100   /* no "mpsafe" interrupts */
 
 /* Interrupt sharing types. */
 #defineIST_NONE0   /* none */
@@ -88,7 +90,8 @@ void   arm_set_intr_handler(int (*raise)(int), int (*lower)(int),
 void *(*intr_establish)(int irqno, int level, int (*func)(void *),
 void *cookie, char *name),
 void (*intr_disestablish)(void *cookie),
-void (*intr_handle)(void *));
+void (*intr_handle)(void *),
+void (*route_ih)(void*, int, int));
 
 struct arm_intr_func {
int (*raise)(int);
@@ -98,6 +101,7 @@ struct arm_intr_func {
void *(*intr_establish)(int irqno, int level, int (*func)(void *),
void *cookie, char *name);
void (*intr_disestablish)(void *cookie);
+   void (*route_ih)(void *cookie, int enable, int cpuid);
 };
 
 extern struct arm_intr_func arm_intr_func;
@@ -131,6 +135,11 @@ extern uint32_t arm_smask[NIPL];
 
 #include 
 
+struct arm_intr_handle {
+   struct interrupt_controller *ih_ic;
+   void *ih_ih;
+};
+
 void   *arm_intr_establish(int irqno, int level, int (*func)(void *),
 void *cookie, char *name);
 voidarm_intr_disestablish(void *cookie);
@@ -153,6 +162,7 @@ struct interrupt_controller {
 
 voidarm_intr_init_fdt(void);
 voidarm_intr_register_fdt(struct interrupt_controller *);
+voidarm_intr_route_ih(void *, int, int);
 void   *arm_intr_establish_fdt(int, int, int (*)(void *),
void *, char *);
 void   *arm_intr_establish_fdt_idx(int, int, int, int (*)(void *),

Dale Rahn   dr...@dalerahn.com



Re: arm64 pmap fix: CANFAIL logic

2017-02-17 Thread Dale Rahn
On Fri, Feb 17, 2017 at 08:18:36PM +1100, Jonathan Gray wrote:
> On Fri, Feb 17, 2017 at 02:11:55AM -0500, Dale Rahn wrote:
> > The logic to handle PMAP_CANFAIL was inverted originally.
> > 
> > Code has been simplified so that it tests for !CANFAIL and panics in
> > that case.
> 
> Looks good, but how about using __func__?  Avoids going > 80 in one case.

This is a nice improvement over my proposal.

Thanks
Dale
> 
> Index: pmap.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 pmap.c
> --- pmap.c7 Feb 2017 23:05:33 -   1.20
> +++ pmap.c17 Feb 2017 09:13:44 -
> @@ -370,8 +370,9 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, str
>   vp1 = pool_get(_vp_pool, vp_pool_flags);
>   if (vp1 == NULL) {
>   if ((flags & PMAP_CANFAIL) == 0)
> - return ENOMEM;
> - panic("unable to allocate L1");
> + panic("%s: unable to allocate L1",
> + __func__);
> + return ENOMEM;
>   }
>   pmap_set_l1(pm, va, vp1, 0);
>   }
> @@ -384,8 +385,8 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, str
>   vp2 = pool_get(_vp_pool, vp_pool_flags);
>   if (vp2 == NULL) {
>   if ((flags & PMAP_CANFAIL) == 0)
> - return ENOMEM;
> - panic("unable to allocate L2");
> + panic("%s: unable to allocate L2", __func__);
> + return ENOMEM;
>   }
>   pmap_set_l2(pm, va, vp2, 0);
>   }
> @@ -395,8 +396,8 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, str
>   vp3 = pool_get(_vp_pool, vp_pool_flags);
>   if (vp3 == NULL) {
>   if ((flags & PMAP_CANFAIL) == 0)
> - return ENOMEM;
> - panic("unable to allocate L3");
> + panic("%s: unable to allocate L3", __func__);
> + return ENOMEM;
>   }
>   pmap_set_l3(pm, va, vp3, 0);
>   }
> @@ -503,19 +504,17 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
>   if (pted == NULL) {
>   pted = pool_get(_pted_pool, PR_NOWAIT | PR_ZERO);
>   if (pted == NULL) {
> - if ((flags & PMAP_CANFAIL) == 0) {
> - error = ENOMEM;
> - goto out;
> - }
> - panic("pmap_enter: failed to allocate pted");
> + if ((flags & PMAP_CANFAIL) == 0)
> + panic("%s: failed to allocate pted", __func__);
> + error = ENOMEM;
> + goto out;
>   }
>   if (pmap_vp_enter(pm, va, pted, flags)) {
> - if ((flags & PMAP_CANFAIL) == 0) {
> - error = ENOMEM;
> - pool_put(_pted_pool, pted);
> - goto out;
> - }
> - panic("pmap_enter: failed to allocate L2/L3");
> + if ((flags & PMAP_CANFAIL) == 0)
> + panic("%s: failed to allocate L2/L3", __func__);
> + error = ENOMEM;
> + pool_put(_pted_pool, pted);
> + goto out;
>   }
>   }
>  
> 
Dale Rahn   dr...@dalerahn.com



arm64 ddb support improvement

2017-02-16 Thread Dale Rahn
e_el1_irq") == 0)) {
+   (*pr)("--- interrupt ---\n");
+   } else if (
+   (strcmp (name, "handle_el0_sync") == 0) ||
+   (strcmp (name, "handle_el1_sync") == 0)) {
+   (*pr)("--- trap ---\n");
+   }
+   }
+   if (INKERNEL(frame)) {
+   if (frame <= lastframe) {
+   (*pr)("Bad frame pointer: 0x%lx\n", frame);
+   break;
+   }
+   } else {
+   if (kernel_only)
+   break;
+   }
+   --count;
}
 }
Dale Rahn   dr...@dalerahn.com



arm64 pmap fix: CANFAIL logic

2017-02-16 Thread Dale Rahn
The logic to handle PMAP_CANFAIL was inverted originally.

Code has been simplified so that it tests for !CANFAIL and panics in that
case.

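To make the contract concrete, a hedged caller-side sketch (hypothetical
helper; argument details vary by call site, and uvm_wait() here is just one
plausible recovery): with PMAP_CANFAIL the caller owns the failure, without
it pmap_enter() panics instead of returning ENOMEM.

int
map_one_page(pmap_t pm, vaddr_t va, paddr_t pa)
{
	int error;

	error = pmap_enter(pm, va, pa, PROT_READ,
	    PROT_READ | PMAP_WIRED | PMAP_CANFAIL);
	if (error == ENOMEM)
		uvm_wait("pmenter");	/* wait for memory, then retry */
	return error;
}
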
diff --git a/sys/arch/arm64/arm64/pmap.c b/sys/arch/arm64/arm64/pmap.c
index dfd91b4ce23..393ff202e39 100644
--- a/sys/arch/arm64/arm64/pmap.c
+++ b/sys/arch/arm64/arm64/pmap.c
@@ -370,8 +370,8 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, 
int flags)
vp1 = pool_get(_vp_pool, vp_pool_flags);
if (vp1 == NULL) {
if ((flags & PMAP_CANFAIL) == 0)
-   return ENOMEM;
-   panic("unable to allocate L1");
+   panic("pmap_vp_enter: unable to 
allocate L1");
+   return ENOMEM;
}
pmap_set_l1(pm, va, vp1, 0);
}
@@ -384,8 +384,8 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, 
int flags)
vp2 = pool_get(_vp_pool, vp_pool_flags);
if (vp2 == NULL) {
if ((flags & PMAP_CANFAIL) == 0)
-   return ENOMEM;
-   panic("unable to allocate L2");
+   panic("pmap_vp_enter: unable to allocate L2");
+   return ENOMEM;
}
pmap_set_l2(pm, va, vp2, 0);
}
@@ -395,8 +395,8 @@ pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, 
int flags)
vp3 = pool_get(_vp_pool, vp_pool_flags);
if (vp3 == NULL) {
if ((flags & PMAP_CANFAIL) == 0)
-   return ENOMEM;
-   panic("unable to allocate L3");
+   panic("pmap_vp_enter: unable to allocate L3");
+   return ENOMEM;
}
pmap_set_l3(pm, va, vp3, 0);
}
@@ -503,19 +503,17 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t 
prot, int flags)
if (pted == NULL) {
pted = pool_get(_pted_pool, PR_NOWAIT | PR_ZERO);
if (pted == NULL) {
-   if ((flags & PMAP_CANFAIL) == 0) {
-   error = ENOMEM;
-   goto out;
-   }
-   panic("pmap_enter: failed to allocate pted");
+   if ((flags & PMAP_CANFAIL) == 0)
+   panic("pmap_enter: failed to allocate pted");
+   error = ENOMEM;
+   goto out;
}
if (pmap_vp_enter(pm, va, pted, flags)) {
-   if ((flags & PMAP_CANFAIL) == 0) {
-   error = ENOMEM;
-   pool_put(_pted_pool, pted);
-   goto out;
-   }
-   panic("pmap_enter: failed to allocate L2/L3");
+   if ((flags & PMAP_CANFAIL) == 0)
+   panic("pmap_enter: failed to allocate L2/L3");
+   error = ENOMEM;
+   pool_put(_pted_pool, pted);
+   goto out;
}
}
 
Dale Rahn   dr...@dalerahn.com



Add sigchild handler to usbhidaction.

2011-10-15 Thread Dale Rahn
I first wrote this a while back, but it got lost in my trees. After I
updated to a recent snapshot I noticed I was out of processes again:
I had > 50 zombie processes sitting around.

I use usbhidaction to play/stop/skip music playing on my desktop
controlling xmms.

Without this patch, each time a key was pressed and xmms ran/exited it
would leave another zombie process.

Index: usbhidaction.c
===
RCS file: /cvs/src/usr.bin/usbhidaction/usbhidaction.c,v
retrieving revision 1.15
diff -u -p -r1.15 usbhidaction.c
--- usbhidaction.c  7 Mar 2011 14:59:06 -   1.15
+++ usbhidaction.c  15 Oct 2011 20:30:48 -
@@ -30,6 +30,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/wait.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -79,6 +80,19 @@ sighup(int signo)
reparse = 1;
 }
 
+static void
+sigchild(int signo)
+{
+   int status;
+   pid_t pid;
+   status = 0;
+   do {
+   pid = wait4(WAIT_ANY, &status, WNOHANG, NULL);
+   } while (pid != -1 && pid != 0);
+
+   printf("child exited");
+}
+
 int
 main(int argc, char **argv)
 {
@@ -159,6 +173,7 @@ main(int argc, char **argv)
 		errx(1, "report too large");
 
(void)signal(SIGHUP, sighup);
+   (void)signal(SIGCHLD, sigchild);
 
if (demon) {
 		if (daemon(0, 0) < 0)
Dale Rahn   dr...@dalerahn.com



Re: Add sigchild handler to usbhidaction.

2011-10-15 Thread Dale Rahn
On Sat, Oct 15, 2011 at 03:46:51PM -0500, Dale Rahn wrote:
 I first wrote this a while back, but it got lost in my trees. After I
 updated to a recent snapshot I noticed I was out of processes again:
 I had > 50 zombie processes sitting around.
 
 I use usbhidaction to play/stop/skip music playing on my desktop
 controlling xmms.
 
 Without this patch, each time a key was pressed and xmms ran/exited it
 would leave another zombie process.
 
So, I received some feedback about this; here is what should be a much
better fix.

Index: usbhidaction.c
===
RCS file: /cvs/src/usr.bin/usbhidaction/usbhidaction.c,v
retrieving revision 1.15
diff -u -p -r1.15 usbhidaction.c
--- usbhidaction.c  7 Mar 2011 14:59:06 -   1.15
+++ usbhidaction.c  15 Oct 2011 21:08:34 -
@@ -160,6 +160,9 @@ main(int argc, char **argv)
 
(void)signal(SIGHUP, sighup);
 
+   /* ignore the signal, allows init to reap any children that exit */
+   (void)signal(SIGCHLD, SIG_IGN);
+
if (demon) {
 		if (daemon(0, 0) < 0)
 			err(1, "daemon()");
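
For anyone curious why this works, a small standalone sketch (not part of
the patch): with SIGCHLD set to SIG_IGN, exiting children are never turned
into zombies, so there is nothing left to reap.

#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	signal(SIGCHLD, SIG_IGN);	/* children are reaped automatically */
	if (fork() == 0)
		_exit(0);
	sleep(1);
	/* Nothing to collect: wait() fails with ECHILD instead of
	 * returning a zombie's status. */
	printf("wait() returned %d\n", wait(NULL));
	return 0;
}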
Dale Rahn   dr...@dalerahn.com



Re: powerpc and unaligned accesses

2011-07-27 Thread Dale Rahn
On Wed, Jul 27, 2011 at 05:45:56PM +0200, Christian Weisgerber wrote:
 Can PowerPC do unaligned accesses or not?
 
 I'm bringing this up *again*, because the fragmentary dialog on ICB
 about how powerpc is supposed to behave doesn't agree with observable
 reality.
 
 | miod some models can afford unaligned access with a msr setting,
 | but we frown upon this and won't enable this anyway
 | miod so: no.
 | deraadt we should keep running powerpc strict align.
 
 The reality is that we *do* support unaligned accesses, at least
 on the machines people test on.  sthen@ noticed when testing the
 proposed port for snappy (Google's fast compression library) that
 unaligned accesses work fine and is reluctant to disable them in
 the port.
 
 I just noticed that archivers/xz also uses unaligned accesses on
 powerpc and nobody has complained to me that it breaks.
 
 I don't have a problem with disabling unaligned accesses in ports
 on powerpc, but without errors we are unlikely to find all of them
 and what's the justification then?
 
 -- 
 Christian naddy Weisgerber  na...@mips.inka.de
 

Technically powerpc is a strict alignment machine, however it isn't pedantic
about it (in big endian mode, like we use it). If the processor is configured
for little endian, any misalignment triggers an ALIGN exception; however,
in big endian mode, generally only floating point operations trigger
misalignment exceptions. I know of no integer operations that will trigger
an alignment exception in big endian mode.

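As a quick illustration, a standalone test (note the direct cast is formally
undefined behaviour in C even where the hardware tolerates it):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint32_t direct, safe;

	/* Misaligned integer load: per the above, this does not
	 * fault on big-endian powerpc. */
	direct = *(uint32_t *)(buf + 1);

	/* The strict-alignment-safe equivalent. */
	memcpy(&safe, buf + 1, sizeof(safe));

	printf("%08x %08x\n", direct, safe);
	return 0;
}
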
Dale Rahn   dr...@dalerahn.com



ld.so speedup for large binaries with many shared libraries

2011-04-23 Thread Dale Rahn
 refcount;   /* dep libs only */
int opencount;  /* # dlopen()  exe */
@@ -233,6 +236,7 @@ extern elf_object_t *_dl_objects;
 extern elf_object_t *_dl_last_object;
 
 extern elf_object_t *_dl_loading_object;
+void _dl_append_search(elf_object_t *object);
 
 extern const char *_dl_progname;
 extern struct r_debug *_dl_debug_map;
@@ -278,6 +282,7 @@ extern int _dl_symcachestat_hits;
 extern int _dl_symcachestat_lookups;
 TAILQ_HEAD(dlochld, dep_node);
 extern struct dlochld _dlopened_child_list;
+extern int _dl_search_list_valid;
 
 /* variables used to avoid duplicate node checking */
 int _dl_searchnum;
Dale Rahn   dr...@dalerahn.com



dont lose printfs in ddb to xconsole.

2011-04-03 Thread Dale Rahn
If xconsole is running (or xterm -C) and ddb is entered, the kernel printf
output will be queued for xconsole to print out; if one is on the text VT,
the output appears to be missing.

This stores the fact that we have entered ddb, and in that case the kernel
acts as if xconsole is not running. However, output continues to go to
xconsole if the user is able to return from ddb.

Index: ddb/db_trap.c
===
RCS file: /cvs/src/sys/ddb/db_trap.c,v
retrieving revision 1.15
diff -u -p -r1.15 db_trap.c
--- ddb/db_trap.c   27 Nov 2010 19:57:23 -  1.15
+++ ddb/db_trap.c   3 Apr 2011 14:10:49 -
@@ -48,6 +48,7 @@
 #include <ddb/db_sym.h>
 #include <ddb/db_extern.h>
 #include <ddb/db_interface.h>
+#include <ddb/db_var.h>
 
 void
 db_trap(int type, int code)
@@ -55,6 +56,7 @@ db_trap(int type, int code)
boolean_t   bkpt;
boolean_t   watchpt;
 
+   db_is_active = 1;
bkpt = IS_BREAKPOINT_TRAP(type, code);
watchpt = IS_WATCHPOINT_TRAP(type, code);
 
@@ -97,4 +99,5 @@ db_trap(int type, int code)
}
 
db_restart_at_pc(DDB_REGS, watchpt);
+   db_is_active = 0;
 }
Index: ddb/db_var.h
===
RCS file: /cvs/src/sys/ddb/db_var.h,v
retrieving revision 1.10
diff -u -p -r1.10 db_var.h
--- ddb/db_var.h8 Nov 2008 01:14:51 -   1.10
+++ ddb/db_var.h3 Apr 2011 14:11:27 -
@@ -65,10 +65,10 @@ extern int  db_max_line;
 extern int db_panic;
 extern int db_console;
 extern int db_log;
+extern int db_is_active;
 
int    ddb_sysctl(int *, u_int, void *, size_t *, void *, size_t,
   struct proc *);
 #endif
 
 #endif /* _DDB_DB_VAR_H_ */
-
Index: kern/subr_prf.c
===
RCS file: /cvs/src/sys/kern/subr_prf.c,v
retrieving revision 1.75
diff -u -p -r1.75 subr_prf.c
--- kern/subr_prf.c 26 Jul 2010 01:56:27 -  1.75
+++ kern/subr_prf.c 3 Apr 2011 14:09:23 -
 int    db_console = 1;
 #else
 int    db_console = 0;
 #endif
+
+/*
+ * flag to indicate if we are currently in ddb (on some processor)
+ */
+int db_is_active;
 #endif
 
 /*
@@ -324,10 +329,16 @@ void
 kputchar(int c, int flags, struct tty *tp)
 {
extern int msgbufmapped;
+   int ddb_active = 0;
+
+#ifdef DDB
+   ddb_active = db_is_active;
+#endif
 
if (panicstr)
constty = NULL;
-   if ((flags & TOCONS) && tp == NULL && constty) {
+
+   if ((flags & TOCONS) && tp == NULL && constty && !ddb_active) {
 		tp = constty;
 		flags |= TOTTY;
 	}
@@ -337,7 +348,7 @@ kputchar(int c, int flags, struct tty *t
 	if ((flags & TOLOG) &&
 	    c != '\0' && c != '\r' && c != 0177 && msgbufmapped)
 		msgbuf_putchar(c);
-   if ((flags & TOCONS) && constty == NULL && c != '\0')
+   if ((flags & TOCONS) && (constty == NULL || ddb_active) && c != '\0')
 		(*v_putc)(c);
 #ifdef DDB
 	if (flags & TODDB)

Dale Rahn   dr...@dalerahn.com



uvm_km_pgremove_intrsafe interface change

2011-04-01 Thread Dale Rahn
The uvm_km_pgremove_intrsafe function requires pages to be mapped in order
to free them, yet the caller is then expected to call pmap_kremove(). This
is scary, as the page is freed but still mapped.

Have uvm_km_pgremove_intrsafe() unmap as well as free.

Index: uvm/uvm_glue.c
===
RCS file: /cvs/src/sys/uvm/uvm_glue.c,v
retrieving revision 1.56
diff -u -p -r1.56 uvm_glue.c
--- uvm/uvm_glue.c  1 Apr 2011 15:43:13 -   1.56
+++ uvm/uvm_glue.c  1 Apr 2011 18:08:52 -
@@ -260,7 +260,6 @@ uvm_vslock_device(struct proc *p, void *
return 0;
 
uvm_km_pgremove_intrsafe(sva, sva + sz);
-   pmap_kremove(sva, sz);
pmap_update(pmap_kernel());
 out_unmap:
uvm_km_free(kernel_map, sva, sz);
@@ -291,7 +290,6 @@ uvm_vsunlock_device(struct proc *p, void
 
kva = trunc_page((vaddr_t)map);
uvm_km_pgremove_intrsafe(kva, kva + sz);
-   pmap_kremove(kva, sz);
pmap_update(pmap_kernel());
uvm_km_free(kernel_map, kva, sz);
 }
Index: uvm/uvm_km.c
===
RCS file: /cvs/src/sys/uvm/uvm_km.c,v
retrieving revision 1.86
diff -u -p -r1.86 uvm_km.c
--- uvm/uvm_km.c26 Aug 2010 16:08:24 -  1.86
+++ uvm/uvm_km.c1 Apr 2011 18:25:51 -
@@ -326,6 +326,7 @@ uvm_km_pgremove_intrsafe(vaddr_t start, 
pg = PHYS_TO_VM_PAGE(pa);
if (pg == NULL)
panic(uvm_km_pgremove_intrsafe: no page);
+   pmap_kremove(va, PAGE_SIZE);
uvm_pagefree(pg);
}
 }
Index: uvm/uvm_map.c
===
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.131
diff -u -p -r1.131 uvm_map.c
--- uvm/uvm_map.c   24 Dec 2010 21:49:04 -  1.131
+++ uvm/uvm_map.c   1 Apr 2011 18:11:38 -
@@ -1639,7 +1639,6 @@ uvm_unmap_remove(struct vm_map *map, vad
}
 	} else if (map->flags & VM_MAP_INTRSAFE) {
 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
-		pmap_kremove(entry->start, len);
 	} else if (UVM_ET_ISOBJ(entry) &&
 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
KASSERT(vm_map_pmap(map) == pmap_kernel());

Dale Rahn   dr...@dalerahn.com



Re: ARM linker script

2010-10-13 Thread Dale Rahn
On Thu, Oct 14, 2010 at 01:50:36AM +0200, Tobias Ulmer wrote:
 I'm currently loading the kernel at a fixed start address using u-boot.
 There's no relocation going on.
 Accessing anything in the data segment leads to crashes due to file- and
 address offsets desyncing.
 
 The patch below fixes this issue and, as a bonus, syncs the code with
 the comment.
 Possibly affected are armish, beagle, gumstix, palm and zaurus
 
 Index: arch/arm/conf/ldscript.tail
 ===
 RCS file: 
 /srv/boron/data/vcs/cvs/openbsd/src/sys/arch/arm/conf/ldscript.tail,v
 retrieving revision 1.4
 diff -u -p -r1.4 ldscript.tail
 --- arch/arm/conf/ldscript.tail   27 Jun 2009 14:44:39 -  1.4
 +++ arch/arm/conf/ldscript.tail   13 Oct 2010 23:17:30 -
 @@ -5,7 +5,7 @@
PROVIDE (etext = .);
/* Adjust the address for the data segment to start on the next page
   boundary.  */
 -  . = ALIGN(0x8000);
 +  . = ALIGN(0x1000);
.data:
AT (LOADADDR(.text) + (ADDR(.data) - ADDR(.text)))
{
 

How are you loading the kernel with u-boot? 

When I was doing this in the past, I was doing the following
$ ${OBJCOPY} -O binary bsd bsd.img
$ mkuboot -a arm -o linux -e ${KERNEL_BASE_PHYS} -l ${KERNEL_BASE_PHYS} bsd.img 
bsd.umg

This method had no problem with the current 'alignment'

I thought I recalled that the ldscript aligned the pages on 64k boundaries,
but that does not appear to be the case. Not certain why 32k would be
desirable. Will look into this more.

Dale Rahn   dr...@dalerahn.com



gcc4 cross builds (patch)

2010-09-24 Thread Dale Rahn
 -  1.15
+++ gnu/usr.bin/cc/libgcc/Makefile  24 Sep 2010 16:39:23 -
@@ -8,9 +8,13 @@ GCCDIR= ${GCCLIB}/gcc
 
 .include ../Makefile.tgt
 
+.if defined(CROSSDIR)
+LD=${CROSSDIR}/usr/${GCC_TARGET}/bin/ld
+.endif
+
 .if defined(CROSS_TARGET)
 #Building cc for target, CC should already be the cross compiler
-.elif exists(${.CURDIR}/../cc/obj)
+.elif ${.OBJDIR} != ${.CURDIR}
 CC=${.OBJDIR}/../cc/cc -B ${.OBJDIR}/../cc1
 .else
 CC=${.CURDIR}/../cc/cc -B ${.CURDIR}/../cc1
Dale Rahn   dr...@dalerahn.com



Re: dlclose() segfault in _dl_thread_kern_go()

2010-03-23 Thread Dale Rahn
On Tue, Mar 23, 2010 at 12:35:33PM -0400, Mark Bucciarelli wrote:
 Hi,
 
 The simple test program below
 segfaults in _dl_thread_kern_go().
 
 I suspect the library, but am not sure
 how to proceed.
 
 Add printf()'s to dlfcn.c and rebuild
 ld.so?
 
 Hopefully, my test program is wrong
 somehow.  :)
 
 Thanks for any suggestions,
 

There is some detail missing from this report, eg where did libbat.so come
from? Also how was it built?

Perhaps the problem is that the library was built threaded (-pthread)
but the main program has not been built with threading enabled (-pthread
missing).

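For illustration, building both sides consistently would look something
like this (hypothetical file names):

$ cc -pthread -fPIC -shared -o libbat.so bat.c
$ cc -pthread -o test test.c -L. -lbat
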
Dale Rahn   dr...@dalerahn.com



Re: VIA EHCI controller workaround needs testing.

2009-06-29 Thread Dale Rahn
On Mon, Jun 29, 2009 at 03:46:35PM -0400, Brad wrote:
 The following diff adds a workaround for an issue with the VIA VT6202
 EHCI controller hogging the PCI bus and causing poor performance for
 IDE and possibly other devices in the system.
 
 Please test if your system has a VIA VT6202 EHCI controller and
 provide a dmesg. If the workaround is being applied for the
 chipset you have then a message will be printed (this is temporary
 only to verify it is being applied for the appropriate revision
 of the chipset).
 
 
 From Linux
 
 

I tried out this patch, and found that the appropriate revision chip
is found in the thecus 2100 system (armish port). Initial testing showed
no performance difference. 

However, I looked at the diff more closely and found that it was wrong:
the original Linux code would read/write one byte, but the OpenBSD
interface does word (32-bit) reads and writes, so the register is
misaligned within that word.

After changing the code to do 32-bit accesses and to set the correct bit
in the little-endian 32-bit register, the system saw a significant
performance difference. dd if=/dev/zero of=file bs=1m saw a 10% speedup,
and ftp of a file from the local network saw a 7% speedup.

Arm has a smaller cache than other systems, so it may be affected by a
busy memory bus more than other platforms.

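For the record, the arithmetic behind the mask: bit b of the byte register
at config offset 0x48+n shows up as bit 8*n+b of the little-endian 32-bit
word read at offset 0x48, so the 0x2000 below corresponds to bit 5 of the
byte at 0x49 (1 << (8*1 + 5) == 0x2000).
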
Working, tested diff (for armish).

I have not verified if this diff makes sense on big endian platforms yet.

Index: ehci_pci.c
===
RCS file: /cvs/src/sys/dev/pci/ehci_pci.c,v
retrieving revision 1.16
diff -u -u -r1.16 ehci_pci.c
--- ehci_pci.c  25 Jun 2009 01:01:44 -  1.16
+++ ehci_pci.c  30 Jun 2009 02:56:22 -
@@ -69,6 +69,7 @@
 
 #define EHCI_SBx00_WORKAROUND_REG  0x50
 #define EHCI_SBx00_WORKAROUND_ENABLE   (1  3)
+#define EHCI_VT6202_WORKAROUND_REG 0x48
 
 intehci_pci_match(struct device *, void *, void *);
 void   ehci_pci_attach(struct device *, struct device *, void *);
@@ -127,18 +128,41 @@
 	EOWRITE2(&sc->sc, EHCI_USBINTR, 0);
 
/* Handle quirks */
-	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_ATI &&
-	    ((PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_ATI_SB600_EHCI ||
-	     (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_ATI_SB700_EHCI &&
-	      pci_find_device(NULL, ehci_sb700_match)))) {
-		pcireg_t value;
-
-		/* apply the ATI SB600/SB700 workaround */
-		value = pci_conf_read(sc->sc_pc, sc->sc_tag,
-		    EHCI_SBx00_WORKAROUND_REG);
-		pci_conf_write(sc->sc_pc, sc->sc_tag,
-		    EHCI_SBx00_WORKAROUND_REG, value |
-		    EHCI_SBx00_WORKAROUND_ENABLE);
+	switch (PCI_VENDOR(pa->pa_id)) {
+	case PCI_VENDOR_ATI:
+		if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_ATI_SB600_EHCI ||
+		    (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_ATI_SB700_EHCI &&
+		     pci_find_device(NULL, ehci_sb700_match))) {
+			pcireg_t value;
+
+			/* apply the ATI SB600/SB700 workaround */
+			value = pci_conf_read(sc->sc_pc, sc->sc_tag,
+			    EHCI_SBx00_WORKAROUND_REG);
+			pci_conf_write(sc->sc_pc, sc->sc_tag,
+			    EHCI_SBx00_WORKAROUND_REG, value |
+			    EHCI_SBx00_WORKAROUND_ENABLE);
+		}
+		break;
+
+	case PCI_VENDOR_VIATECH:
+		if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VIATECH_VT6202 &&
+		    (PCI_REVISION(pa->pa_class) & 0xf0) == 0x60) {
+			pcireg_t value;
+
+			/*
+			 * The VT6202 defaults to a 1 usec EHCI sleep time
+			 * which hogs the PCI bus *badly*. Setting bit 5 of
+			 * the register makes that sleep time use the
+			 * conventional 10 usec.
+			 */
+			value = pci_conf_read(sc->sc_pc, sc->sc_tag,
+			    EHCI_VT6202_WORKAROUND_REG);
+			if ((value & 0x2000) == 0)
+				printf(", applying VIA VT6202 workaround");
+			pci_conf_write(sc->sc_pc, sc->sc_tag,
+			    EHCI_VT6202_WORKAROUND_REG, value | 0x2000);
+		}
+		break;
 	}
 
/* Map and establish the interrupt. */
Dale Rahn   dr...@dalerahn.com