Re: [PATCH 10/17] RISC-V: Atomic and Locking Code

2017-07-12 Thread Palmer Dabbelt
On Wed, 12 Jul 2017 05:40:49 PDT (-0700), boqun.f...@gmail.com wrote:
> On Tue, Jul 11, 2017 at 06:31:23PM -0700, Palmer Dabbelt wrote:
> [...]
>> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
>> new file mode 100644
>> index ..b0a0c76e966a
>> --- /dev/null
>> +++ b/arch/riscv/include/asm/bitops.h
>> @@ -0,0 +1,216 @@
>> +/*
>> + * Copyright (C) 2012 Regents of the University of California
>> + *
>> + *   This program is free software; you can redistribute it and/or
>> + *   modify it under the terms of the GNU General Public License
>> + *   as published by the Free Software Foundation, version 2.
>> + *
>> + *   This program is distributed in the hope that it will be useful,
>> + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + *   GNU General Public License for more details.
>> + */
>> +
>> +#ifndef _ASM_RISCV_BITOPS_H
>> +#define _ASM_RISCV_BITOPS_H
>> +
>> +#ifndef _LINUX_BITOPS_H
>> +#error "Only  can be included directly"
>> +#endif /* _LINUX_BITOPS_H */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#ifndef smp_mb__before_clear_bit
>> +#define smp_mb__before_clear_bit()  smp_mb()
>> +#define smp_mb__after_clear_bit()   smp_mb()
>> +#endif /* smp_mb__before_clear_bit */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +
>> +#if (BITS_PER_LONG == 64)
>> +#define __AMO(op)   "amo" #op ".d"
>> +#elif (BITS_PER_LONG == 32)
>> +#define __AMO(op)   "amo" #op ".w"
>> +#else
>> +#error "Unexpected BITS_PER_LONG"
>> +#endif
>> +
>
> So test_and_{set,change,clear}_bit() have similar semantics to
> cmpxchg(), so
>
>> +#define __test_and_op_bit(op, mod, nr, addr)\
>> +({  \
>> +unsigned long __res, __mask;\
>> +__mask = BIT_MASK(nr);  \
>> +__asm__ __volatile__ (  \
>> +__AMO(op) " %0, %2, %1" \
>
> ... "aqrl" bit is needed here, and
>
>> +: "=r" (__res), "+A" (addr[BIT_WORD(nr)])   \
>> +: "r" (mod(__mask)));   \
>
> ... "memory" clobber is needed here.
>
>> +((__res & __mask) != 0);\
>> +})
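
For illustration, a minimal sketch of the macro with both suggested changes
applied (an ".aqrl" ordering suffix on the AMO and a "memory" clobber); this
shows the shape of the suggestion, not necessarily the exact code that was
merged:

#define __test_and_op_bit(op, mod, nr, addr)				\
({									\
	unsigned long __res, __mask;					\
	__mask = BIT_MASK(nr);						\
	__asm__ __volatile__ (						\
		__AMO(op) ".aqrl %0, %2, %1"				\
		: "=r" (__res), "+A" (addr[BIT_WORD(nr)])		\
		: "r" (mod(__mask))					\
		: "memory");						\
	((__res & __mask) != 0);					\
})
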
>> +
>> +#define __op_bit(op, mod, nr, addr) \
>> +__asm__ __volatile__ (  \
>> +__AMO(op) " zero, %1, %0"   \
>> +: "+A" (addr[BIT_WORD(nr)]) \
>> +: "r" (mod(BIT_MASK(nr
>> +
>> +/* Bitmask modifiers */
>> +#define __NOP(x)(x)
>> +#define __NOT(x)(~(x))
>> +
>> +/**
>> + * test_and_set_bit - Set a bit and return its old value
>> + * @nr: Bit to set
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and cannot be reordered.
>> + * It may be reordered on other architectures than x86.
>> + * It also implies a memory barrier.
>> + */
>> +static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
>> +{
>> +return __test_and_op_bit(or, __NOP, nr, addr);
>> +}
>> +
>> +/**
>> + * test_and_clear_bit - Clear a bit and return its old value
>> + * @nr: Bit to clear
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and cannot be reordered.
>> + * It can be reordered on other architectures other than x86.
>> + * It also implies a memory barrier.
>> + */
>> +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
>> +{
>> +return __test_and_op_bit(and, __NOT, nr, addr);
>> +}
>> +
>> +/**
>> + * test_and_change_bit - Change a bit and return its old value
>> + * @nr: Bit to change
>> + * @addr: Address to count from
>> + *
>> + * This operation is atomic and cannot be reordered.
>> + * It also implies a memory barrier.
>> + */
>> +static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
>> +{
>> +return __test_and_op_bit(xor, __NOP, nr, addr);
>> +}
>> +
>> +/**
>> + * set_bit - Atomically set a bit in memory
>> + * @nr: the bit to set
>> + * @addr: the address to start counting from
>> + *
>> + * This function is atomic and may not be reordered.  See __set_bit()
>
> This is incorrect, {set,change,clear}_bit() can be reordered, see
> Documentation/memory-barriers.txt, they are just relaxed atomics. But I
> think you just copy this from x86 code, so maybe x86 code needs help
> too, at least claim that's only x86-specific guarantee.

I went ahead and fixed our comments.

  
https://github.com/riscv/riscv-linux/commit/38d727b99b9eb76fac533cebc23f89d364b7d60d

>
>> + * if you do not require the atomic guarantees.
>> + *
>> + * Note: there are no guarantees that this function will not be reordered
>> + * on non x86 

Re: [PATCH 10/17] RISC-V: Atomic and Locking Code

2017-07-12 Thread Arnd Bergmann
On Wed, Jul 12, 2017 at 3:31 AM, Palmer Dabbelt  wrote:

> +/*
> + * FIXME: I'm flip-flopping on whether or not we should keep this or enforce
> + * the ordering with I/O on spinlocks.  The worry is that drivers won't get
> + * this correct, but I also don't want to introduce a fence into the lock code
> + * that otherwise only uses AMOs and LR/SC.   For now I'm leaving this here:
> + * "w,o" is sufficient to ensure that all writes to the device has completed
> + * before the write to the spinlock is allowed to commit.  I surmised this from
> + * reading "ACQUIRES VS I/O ACCESSES" in memory-barriers.txt.
> + */
> +#define mmiowb()   __asm__ __volatile__ ("fence o,w" : : : "memory");

Not sure if this has been discussed before, but have you considered doing
it like powerpc with a per-cpu flag that gets checked by spin_unlock()?
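
Roughly, the powerpc-style scheme being referred to looks like the sketch
below; the flag and function names here are made up for illustration (only
writel_cpu() and the "fence o,w" come from the patch), so this is not code
from either tree:

#include <linux/percpu.h>

/* hypothetical per-CPU flag, set on MMIO writes */
DEFINE_PER_CPU(int, riscv_io_sync);

static inline void writel_flagged(u32 val, volatile void __iomem *addr)
{
	writel_cpu(val, addr);			/* relaxed device write */
	this_cpu_write(riscv_io_sync, 1);	/* remember we touched a device */
}

static inline void arch_spin_unlock_io_check(void)
{
	if (this_cpu_read(riscv_io_sync)) {
		this_cpu_write(riscv_io_sync, 0);
		/* order the device writes before the unlocking store */
		__asm__ __volatile__ ("fence o,w" : : : "memory");
	}
}

With that arrangement mmiowb() can become a no-op for drivers that already
hold a spinlock around their MMIO sequences.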



> +/*
> + * Relaxed I/O memory access primitives. These follow the Device memory
> + * ordering rules but do not guarantee any ordering relative to Normal memory
> + * accesses.  The memory barriers here are necessary as RISC-V doesn't define
> + * any ordering constraints on accesses to the device I/O space.  These are
> + * defined to order the indicated access (either a read or write) with all
> + * other I/O memory accesses.
> + */
> +/*
> + * FIXME: The platform spec will define the default Linux-capable platform to
> + * have some extra IO ordering constraints that will make these fences
> + * unnecessary.
> + */
> +#define __iorrmb() __asm__ __volatile__ ("fence i,io" : : : "memory");
> +#define __iorwmb() __asm__ __volatile__ ("fence io,o" : : : "memory");
> +
> +#define readb_relaxed(c)   ({ u8  __v = readb_cpu(c); __iorrmb(); __v; })
> +#define readw_relaxed(c)   ({ u16 __v = readw_cpu(c); __iorrmb(); __v; })
> +#define readl_relaxed(c)   ({ u32 __v = readl_cpu(c); __iorrmb(); __v; })
> +#define readq_relaxed(c)   ({ u64 __v = readq_cpu(c); __iorrmb(); __v; })
> +
> +#define writeb_relaxed(v,c)({ __iorwmb(); writeb_cpu((v),(c)); })
> +#define writew_relaxed(v,c)({ __iorwmb(); writew_cpu((v),(c)); })
> +#define writel_relaxed(v,c)({ __iorwmb(); writel_cpu((v),(c)); })
> +#define writeq_relaxed(v,c)({ __iorwmb(); writeq_cpu((v),(c)); })

What are the ordering rules for a write followed by a read? You say
that there are no ordering constraints on I/O access at all, but you don't
add any barriers between that pair, which is probably the most important
to get right.

Also, can you say here why the architecture is defined like this?
I'm sure that there have been very careful considerations on what
the semantics should be, yet it appears that MMIO access is defined
to be so relaxed purely as an optimization that as a consequence
leads to much slower I/O than a more typical definition as the operating
system has to add much heavier barriers than would otherwise be
needed.

> +/*
> + * I/O memory access primitives. Reads are ordered relative to any
> + * following Normal memory access. Writes are ordered relative to any prior
> + * Normal memory access.  The memory barriers here are necessary as RISC-V
> + * doesn't define any ordering between the memory space and the I/O space.
> + * They may be stronger than necessary ("i,r" and "w,o" might be sufficient),
> + * but I feel kind of queasy making these weaker in any manner than the relaxed
> + * versions above.
> + */
> +#define __iormb()  __asm__ __volatile__ ("fence i,ior" : : : "memory");
> +#define __iowmb()  __asm__ __volatile__ ("fence iow,o" : : : "memory");
> +
> +#define readb(c)   ({ u8  __v = readb_cpu(c); __iormb(); __v; })
> +#define readw(c)   ({ u16 __v = readw_cpu(c); __iormb(); __v; })
> +#define readl(c)   ({ u32 __v = readl_cpu(c); __iormb(); __v; })
> +#define readq(c)   ({ u64 __v = readq_cpu(c); __iormb(); __v; })
> +
> +#define writeb(v,c)({ __iowmb(); writeb_cpu((v),(c)); })
> +#define writew(v,c)({ __iowmb(); writew_cpu((v),(c)); })
> +#define writel(v,c)({ __iowmb(); writel_cpu((v),(c)); })
> +#define writeq(v,c)({ __iowmb(); writeq_cpu((v),(c)); })

These look good to me (once the _relaxed version has been clarified, the
same question applies here as well). I see that you don't override the
PCI I/O space accessors (inb/outb etc), which have even stricter
ordering requirements.
I think you also need a barrier after outb/outw/outl to ensure that the write
has completed on the bus before anything else happens.

Finally, I think you also need to override the string functions
(readsl/writesl/insl/outsl) that are lacking barriers in the asm-generic
version.

 Arnd
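
As a rough illustration of those last two points, the string and port
accessors could be wrapped along these lines (a sketch reusing
readl_cpu()/writeb_cpu() and the fence macros from the patch; the trailing
"fence o,io" for outb() is an assumption about what "completed on the bus"
would take, not something the patch defines):

static inline void readsl(const volatile void __iomem *addr, void *buffer,
			  unsigned int count)
{
	u32 *buf = buffer;

	while (count--)
		*buf++ = readl_cpu(addr);	/* relaxed device reads */
	__iormb();				/* one fence for the whole transfer */
}

static inline void outb(u8 value, unsigned long port)
{
	__iowmb();				/* order prior accesses before the store */
	writeb_cpu(value, PCI_IOBASE + port);
	__asm__ __volatile__ ("fence o,io" : : : "memory");
}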


Re: [PATCH 10/17] RISC-V: Atomic and Locking Code

2017-07-12 Thread Peter Zijlstra
On Wed, Jul 12, 2017 at 08:40:49PM +0800, Boqun Feng wrote:
> > +/**
> > + * set_bit - Atomically set a bit in memory
> > + * @nr: the bit to set
> > + * @addr: the address to start counting from
> > + *
> > + * This function is atomic and may not be reordered.  See __set_bit()
> 
> This is incorrect, {set,change,clear}_bit() can be reordered, see
> Documentation/memory-barriers.txt, they are just relaxed atomics. But I
> think you just copy this from x86 code, so maybe x86 code needs help
> too, at least claim that's only x86-specific guarantee.

Yeah, I suspect that's an x86 special (all our atomics are fully
ordered).

> > +/**
> > + * test_and_set_bit_lock - Set a bit and return its old value, for lock
> > + * @nr: Bit to set
> > + * @addr: Address to count from
> > + *
> > + * This operation is atomic and provides acquire barrier semantics.
> > + * It can be used to implement bit locks.
> > + */
> > +static inline int test_and_set_bit_lock(
> > +   unsigned long nr, volatile unsigned long *addr)
> > +{
> > +   return test_and_set_bit(nr, addr);
> 
> If you want, you can open code an "amoor.aq" here, because
> test_and_set_bit_lock() only needs an acquire barrier.
> 
> > +}
> > +
> > +/**
> > + * clear_bit_unlock - Clear a bit in memory, for unlock
> > + * @nr: the bit to set
> > + * @addr: the address to start counting from
> > + *
> > + * This operation is atomic and provides release barrier semantics.
> > + */
> > +static inline void clear_bit_unlock(
> > +   unsigned long nr, volatile unsigned long *addr)
> > +{
> 
> You need a smp_mb__before_atomic() here, because clear_bit() is only
> relaxed atomic. And clear_bit_unlock() is a release.

alternatively you can do "amoand.rl".
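
Open-coded, the suggestion would look roughly like the sketch below, reusing
the __AMO() helper from the bitops patch (the other option, as noted above,
is to keep the generic helpers and put an smp_mb__before_atomic() in front of
the relaxed clear_bit()); this is an illustration, not necessarily what was
merged:

static inline int test_and_set_bit_lock(unsigned long nr,
					volatile unsigned long *addr)
{
	unsigned long res, mask = BIT_MASK(nr);

	__asm__ __volatile__ (
		__AMO(or) ".aq %0, %2, %1"	/* acquire ordering only */
		: "=r" (res), "+A" (addr[BIT_WORD(nr)])
		: "r" (mask)
		: "memory");
	return (res & mask) != 0;
}

static inline void clear_bit_unlock(unsigned long nr,
				    volatile unsigned long *addr)
{
	__asm__ __volatile__ (
		__AMO(and) ".rl zero, %1, %0"	/* release ordering only */
		: "+A" (addr[BIT_WORD(nr)])
		: "r" (~BIT_MASK(nr))
		: "memory");
}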


Re: [PATCH 10/17] RISC-V: Atomic and Locking Code

2017-07-12 Thread Boqun Feng
On Wed, Jul 12, 2017 at 08:40:49PM +0800, Boqun Feng wrote:
[...]
> > +/**
> > + * set_bit - Atomically set a bit in memory
> > + * @nr: the bit to set
> > + * @addr: the address to start counting from
> > + *
> > + * This function is atomic and may not be reordered.  See __set_bit()
> 
> This is incorrect, {set,change,clear}_bit() can be reordered, see
> Documentation/memory-barriers.txt, they are just relaxed atomics. But I
> think you just copy this from x86 code, so maybe x86 code needs help
> too, at least claim that's only x86-specific guarantee.
> 
> > + * if you do not require the atomic guarantees.
> > + *
> > + * Note: there are no guarantees that this function will not be reordered
> > + * on non x86 architectures, so if you are writing portable code,
> > + * make sure not to rely on its reordering guarantees.
> > + *

Hmmm.. the claim is right here ;-/

As your implementation has relaxed semantics, you'd better rewrite the
comment ;-)

Regards,
Boqun
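
For instance, the kernel-doc could be reworded along these lines to match
the relaxed implementation (a suggested phrasing, not the wording that was
ultimately committed):

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This is a relaxed atomic operation: it is atomic with respect to other
 * bit operations on the same word, but it implies no memory barrier and may
 * be reordered with other memory accesses.  Use
 * smp_mb__{before,after}_atomic() if ordering is required.
 */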




Re: [PATCH 10/17] RISC-V: Atomic and Locking Code

2017-07-12 Thread Boqun Feng
On Tue, Jul 11, 2017 at 06:31:23PM -0700, Palmer Dabbelt wrote:
[...]
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> new file mode 100644
> index ..b0a0c76e966a
> --- /dev/null
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -0,0 +1,216 @@
> +/*
> + * Copyright (C) 2012 Regents of the University of California
> + *
> + *   This program is free software; you can redistribute it and/or
> + *   modify it under the terms of the GNU General Public License
> + *   as published by the Free Software Foundation, version 2.
> + *
> + *   This program is distributed in the hope that it will be useful,
> + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *   GNU General Public License for more details.
> + */
> +
> +#ifndef _ASM_RISCV_BITOPS_H
> +#define _ASM_RISCV_BITOPS_H
> +
> +#ifndef _LINUX_BITOPS_H
> +#error "Only  can be included directly"
> +#endif /* _LINUX_BITOPS_H */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#ifndef smp_mb__before_clear_bit
> +#define smp_mb__before_clear_bit()  smp_mb()
> +#define smp_mb__after_clear_bit()   smp_mb()
> +#endif /* smp_mb__before_clear_bit */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +#if (BITS_PER_LONG == 64)
> +#define __AMO(op)"amo" #op ".d"
> +#elif (BITS_PER_LONG == 32)
> +#define __AMO(op)"amo" #op ".w"
> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +

So test_and_{set,change,clear}_bit() have similar semantics to
cmpxchg(), so

> +#define __test_and_op_bit(op, mod, nr, addr) \
> +({   \
> + unsigned long __res, __mask;\
> + __mask = BIT_MASK(nr);  \
> + __asm__ __volatile__ (  \
> + __AMO(op) " %0, %2, %1" \

... "aqrl" bit is needed here, and

> + : "=r" (__res), "+A" (addr[BIT_WORD(nr)])   \
> + : "r" (mod(__mask)));   \

... "memory" clobber is needed here.

> + ((__res & __mask) != 0);\
> +})
> +
> +#define __op_bit(op, mod, nr, addr)  \
> + __asm__ __volatile__ (  \
> + __AMO(op) " zero, %1, %0"   \
> + : "+A" (addr[BIT_WORD(nr)]) \
> + : "r" (mod(BIT_MASK(nr
> +
> +/* Bitmask modifiers */
> +#define __NOP(x) (x)
> +#define __NOT(x) (~(x))
> +
> +/**
> + * test_and_set_bit - Set a bit and return its old value
> + * @nr: Bit to set
> + * @addr: Address to count from
> + *
> + * This operation is atomic and cannot be reordered.
> + * It may be reordered on other architectures than x86.
> + * It also implies a memory barrier.
> + */
> +static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
> +{
> + return __test_and_op_bit(or, __NOP, nr, addr);
> +}
> +
> +/**
> + * test_and_clear_bit - Clear a bit and return its old value
> + * @nr: Bit to clear
> + * @addr: Address to count from
> + *
> + * This operation is atomic and cannot be reordered.
> + * It can be reordered on other architectures other than x86.
> + * It also implies a memory barrier.
> + */
> +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
> +{
> + return __test_and_op_bit(and, __NOT, nr, addr);
> +}
> +
> +/**
> + * test_and_change_bit - Change a bit and return its old value
> + * @nr: Bit to change
> + * @addr: Address to count from
> + *
> + * This operation is atomic and cannot be reordered.
> + * It also implies a memory barrier.
> + */
> +static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
> +{
> + return __test_and_op_bit(xor, __NOP, nr, addr);
> +}
> +
> +/**
> + * set_bit - Atomically set a bit in memory
> + * @nr: the bit to set
> + * @addr: the address to start counting from
> + *
> + * This function is atomic and may not be reordered.  See __set_bit()

This is incorrect, {set,change,clear}_bit() can be reordered, see
Documentation/memory-barriers.txt, they are just relaxed atomics. But I
think you just copy this from x86 code, so maybe x86 code needs help
too, at least claim that's only x86-specific guarantee.

> + * if you do not require the atomic guarantees.
> + *
> + * Note: there are no guarantees that this function will not be reordered
> + * on non x86 architectures, so if you are writing portable code,
> + * make sure not to rely on its reordering guarantees.
> + *
> + * Note that @nr may be almost arbitrarily large; this function is not
> + * restricted to acting on a single-word quantity.
> + */
> +static inline void set_bit(int nr, volatile unsigned long *addr)
> +{
> + __op_bit(or, __NOP, 

[PATCH 10/17] RISC-V: Atomic and Locking Code

2017-07-11 Thread Palmer Dabbelt
This contains all the code that directly interfaces with the RISC-V
memory model.  While this code conforms to the current RISC-V ISA
specifications (user 2.2 and priv 1.10), the memory model is somewhat
underspecified in those documents.  There is a working group that hopes
to produce a formal memory model by the end of the year, but my
understanding is that the basic definitions we're relying on here won't
change significantly.

Signed-off-by: Palmer Dabbelt 
---
 arch/riscv/include/asm/atomic.h | 328 
 arch/riscv/include/asm/barrier.h|  68 +++
 arch/riscv/include/asm/bitops.h | 216 +
 arch/riscv/include/asm/cacheflush.h |  39 
 arch/riscv/include/asm/cmpxchg.h| 134 +
 arch/riscv/include/asm/io.h | 180 ++
 arch/riscv/include/asm/spinlock.h   | 165 
 arch/riscv/include/asm/spinlock_types.h |  33 
 arch/riscv/include/asm/tlb.h|  24 +++
 arch/riscv/include/asm/tlbflush.h   |  64 +++
 10 files changed, 1251 insertions(+)
 create mode 100644 arch/riscv/include/asm/atomic.h
 create mode 100644 arch/riscv/include/asm/barrier.h
 create mode 100644 arch/riscv/include/asm/bitops.h
 create mode 100644 arch/riscv/include/asm/cacheflush.h
 create mode 100644 arch/riscv/include/asm/cmpxchg.h
 create mode 100644 arch/riscv/include/asm/io.h
 create mode 100644 arch/riscv/include/asm/spinlock.h
 create mode 100644 arch/riscv/include/asm/spinlock_types.h
 create mode 100644 arch/riscv/include/asm/tlb.h
 create mode 100644 arch/riscv/include/asm/tlbflush.h

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
new file mode 100644
index ..ee3ab06e492b
--- /dev/null
+++ b/arch/riscv/include/asm/atomic.h
@@ -0,0 +1,328 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_RISCV_ATOMIC_H
+#define _ASM_RISCV_ATOMIC_H
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+# include 
+#else
+# if (__riscv_xlen < 64)
+#  error "64-bit atomics require XLEN to be at least 64"
+# endif
+#endif
+
+#include 
+#include 
+
+#define ATOMIC_INIT(i) { (i) }
+static __always_inline int atomic_read(const atomic_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic_set(atomic_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC64_INIT(i) { (i) }
+static __always_inline int atomic64_read(const atomic64_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic64_set(atomic64_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+#endif
+
+/*
+ * First, the atomic ops that have no ordering constraints and therefore don't
+ * have the AQ or RL bits set.  These don't return anything, so there's only
+ * one version to worry about.
+ */
+#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)		\
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
+{										\
+	__asm__ __volatile__ (							\
+		"amo" #asm_op "." #asm_type " zero, %1, %0"			\
+		: "+A" (v->counter)						\
+		: "r" (I)							\
+		: "memory");							\
+}
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )
+#else
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )   \
+ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#endif
+
+ATOMIC_OPS(add, add, +,  i)
+ATOMIC_OPS(sub, add, +, -i)
+ATOMIC_OPS(and, and, &,  i)
+ATOMIC_OPS( or,  or, |,  i)
+ATOMIC_OPS(xor, xor, ^,  i)
+
+#undef ATOMIC_OP
+#undef ATOMIC_OPS
+
+/*
+ * Atomic ops that have ordered, relaxed, acquire, and release variants.
+ * There are two flavors of these: the arithmetic ops have both fetch and return
+ * versions, while the logical ops only have fetch versions.
+ */
+#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix)	\
+static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i,
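
For context, the ordering variants that the comment above describes are
generated by parameterizing the AMO ordering suffix; the following is a
simplified sketch of that idea for a single operation, not the patch's
actual ATOMIC_FETCH_OP macro:

#define ATOMIC_FETCH_ADD(order, asm_order)				\
static __always_inline int atomic_fetch_add##order(int i, atomic_t *v)	\
{									\
	int ret;							\
	__asm__ __volatile__ (						\
		"amoadd.w" asm_order " %1, %2, %0"			\
		: "+A" (v->counter), "=r" (ret)				\
		: "r" (i)						\
		: "memory");						\
	return ret;							\
}

ATOMIC_FETCH_ADD(        , ".aqrl")	/* fully ordered */
ATOMIC_FETCH_ADD(_relaxed, ""     )	/* no ordering constraint */
ATOMIC_FETCH_ADD(_acquire, ".aq"  )	/* acquire */
ATOMIC_FETCH_ADD(_release, ".rl"  )	/* release */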
