Re: svn commit: r332389 - head/sys/net

2018-04-11 Thread K. Macy
Yup. git<->phab patch update fail. Maybe some day we can move to git.
-M

On Wed, Apr 11, 2018 at 10:12 AM, Jonathan T. Looney  wrote:
> It appears this is causing panics (see the emails on freebsd-current@).
>
> From a brief glance, it appears that the new STATE_LOCK macros is used (and
> causes panics), but the STATE_LOCK_INIT macro is not used. Is it possible
> the code is missing an initialization call for the new mutex?
>
> Jonathan
>
> On Tue, Apr 10, 2018 at 3:48 PM, Stephen Hurd  wrote:
>
>> Author: shurd
>> Date: Tue Apr 10 19:48:24 2018
>> New Revision: 332389
>> URL: https://svnweb.freebsd.org/changeset/base/332389
>>
>> Log:
>>   Split out flag manipulation from general context manipulation in iflib
>>
>>   To avoid blocking on the context lock in the swi thread and risk
>> potential
>>   deadlocks, this change protects lighter weight updates that only need to
>>   be consistent with each other with their own lock.
>>
>>   Submitted by: Matthew Macy 
>>   Reviewed by:  shurd
>>   Sponsored by: Limelight Networks
>>   Differential Revision:https://reviews.freebsd.org/D14967
>>
>> Modified:
>>   head/sys/net/iflib.c
>>
>> Modified: head/sys/net/iflib.c
>> 
>> ==
>> --- head/sys/net/iflib.cTue Apr 10 19:42:50 2018(r332388)
>> +++ head/sys/net/iflib.cTue Apr 10 19:48:24 2018(r332389)
>> @@ -1,5 +1,5 @@
>>  /*-
>> - * Copyright (c) 2014-2017, Matthew Macy 
>> + * Copyright (c) 2014-2018, Matthew Macy 
>>   * All rights reserved.
>>   *
>>   * Redistribution and use in source and binary forms, with or without
>> @@ -163,7 +163,8 @@ struct iflib_ctx {
>> if_shared_ctx_t ifc_sctx;
>> struct if_softc_ctx ifc_softc_ctx;
>>
>> -   struct mtx ifc_mtx;
>> +   struct mtx ifc_ctx_mtx;
>> +   struct mtx ifc_state_mtx;
>>
>> uint16_t ifc_nhwtxqs;
>> uint16_t ifc_nhwrxqs;
>> @@ -318,8 +319,10 @@ typedef struct iflib_sw_tx_desc_array {
>>  #defineIFC_INIT_DONE   0x020
>>  #defineIFC_PREFETCH0x040
>>  #defineIFC_DO_RESET0x080
>> -#defineIFC_CHECK_HUNG  0x100
>> +#defineIFC_DO_WATCHDOG 0x100
>> +#defineIFC_CHECK_HUNG  0x200
>>
>> +
>>  #define CSUM_OFFLOAD   (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
>>  CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
>>  CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
>> @@ -535,13 +538,19 @@ rxd_info_zero(if_rxd_info_t ri)
>>
>>  #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) &
>> IFF_DRV_RUNNING))
>>
>> -#define CTX_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_mtx, _name,
>> "iflib ctx lock", MTX_DEF)
>> +#define CTX_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_ctx_mtx, _name,
>> "iflib ctx lock", MTX_DEF)
>> +#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_ctx_mtx)
>> +#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_ctx_mtx)
>> +#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_ctx_mtx)
>>
>> -#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx)
>> -#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx)
>> -#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx)
>>
>> +#define STATE_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_state_mtx,
>> _name, "iflib state lock", MTX_DEF)
>> +#define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx)
>> +#define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx)
>> +#define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx)
>>
>> +
>> +
>>  #define CALLOUT_LOCK(txq)  mtx_lock(&txq->ift_mtx)
>>  #define CALLOUT_UNLOCK(txq)mtx_unlock(&txq->ift_mtx)
>>
>> @@ -2144,18 +2153,14 @@ iflib_timer(void *arg)
>> if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)
>> callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq,
>> txq->ift_timer.c_cpu);
>> return;
>> -hung:
>> -   CTX_LOCK(ctx);
>> -   if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
>> + hung:
>> device_printf(ctx->ifc_dev,  "TX(%d) desc avail = %d, pidx = %d\n",
>>   txq->ift_id, TXQ_AVAIL(txq),
>> txq->ift_pidx);
>> -
>> -   IFDI_WATCHDOG_RESET(ctx);
>> -   ctx->ifc_watchdog_events++;
>> -
>> -   ctx->ifc_flags |= IFC_DO_RESET;
>> +   STATE_LOCK(ctx);
>> +   if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
>> +   ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET);
>> iflib_admin_intr_deferred(ctx);
>> -   CTX_UNLOCK(ctx);
>> +   STATE_UNLOCK(ctx);
>>  }
>>
>>  static void
>> @@ -2673,10 +2678,10 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
>> return true;
>> return (iflib_rxd_avail(ctx, rxq, *cidxp, 1));
>>  err:
>> -   CTX_LOCK(ctx);
>> +   STATE_LOCK(ctx);
>> ctx->ifc_flags |= IFC_DO_RESET;
>> iflib_admin_intr_deferred(ctx);
>> -   CTX_UNLOCK(ctx);
>> + 

Re: svn commit: r332419 - head/sys/net

2018-04-11 Thread K. Macy
There was a panic inducing merge error, please apply if you encounter problems:

@ -2288,7 +2294,7 @@ iflib_stop(if_ctx_t ctx)
for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
/* make sure all transmitters have completed before
proceeding XXX */

-   for (j = 0, di = txq->ift_ifdi; j < rxq->ifr_nfl; j++, di++)
+   for (j = 0, di = rxq->ifr_ifdi; j < rxq->ifr_nfl; j++, di++)
bzero((void *)di->idi_vaddr, di->idi_size);
/* also resets the free lists pidx/cidx */
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)

On Wed, Apr 11, 2018 at 2:41 PM, Stephen Hurd  wrote:
> Author: shurd
> Date: Wed Apr 11 21:41:59 2018
> New Revision: 332419
> URL: https://svnweb.freebsd.org/changeset/base/332419
>
> Log:
>   Properly initialize ifc_nhwtxqs.
>
>   Also, since ifc_nhwrxqs is only used in one place, remove it from the 
> struct.
>   This was preventing iflib_dma_free() from being called via
>   iflib_device_detach().
>
>   Submitted by: Matthew Macy 
>   Reviewed by:  shurd
>   Sponsored by: Limelight Networks
>
> Modified:
>   head/sys/net/iflib.c
>
> Modified: head/sys/net/iflib.c
> ==
> --- head/sys/net/iflib.cWed Apr 11 20:04:06 2018(r332418)
> +++ head/sys/net/iflib.cWed Apr 11 21:41:59 2018(r332419)
> @@ -166,7 +166,6 @@ struct iflib_ctx {
> struct mtx ifc_mtx;
>
> uint16_t ifc_nhwtxqs;
> -   uint16_t ifc_nhwrxqs;
>
> iflib_txq_t ifc_txqs;
> iflib_rxq_t ifc_rxqs;
> @@ -2289,7 +2288,7 @@ iflib_stop(if_ctx_t ctx)
> for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
> /* make sure all transmitters have completed before 
> proceeding XXX */
>
> -   for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, 
> di++)
> +   for (j = 0, di = txq->ift_ifdi; j < rxq->ifr_nfl; j++, di++)
> bzero((void *)di->idi_vaddr, di->idi_size);
> /* also resets the free lists pidx/cidx */
> for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
> @@ -4198,6 +4197,7 @@ iflib_device_register(device_t dev, void *sc, if_share
>
> scctx = &ctx->ifc_softc_ctx;
> ifp = ctx->ifc_ifp;
> +   ctx->ifc_nhwtxqs = sctx->isc_ntxqs;
>
> /*
>  * XXX sanity check that ntxd & nrxd are a power of 2
> ___
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r333175 - in head/sys: kern net netinet netinet6 sys

2018-05-03 Thread K. Macy
Can you give any context on what they're doing? In addition - even on
a production kernel it's possible to compile in DDB to at least get a
backtrace. Your report only gives us enough information to know that
there is _an_ issue. It's difficult to proceed on this alone. I do
have a report from the FreeBSD CI infrastructure that we're looking in
to now.  With luck that is the same issue.

-M

On Thu, May 3, 2018 at 12:31 PM, O. Hartmann  wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA512
>
> Am Wed, 2 May 2018 19:36:29 + (UTC)
> Stephen Hurd  schrieb:
>
>> Author: shurd
>> Date: Wed May  2 19:36:29 2018
>> New Revision: 333175
>> URL: https://svnweb.freebsd.org/changeset/base/333175
>>
>> Log:
>>   Separate list manipulation locking from state change in multicast
>>
>>   Multicast incorrectly calls in to drivers with a mutex held causing drivers
>>   to have to go through all manner of contortions to use a non sleepable 
>> lock.
>>   Serialize multicast updates instead.
>>
>>   Submitted by:   mmacy 
>>   Reviewed by:shurd, sbruno
>>   Sponsored by:   Limelight Networks
>>   Differential Revision:  https://reviews.freebsd.org/D14969
>>
>> Modified:
>>   head/sys/kern/subr_gtaskqueue.c
>>   head/sys/kern/subr_witness.c
>>   head/sys/net/if.c
>>   head/sys/netinet/igmp.c
>>   head/sys/netinet/igmp_var.h
>>   head/sys/netinet/in.c
>>   head/sys/netinet/in_mcast.c
>>   head/sys/netinet/in_pcb.c
>>   head/sys/netinet/in_var.h
>>   head/sys/netinet/ip_carp.c
>>   head/sys/netinet6/in6.c
>>   head/sys/netinet6/in6_ifattach.c
>>   head/sys/netinet6/in6_mcast.c
>>   head/sys/netinet6/in6_pcb.c
>>   head/sys/netinet6/in6_var.h
>>   head/sys/netinet6/mld6.c
>>   head/sys/netinet6/mld6_var.h
>>   head/sys/sys/gtaskqueue.h
>>
>> Modified: head/sys/kern/subr_gtaskqueue.c
>> ==
>> --- head/sys/kern/subr_gtaskqueue.c   Wed May  2 17:41:00 2018
>> (r333174)
>> +++ head/sys/kern/subr_gtaskqueue.c   Wed May  2 19:36:29 2018
>> (r333175)
>> @@ -53,6 +53,7 @@ static void gtaskqueue_thread_enqueue(void *);
>>  static void  gtaskqueue_thread_loop(void *arg);
>>
>>  TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
>> +TASKQGROUP_DEFINE(config, 1, 1);
>>
>>  struct gtaskqueue_busy {
>>   struct gtask*tb_running;
>> @@ -662,7 +663,7 @@ SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_F
>>
>>  void
>>  taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
>> -void *uniq, int irq, char *name)
>> +void *uniq, int irq, const char *name)
>>  {
>>   cpuset_t mask;
>>   int qid, error;
>> @@ -976,4 +977,13 @@ void
>>  taskqgroup_destroy(struct taskqgroup *qgroup)
>>  {
>>
>> +}
>> +
>> +void
>> +taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t 
>> *fn,
>> + const char *name)
>> +{
>> +
>> + GROUPTASK_INIT(gtask, 0, fn, ctx);
>> + taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
>>  }
>>
>> Modified: head/sys/kern/subr_witness.c
>> ==
>> --- head/sys/kern/subr_witness.c  Wed May  2 17:41:00 2018
>> (r333174)
>> +++ head/sys/kern/subr_witness.c  Wed May  2 19:36:29 2018
>> (r333175)
>> @@ -532,18 +532,22 @@ static struct witness_order_list_entry order_lists[] =
>>* IPv4 multicast:
>>* protocol locks before interface locks, after UDP locks.
>>*/
>> + { "in_multi_sx", &lock_class_sx },
>>   { "udpinp", &lock_class_rw },
>> - { "in_multi_mtx", &lock_class_mtx_sleep },
>> + { "in_multi_list_mtx", &lock_class_mtx_sleep },
>>   { "igmp_mtx", &lock_class_mtx_sleep },
>> + { "ifnet_rw", &lock_class_rw },
>>   { "if_addr_lock", &lock_class_rw },
>>   { NULL, NULL },
>>   /*
>>* IPv6 multicast:
>>* protocol locks before interface locks, after UDP locks.
>>*/
>> + { "in6_multi_sx", &lock_class_sx },
>>   { "udpinp", &lock_class_rw },
>> - { "in6_multi_mtx", &lock_class_mtx_sleep },
>> + { "in6_multi_list_mtx", &lock_class_mtx_sleep },
>>   { "mld_mtx", &lock_class_mtx_sleep },
>> + { "ifnet_rw", &lock_class_rw },
>>   { "if_addr_lock", &lock_class_rw },
>>   { NULL, NULL },
>>   /*
>>
>> Modified: head/sys/net/if.c
>> ==
>> --- head/sys/net/if.c Wed May  2 17:41:00 2018(r333174)
>> +++ head/sys/net/if.c Wed May  2 19:36:29 2018(r333175)
>> @@ -985,11 +985,13 @@ static void
>>  if_purgemaddrs(struct ifnet *ifp)
>>  {
>>   struct ifmultiaddr *ifma;
>> - struct ifmultiaddr *next;
>>
>>   IF_ADDR_WLOCK(ifp);
>> - TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
>> + while (!TAILQ_EMPTY(&ifp->if_multiaddrs)) {
>> + ifma = TAILQ_FIRST(&ifp->if_multiaddrs);
>> + TAILQ_REMOVE(&ifp->if_multiad

Re: svn commit: r333242 - head/sys/kern

2018-05-04 Thread K. Macy
Yes. Good catch. Thanks.

-M

>
> Missing Approved by: sbruno?
>
>
>> Modified:
>>   head/sys/kern/kern_descrip.c
>>
>> Modified: head/sys/kern/kern_descrip.c
>> ==
>> --- head/sys/kern/kern_descrip.c  Fri May  4 04:05:07 2018
>> (r333241)
>> +++ head/sys/kern/kern_descrip.c  Fri May  4 06:51:01 2018
>> (r333242)
>> @@ -1503,7 +1503,7 @@ filecaps_copy(const struct filecaps *src, struct filec
>>
>>   if (src->fc_ioctls != NULL && !locked)
>>   return (false);
>> - *dst = *src;
>> + memcpy(dst, src, sizeof(*src));
>>   if (src->fc_ioctls == NULL)
>>   return (true);
>>
>> @@ -1512,7 +1512,7 @@ filecaps_copy(const struct filecaps *src, struct filec
>>
>>   size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
>>   dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK);
>> - bcopy(src->fc_ioctls, dst->fc_ioctls, size);
>> + memcpy(dst->fc_ioctls, src->fc_ioctls, size);
>>   return (true);
>>  }
>>
>>
>>
>
> --
> Rod Grimes rgri...@freebsd.org
> ___
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r229800 - head/sys/conf

2012-01-09 Thread K. Macy
>
> The problem with having ZFS compiled into the kernel is that OpenSolaris
> compatiblity layer needs its headers to be included before system
> headers, which is/was impossible or hard to express for kernel
> compilation. AFAIR Kip Macy was working on it and my understanding was
> that he finished it or almost finished it (CCed).

I did indeed make it work. The (only) reason I didn't check it in was
because there were some collisions between freebsd's older zlib which
ppp depends on and the one in the solaris layer. Both bz and I were
kind of busy at the time.

> I'm all for making ZFS be compilable as part of the kernel itself, but
> it is really very low on my list, because ZFS simply works now and also
> because as I said, it was far from trivial for me to do it.
> If someone who is more familiar with the bits responsible for kernel
> compilation wants to work on including ZFS there, I'll gladly provide
> help from the ZFS side.

I think this can be done without much work, (re)work out the dependencies.

Cheers
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r230623 - in head/sys: amd64/amd64 cddl/compat/opensolaris/sys cddl/contrib/opensolaris/uts/common/fs/zfs sys vm

2012-01-28 Thread K Macy


Il giorno 28/gen/2012, alle ore 09:39, Pawel Jakub Dawidek  
ha scritto:

> On Fri, Jan 27, 2012 at 08:18:32PM +, Kip Macy wrote:
>> Author: kmacy
>> Date: Fri Jan 27 20:18:31 2012
>> New Revision: 230623
>> URL: http://svn.freebsd.org/changeset/base/230623
>> 
>> Log:
>>  exclude kmem_alloc'ed ARC data buffers from kernel minidumps on amd64
>>  excluding other allocations including UMA now entails the addition of
>>  a single flag to kmem_alloc or uma zone create
> 
> How do you handle the case when there are two small allocations within
> one page, where one has the NODUMP flag and one doesn't have the flag?

Kmem_alloc and zone creation both work at granularities greater than or equal 
to a page. The per-page management of dump is why this flag is not available to 
generic malloc.
> 
> As for GELI and opencrypto what I'd love to have is a flag that will
> prevent dumping the memory and that will clear memory on free. How hard
> would that be?
> 

I'll look into it. Off-hand I think it would be easy enough.

Cheers, 
Kip
> -- 
> Pawel Jakub Dawidek   http://www.wheelsystems.com
> FreeBSD committer http://www.FreeBSD.org
> Am I Evil? Yes, I Am! http://tupytaj.pl
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r230623 - in head/sys: amd64/amd64 cddl/compat/opensolaris/sys cddl/contrib/opensolaris/uts/common/fs/zfs sys vm

2012-01-28 Thread K. Macy
On Sat, Jan 28, 2012 at 9:39 AM, Pawel Jakub Dawidek  wrote:
> On Fri, Jan 27, 2012 at 08:18:32PM +, Kip Macy wrote:
>> Author: kmacy
>> Date: Fri Jan 27 20:18:31 2012
>> New Revision: 230623
>> URL: http://svn.freebsd.org/changeset/base/230623
>>
>> Log:
>>   exclude kmem_alloc'ed ARC data buffers from kernel minidumps on amd64
>>   excluding other allocations including UMA now entails the addition of
>>   a single flag to kmem_alloc or uma zone create
>

Done for malloc.9, zone.9 doesn't document any of uma's flags and I'm
not sufficiently well versed in nroff to add them with messing up the
existing formatting.
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r225617 - in head/sys: amd64/amd64 amd64/linux32 arm/arm cddl/contrib/opensolaris/uts/common/dtrace cddl/contrib/opensolaris/uts/sparc/dtrace compat/freebsd32 compat/linux compat/svr4

2011-10-09 Thread K. Macy
Will do - sorry for the delay.

Thanks,
Kip

On Wed, Oct 5, 2011 at 6:17 PM, David O'Brien  wrote:
> On Fri, Sep 16, 2011 at 01:58:51PM +, Kip Macy wrote:
>> Author: kmacy
>> Date: Fri Sep 16 13:58:51 2011
>> New Revision: 225617
>> Log:
>>   In order to maximize the re-usability of kernel code in user space this
>>   patch modifies makesyscalls.sh to prefix all of the non-compatibility
>>   calls (e.g. not linux_, freebsd32_) with sys_ and updates the kernel
>>   entry points and all places in the code that use them. It also
>
> Hi Kip,
> __FreeBSD_version does not seem to have been bumped for this.
> Unfortunately this change has made a kernel module we use at work
> unbuildable, and I don't have a __FreeBSD_version value to #ifdef
> on.
>
> If ever there was a change that needed __FreeBSD_version this is one.
>
> Can you please get __FreeBSD_version bumped in 9-STABLE along with
> 9-RELENG?
>
> thanks,
> --
> -- David  (obr...@freebsd.org)
>
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r204654 - in head: sbin/newfs usr.bin/truncate

2010-03-03 Thread K. Macy
This broke world.

On Wed, Mar 3, 2010 at 11:25 AM, Maxim Sobolev  wrote:
> Author: sobomax
> Date: Wed Mar  3 19:25:28 2010
> New Revision: 204654
> URL: http://svn.freebsd.org/changeset/base/204654
>
> Log:
>  Use expand_number(3) from libutil instead of home-grown function to parse
>  human-friendly power-of-two numbers (i.e. 2k, 5M etc).
>
>  Suggested by: many
>  MFC after:    1 week
>
> Modified:
>  head/sbin/newfs/Makefile
>  head/sbin/newfs/newfs.c
>  head/sbin/newfs/newfs.h
>  head/usr.bin/truncate/Makefile
>  head/usr.bin/truncate/truncate.c
>
> Modified: head/sbin/newfs/Makefile
> ==
> --- head/sbin/newfs/Makefile    Wed Mar  3 19:14:05 2010        (r204653)
> +++ head/sbin/newfs/Makefile    Wed Mar  3 19:25:28 2010        (r204654)
> @@ -4,8 +4,8 @@
>  .PATH: ${.CURDIR}/../../sys/geom
>
>  PROG=  newfs
> -DPADD= ${LIBUFS}
> -LDADD= -lufs
> +DPADD= ${LIBUFS} ${LIBUTIL}
> +LDADD= -lufs -lutil
>  SRCS=  newfs.c mkfs.c geom_bsd_enc.c
>
>  WARNS?=        3
>
> Modified: head/sbin/newfs/newfs.c
> ==
> --- head/sbin/newfs/newfs.c     Wed Mar  3 19:14:05 2010        (r204653)
> +++ head/sbin/newfs/newfs.c     Wed Mar  3 19:25:28 2010        (r204654)
> @@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$");
>  #include 
>  #include 
>
> +#include 
> +
>  #include "newfs.h"
>
>  int    Eflag;                  /* Erase previous disk contents */
> @@ -90,19 +92,19 @@ int Jflag;                  /* enable gjournal for file
>  int    lflag;                  /* enable multilabel for file system */
>  int    nflag;                  /* do not create .snap directory */
>  intmax_t fssize;               /* file system size */
> -int    sectorsize;             /* bytes/sector */
> +int64_t        sectorsize;             /* bytes/sector */
>  int    realsectorsize;         /* bytes/sector in hardware */
> -int    fsize = 0;              /* fragment size */
> -int    bsize = 0;              /* block size */
> -int    maxbsize = 0;           /* maximum clustering */
> -int    maxblkspercg = MAXBLKSPERCG; /* maximum blocks per cylinder group */
> +int64_t        fsize = 0;              /* fragment size */
> +int64_t        bsize = 0;              /* block size */
> +int64_t        maxbsize = 0;           /* maximum clustering */
> +int64_t        maxblkspercg = MAXBLKSPERCG; /* maximum blocks per cylinder 
> group */
>  int    minfree = MINFREE;      /* free space threshold */
>  int    opt = DEFAULTOPT;       /* optimization preference (space or time) */
> -int    density;                /* number of bytes per inode */
> -int    maxcontig = 0;          /* max contiguous blocks to allocate */
> -int    maxbpg;                 /* maximum blocks per file in a cyl group */
> -int    avgfilesize = AVFILESIZ;/* expected average file size */
> -int    avgfilesperdir = AFPDIR;/* expected number of files per directory */
> +int64_t        density;                /* number of bytes per inode */
> +int64_t        maxcontig = 0;          /* max contiguous blocks to allocate 
> */
> +int64_t        maxbpg;                 /* maximum blocks per file in a cyl 
> group */
> +int64_t        avgfilesize = AVFILESIZ;/* expected average file size */
> +int64_t        avgfilesperdir = AFPDIR;/* expected number of files per 
> directory */
>  u_char *volumelabel = NULL;    /* volume label for filesystem */
>  struct uufsd disk;             /* libufs disk structure */
>
> @@ -117,7 +119,6 @@ static void getfssize(intmax_t *, const
>  static struct disklabel *getdisklabel(char *s);
>  static void rewritelabel(char *s, struct disklabel *lp);
>  static void usage(void);
> -static int parselength(const char *ls, int *sz);
>
>  ufs2_daddr_t part_ofs; /* partition offset in blocks, used with files */
>
> @@ -170,7 +171,7 @@ main(int argc, char *argv[])
>                        Rflag = 1;
>                        break;
>                case 'S':
> -                       rval = parselength(optarg, §orsize);
> +                       rval = expand_number(optarg, §orsize);
>                        if (rval < 0 || sectorsize <= 0)
>                                errx(1, "%s: bad sector size", optarg);
>                        break;
> @@ -184,13 +185,13 @@ main(int argc, char *argv[])
>                        Xflag++;
>                        break;
>                case 'a':
> -                       rval = parselength(optarg, &maxcontig);
> +                       rval = expand_number(optarg, &maxcontig);
>                        if (rval < 0 || maxcontig <= 0)
>                                errx(1, "%s: bad maximum contiguous blocks",
>                                    optarg);
>                        break;
>                case 'b':
> -                       rval = parselength(optarg, &bsize);
> +                       rval = expand_number(optarg, &bsize);
>                        if (rval < 0)
>      

Re: svn commit: r205132 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

2010-03-20 Thread K. Macy
Be my guest. I've established the technical merits in production.

Thanks,
Kip

On Mar 20, 2010 4:10 AM, "Pawel Jakub Dawidek"  wrote:

On Sat, Mar 13, 2010 at 09:41:53PM +, Kip Macy wrote:
> Author: kmacy
> Date: Sat Mar 13 21:41:5...
This is another[1] change we agreed (I hoped) to properly evaluate first
through zfs-c...@opensolaris.org.

1) Message-ID: <20100320110219.gd1...@garage.freebsd.pl>

--
Pawel Jakub Dawidek   http://www.wheelsystems.com
p...@freebsd.org   http://www.FreeBSD.org
FreeBSD committer Am I Evil? Yes, I Am!
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r205132 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

2010-03-20 Thread K. Macy
On Sat, Mar 20, 2010 at 3:10 AM, Pawel Jakub Dawidek  wrote:
> On Sat, Mar 13, 2010 at 09:41:53PM +, Kip Macy wrote:
>> Author: kmacy
>> Date: Sat Mar 13 21:41:52 2010
>> New Revision: 205132
>> URL: http://svn.freebsd.org/changeset/base/205132
>>
>> Log:
>>   Don't bottleneck on acquiring the stream locks - this avoids a massive
>>   drop off in throughput with large numbers of simultaneous reads
>
> This is another[1] change we agreed (I hoped) to properly evaluate first
> through zfs-c...@opensolaris.org.
>

Hi pjd -
I ran it by zfs-code@ a year ago. I never got any meaningful feedback.
I'm not being flippant when I say that I would like you to interact
with them.

One thing you need to also bear in mind is that Opensolaris !=
FreeBSD. I don't mean that in the phk "this is the wrong mailing list"
sense, but rather the two have different behavioral properties. The
deadlock that I had to track down by hand because you had disabled
witness on ZFS locks occurred within 10 minutes on a modest web
serving load and within 2 minutes under fsstress. It had been in ZFS
quite some time when they finally patched it and yet it was a very
serious problem on FreeBSD. Web serving throughput would drop from
2.5Gbps to a few hundred megabits per second without this change. I
can only guess that sx locks do not behave the same as Solaris'
mutexes to such a large extent that we get very different behaviors
and you'll have to keep an open mind to local changes that cope with
that fact.


Thanks,
Kip
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r205487 - head/sys/vm

2010-03-23 Thread K. Macy
The size change causes the initialization path to change in a way that
currently causes crashes.

On Mar 23, 2010 2:57 AM, "Ivan Voras"  wrote:

On 22 March 2010 23:39, Kip Macy  wrote:
> Author: kmacy
> Date: Mon Mar 22 22:39...
Does this mean you have determined that aligning these structures is
not as beneficial on i386 or is there something else going on?
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r205487 - head/sys/vm

2010-03-23 Thread K. Macy
On Tue, Mar 23, 2010 at 6:54 AM, John Baldwin  wrote:
> On Tuesday 23 March 2010 6:33:51 am K. Macy wrote:
>> The size change causes the initialization path to change in a way that
>> currently causes crashes.
>
> Are you planning on debugging it further?  Does UMA_BOOTPAGES or NKPT need to
> be larger?
>
>> On Mar 23, 2010 2:57 AM, "Ivan Voras"  wrote:
>>
>> On 22 March 2010 23:39, Kip Macy  wrote:
>> > Author: kmacy
>> > Date: Mon Mar 22 22:39...
>> Does this mean you have determined that aligning these structures is
>> not as beneficial on i386 or is there something else going on?
>>
>
> --
> John Baldwin


>From a mail I sent to Jeff:

This pushes things through keg_large_init (whereas previously it went
through keg_small_init) which breaks things on i386.
I can workaround the fact that hash_alloc gets called before hashzone
is initialized but trying to allocate a slab while during
initialization of the slab zone is problematic. Suggestions are
welcome.


(gdb) bt
#0  keg_alloc_slab (keg=0xc0ab3000, zone=0xc0547c80, wait=2)
   at /usr/home/kmacy/svn_checkouts/head_flowtable/sys/vm/uma_core.c:823
#1  0xc02dbfaf in keg_fetch_slab (keg=0xc0ab3000, zone=0xc0547c80, flags=2)
   at /usr/home/kmacy/svn_checkouts/head_flowtable/sys/vm/uma_core.c:2159
#2  0xc02dc2cc in zone_fetch_slab (zone=0xc0547c80, keg=0xc0ab3000, flags=2)
   at /usr/home/kmacy/svn_checkouts/head_flowtable/sys/vm/uma_core.c:2219
#3  0xc02db3ab in zone_alloc_item (zone=0xc0547c80, udata=0xc073cc48, flags=2)
   at /usr/home/kmacy/svn_checkouts/head_flowtable/sys/vm/uma_core.c:2475
#4  0xc02db74f in uma_zcreate (name=0xc037ef33 "UMA Slabs", size=284,
ctor=0, dtor=0, uminit=0,
   fini=0, align=3, flags=536870912)
   at /usr/home/kmacy/svn_checkouts/head_flowtable/sys/vm/uma_core.c:1827
#5  0xc02dcc6b in uma_startup (bootmem=0xc0a64000, boot_pages=80)
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r206140 - head/sys/vm

2010-04-19 Thread K. Macy
How has the problem been addressed? I'm seeing periodic panics with
non-zero resident count with the page lock patch applied. It is
possible that I've inadvertently re-introduced an issue you've fixed.

Thanks,
Kip

On Sat, Apr 3, 2010 at 9:20 AM, Alan Cox  wrote:
> Author: alc
> Date: Sat Apr  3 16:20:22 2010
> New Revision: 206140
> URL: http://svn.freebsd.org/changeset/base/206140
>
> Log:
>  Re-enable the call to pmap_release() by vmspace_dofree().  The accounting
>  problem that is described in the comment has been addressed.
>
>  Submitted by: kib
>  Tested by:    pho (a few months ago)
>  MFC after:    6 weeks
>
> Modified:
>  head/sys/vm/vm_map.c
>
> Modified: head/sys/vm/vm_map.c
> ==
> --- head/sys/vm/vm_map.c        Sat Apr  3 15:52:32 2010        (r206139)
> +++ head/sys/vm/vm_map.c        Sat Apr  3 16:20:22 2010        (r206140)
> @@ -313,6 +313,7 @@ vm_init2(void)
>  static inline void
>  vmspace_dofree(struct vmspace *vm)
>  {
> +
>        CTR1(KTR_VM, "vmspace_free: %p", vm);
>
>        /*
> @@ -329,12 +330,8 @@ vmspace_dofree(struct vmspace *vm)
>        (void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset,
>            vm->vm_map.max_offset);
>
> -       /*
> -        * XXX Comment out the pmap_release call for now. The
> -        * vmspace_zone is marked as UMA_ZONE_NOFREE, and bugs cause
> -        * pmap.resident_count to be != 0 on exit sometimes.
> -        */
> -/*     pmap_release(vmspace_pmap(vm)); */
> +       pmap_release(vmspace_pmap(vm));
> +       vm->vm_map.pmap = NULL;
>        uma_zfree(vmspace_zone, vm);
>  }
>
>
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r284959 - in head: . share/man/man4 share/man/man9 sys/conf sys/dev/glxsb sys/dev/hifn sys/dev/random sys/dev/rndtest sys/dev/safe sys/dev/syscons sys/dev/ubsec sys/dev/virtio/random s

2015-07-16 Thread K. Macy
I discovered this when I MFC'd and my kernel wouldn't link because of
unresolved symbols. I thought I had put the issue aside when I added
RANDOM_DUMMY to my kernel config.

However, I just hit this:

while (!random_alg_context.ra_seeded()) {
if (nonblock) {
error = EWOULDBLOCK;
break;
}
tsleep(&random_alg_context, 0, "randseed", hz/10);
/* keep tapping away at the pre-read until we seed/unblock. */
random_alg_context.ra_pre_read();
printf("random: %s unblock wait\n", __func__);
}

My system wouldn't boot because this was endlessly spamming the
console. I don't know what the right default here is. But I can say
that this is not it.

I would also like to observe that a good back of the envelope
per-packet budget is 1000 cycles (stock FreeBSD is much worse than
this - nonetheless it is a reasonable figure to strive for). Many
drivers will for a variety of reasons allocate mbufs and clusters
separately. This means that if you're harvesting on alloc / free, your
function is being called 4 times per-packet. Let's have a hypothetical
that your function takes 50 cycles* - in this case your entropy
collection would be consuming a full 20% of the per-packet budget. At
high-packet rates your entropy is likely to be caused more by
indeterminacy of cache misses and lock contention than any true
randomness in the arrival rate.


* It may be less, it may be much more.


I'm not sure where the workflow fell short in this instance, but I'm
inclined to believe that this could have been handled better.

-K



On Thu, Jul 16, 2015 at 11:29 AM, Mark R V Murray  wrote:
>
>> On 16 Jul 2015, at 15:08, Ian Lepore  wrote:
>>
>> On Thu, 2015-07-16 at 06:39 +0100, Mark Murray wrote:
 On 15 Jul 2015, at 23:43, Adrian Chadd  wrote:

> - Add harvesting of slab allocator events. This needs to be checked for
>   weighing down the allocator code.

 Hi,

 Is this really doing it upon every one of those events? eg, for each
 mbuf alloc through UMA?
>>>
>>> Only if you turn it on!
>>>
>>> M
>>>
>>
>> In random_harvestq_init() I see
>>
>> harvest_context.hc_source_mask = RANDOM_HARVEST_EVERYTHING_MASK;
>>
>> and
>>
>> #define RANDOM_HARVEST_EVERYTHING_MASK ((1 << (RANDOM_ENVIRONMENTAL_END
>> + 1)) - 1)
>>
>> So doesn't that include the RANDOM_FAST flag that controls harvesting
>> during every UMA alloc and free call?  And that harvesting appears to be
>> anything but fast, at least at a glance... it looks like it passes the
>> entire struct uma_zone to the jenkins hash function... is there really
>> useful entropy in all the data in that struct?
>
> Well spotted, but fear not. All sources are on at startup, and this
> is to ensure that the generator has maximal access to entropy while
> booting.
>
> One of the default duties of etc/rc.d/random is to turn off the UMA
> and ATIME sources. These may be turned on if you want them, but by
> default on the fully booted system they are off.
>
> See ‘sysctl kern.random.harvest.mask_symbolic’ and note that the
> disabled sources are in [].
>
> I have yet to do a full set of benchmarks, but I have discussed
> this with RWatson. A silly benchmark (make world) shows little
> effect, but I will be doing this properly in coming months.
>
> In answer to you final question - yes. The UMA entropy is a bit
> spread out, but it is good.
>
> M
> --
> Mark R V Murray
>
> ___
> svn-src-head@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Re: svn commit: r284959 - in head: . share/man/man4 share/man/man9 sys/conf sys/dev/glxsb sys/dev/hifn sys/dev/random sys/dev/rndtest sys/dev/safe sys/dev/syscons sys/dev/ubsec sys/dev/virtio/random s

2015-07-16 Thread K. Macy
On Thu, Jul 16, 2015 at 3:28 PM, K. Macy  wrote:
> I discovered this when I MFC'd and my kernel wouldn't link because of
> unresolved symbols. I thought I had put the issue aside when I added
> RANDOM_DUMMY to my kernel config.
>
> However, I just hit this:
>
> while (!random_alg_context.ra_seeded()) {
> if (nonblock) {
> error = EWOULDBLOCK;
> break;
> }
> tsleep(&random_alg_context, 0, "randseed", hz/10);
> /* keep tapping away at the pre-read until we seed/unblock. */
> random_alg_context.ra_pre_read();
> printf("random: %s unblock wait\n", __func__);
> }
>
> My system wouldn't boot because this was endlessly spamming the
> console. I don't know what the right default here is. But I can say
> that this is not it.


I've also realized that a process blocked here is uninterruptible.
Hence any process reading an insufficiently seeded /dev/random is
unkillable. For example my boot can't proceed past dd doing a read and
I can't ^C it. Did you test RANDOM_DUMMY?


-K
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r285021 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

2015-07-30 Thread K. Macy
Just FYI this change introduces a deadlock with with the
spa_namespace_lock. Mount will be holding this lock while trying to
acquire the spa_namespace_lock. zfskern on the other hand holds the
spa_namespace_lock when calling zfs_freebsd_access  which in turn
tries to acquire the teardown lock.

static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
caller_context_t *ct)
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
int error;

ZFS_ENTER(zfsvfs);

where:
/* Called on entry to each ZFS vnode and vfs operation  */
#define ZFS_ENTER(zfsvfs) \
{ \
rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \
} \


I think this makes a pretty strong case for the need to educate
WITNESS about rrm locks.

Cheers.

-K










On Thu, Jul 2, 2015 at 1:32 AM, Andriy Gapon  wrote:
> Author: avg
> Date: Thu Jul  2 08:32:02 2015
> New Revision: 285021
> URL: https://svnweb.freebsd.org/changeset/base/285021
>
> Log:
>   zfs_mount(MS_REMOUNT): protect zfs_(un)register_callbacks calls
>
>   We now take z_teardown_lock as a writer to ensure that there is no I/O
>   while the filesystem state is in a flux.  Also, zfs_suspend_fs() ->
>   zfsvfs_teardown() call zfs_unregister_callbacks() and zfs_resume_fs() ->
>   zfsvfs_setup() call zfs_unregister_callbacks().  Previously there was no
>   synchronization between those calls and the calls in the re-mounting
>   case.  That could lead to concurrent execution and a crash.
>
>   PR:   180060
>   Differential Revision:https://reviews.freebsd.org/D2865
>   Suggested by: mahrens
>   Reviewed by:  delphij, pho, mahrens, will
>   MFC after:13 days
>   Sponsored by: ClusterHQ
>
> Modified:
>   head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
>
> Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
> ==
> --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.cThu 
> Jul  2 08:25:45 2015(r285020)
> +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.cThu 
> Jul  2 08:32:02 2015(r285021)
> @@ -1717,9 +1717,19 @@ zfs_mount(vfs_t *vfsp)
>  * according to those options set in the current VFS options.
>  */
> if (vfsp->vfs_flag & MS_REMOUNT) {
> -   /* refresh mount options */
> -   zfs_unregister_callbacks(vfsp->vfs_data);
> +   zfsvfs_t *zfsvfs = vfsp->vfs_data;
> +
> +   /*
> +* Refresh mount options with z_teardown_lock blocking I/O 
> while
> +* the filesystem is in an inconsistent state.
> +* The lock also serializes this code with filesystem
> +* manipulations between entry to zfs_suspend_fs() and return
> +* from zfs_resume_fs().
> +*/
> +   rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
> +   zfs_unregister_callbacks(zfsvfs);
> error = zfs_register_callbacks(vfsp);
> +   rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
> goto out;
> }
>
> ___
> svn-src-head@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r285021 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

2015-08-05 Thread K. Macy
His proposed solution looks fine to me. Why don't we commit that?

-K

On Wed, Aug 5, 2015 at 7:22 PM, Glen Barber  wrote:
> On Thu, Aug 06, 2015 at 04:17:23AM +0200, Pawel Jakub Dawidek wrote:
>> On Mon, Aug 03, 2015 at 04:20:04PM +0300, Andriy Gapon wrote:
>> > On 30/07/2015 10:24, K. Macy wrote:
>> > > Just FYI this change introduces a deadlock with with the
>> > > spa_namespace_lock. Mount will be holding this lock while trying to
>> > > acquire the spa_namespace_lock. zfskern on the other hand holds the
>> > > spa_namespace_lock when calling zfs_freebsd_access  which in turn
>> > > tries to acquire the teardown lock.
>> >
>> > I missed the fact that zpool.cache file is being written with 
>> > spa_namespace_lock
>> > held.
>> > I'll try to either resolve the problem in the next day or I will revert 
>> > the change.
>>
>> FYI, I'm hitting this deadlock on my laptop. Reverting the change fixes
>> the problem.
>>
>
> Since it is now 3 days after "I'll try to either resolve the problem in
> the next day or I will revert the change", my view is that this needs to
> be reverted now, if it hasn't been done already.
>
> Glen
>
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-20 Thread K. Macy
Please back this out now. It was a substantial interface change
without review. This should not be up for debate. I hope the others
have the fortitude to insist upon this.

-K

On Thu, Jan 15, 2015 at 7:32 AM, Hans Petter Selasky
 wrote:
> Author: hselasky
> Date: Thu Jan 15 15:32:30 2015
> New Revision: 277213
> URL: https://svnweb.freebsd.org/changeset/base/277213
>
> Log:
>   Major callout subsystem cleanup and rewrite:
>   - Close a migration race where callout_reset() failed to set the
> CALLOUT_ACTIVE flag.
>   - Callout callback functions are now allowed to be protected by
> spinlocks.
>   - Switching the callout CPU number cannot always be done on a
> per-callout basis. See the updated timeout(9) manual page for more
> information.
>   - The timeout(9) manual page has been updated to reflect how all the
> functions inside the callout API are working. The manual page has
> been made function oriented to make it easier to deduce how each of
> the functions making up the callout API are working without having
> to first read the whole manual page. Group all functions into a
> handful of sections which should give a quick top-level overview
> when the different functions should be used.
>   - The CALLOUT_SHAREDLOCK flag and its functionality has been removed
> to reduce the complexity in the callout code and to avoid problems
> about atomically stopping callouts via callout_stop(). If someone
> needs it, it can be re-added. From my quick grep there are no
> CALLOUT_SHAREDLOCK clients in the kernel.
>   - A new callout API function named "callout_drain_async()" has been
> added. See the updated timeout(9) manual page for a complete
> description.
>   - Update the callout clients in the "kern/" folder to use the callout
> API properly, like cv_timedwait(). Previously there was some custom
> sleepqueue code in the callout subsystem, which has been removed,
> because we now allow callouts to be protected by spinlocks. This
> allows us to tear down the callout like done with regular mutexes,
> and a "td_slpmutex" has been added to "struct thread" to atomically
> teardown the "td_slpcallout". Further the "TDF_TIMOFAIL" and
> "SWT_SLEEPQTIMO" states can now be completely removed. Currently
> they are marked as available and will be cleaned up in a follow up
> commit.
>   - Bump the __FreeBSD_version to indicate kernel modules need
> recompilation.
>   - There has been several reports that this patch "seems to squash a
> serious bug leading to a callout timeout and panic".
>
>   Kernel build testing: all architectures were built
>   MFC after:2 weeks
>   Differential Revision:https://reviews.freebsd.org/D1438
>   Sponsored by: Mellanox Technologies
>   Reviewed by:  jhb, adrian, sbruno and emaste
>
> Modified:
>   head/share/man/man9/Makefile
>   head/share/man/man9/timeout.9
>   head/sys/kern/init_main.c
>   head/sys/kern/kern_condvar.c
>   head/sys/kern/kern_lock.c
>   head/sys/kern/kern_switch.c
>   head/sys/kern/kern_synch.c
>   head/sys/kern/kern_thread.c
>   head/sys/kern/kern_timeout.c
>   head/sys/kern/subr_sleepqueue.c
>   head/sys/ofed/include/linux/completion.h
>   head/sys/sys/_callout.h
>   head/sys/sys/callout.h
>   head/sys/sys/param.h
>   head/sys/sys/proc.h
>
> Modified: head/share/man/man9/Makefile
> ==
> --- head/share/man/man9/MakefileThu Jan 15 14:47:48 2015
> (r277212)
> +++ head/share/man/man9/MakefileThu Jan 15 15:32:30 2015
> (r277213)
> @@ -1570,6 +1570,7 @@ MLINKS+=timeout.9 callout.9 \
> timeout.9 callout_active.9 \
> timeout.9 callout_deactivate.9 \
> timeout.9 callout_drain.9 \
> +   timeout.9 callout_drain_async.9 \
> timeout.9 callout_handle_init.9 \
> timeout.9 callout_init.9 \
> timeout.9 callout_init_mtx.9 \
>
> Modified: head/share/man/man9/timeout.9
> ==
> --- head/share/man/man9/timeout.9   Thu Jan 15 14:47:48 2015
> (r277212)
> +++ head/share/man/man9/timeout.9   Thu Jan 15 15:32:30 2015
> (r277213)
> @@ -29,13 +29,14 @@
>  .\"
>  .\" $FreeBSD$
>  .\"
> -.Dd October 8, 2014
> +.Dd January 14, 2015
>  .Dt TIMEOUT 9
>  .Os
>  .Sh NAME
>  .Nm callout_active ,
>  .Nm callout_deactivate ,
>  .Nm callout_drain ,
> +.Nm callout_drain_async ,
>  .Nm callout_handle_init ,
>  .Nm callout_init ,
>  .Nm callout_init_mtx ,
> @@ -63,279 +64,232 @@
>  .In sys/systm.h
>  .Bd -literal
>  typedef void timeout_t (void *);
> +typedef void callout_func_t (void *);
>  .Ed
> -.Ft int
> -.Fn callout_active "struct callout *c"
> -.Ft void
> -.Fn callout_deactivate "struct callout *c"
> -.Ft int
> -.Fn callout_drain "struct callout *c"
> -.Ft void
> -.Fn callout_handle_init "struct callout_handle *han

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-20 Thread K. Macy
I think you're working around driver locking bugs by crippling the callout code.

-K

On Tue, Jan 20, 2015 at 3:35 PM, Sean Bruno  wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA512
>
> On 01/20/15 14:30, Hans Petter Selasky wrote:
>> On 01/20/15 22:11, Gleb Smirnoff wrote:
>>> On Tue, Jan 20, 2015 at 09:51:26AM +0200, Konstantin Belousov
>>> wrote: K> > Like stated in the manual page,
>>> callout_reset_curcpu/on() does not work K> > with MPSAFE callouts
>>> any more! K> I.e. you 'fixed' some undeterminate bugs in callout
>>> migration by not K> doing migration at all anymore. K> K> > K> >
>>> You need to use callout_init_{mtx,rm,rw} and remove the custom
>>> locking K> > inside the callback in the TCP stack to get it
>>> working like before! K> K> No, you need to do this, if you think
>>> that whole callout KPI must be K> rototiled.  It is up to the
>>> person who modifies the KPI, to ensure that K> existing code is
>>> not broken. K> K> As I understand, currently we are back to the
>>> one-cpu callouts. K> Do other people consider this situation
>>> acceptable ?
>>>
>>> I think this isn't acceptable. The commit to a complex subsystem
>>> lacked a review from persons involved in the system before. The
>>> commit to subsystem broke consumers of the subsystem and this was
>>> even done not accidentially, but due to Hans not caring about
>>> it.
>>>
>>> As for me this is enough to request a backout, and let the
>>> change back in only after proper review.
>>>
>>
>> Hi Gleb,
>>
>> Backing out my callout API patch means we will for sure
>> re-introduce an unknown callout spinlock hang, as noted to me by
>> several people. What do you think about that? dram Maybe "Jason
>> Wolfe" CC'ed can add to 10-stable w/o my patches:
>>
>
> Jason picked up this patch for work and it resolved our instability
> issues that had remained unsolved for quite some time as reported to
> freebsd-net:
>
> https://lists.freebsd.org/pipermail/freebsd-net/2015-January/040895.html
>
> This had gone undiagnosed for some time (even with the gracious help
> of jhb in offline emails, thanks btw!).
>
> There's some diagnostics in that email thread that may be of value to
> you folks for determination of the validity of changing the callout
> API or at least understanding why we were involved in diagnostics.
>
> While I'd sure love to tune performance, the fact that our machines
> were basically going out to lunch without these changes, probably
> means that others were seeing it and didn't know what else to do.  As
> much as I enjoy a good "break out the pitch forks and torches" email
> thread, this increased stability for us and is allowing us to upgrade
> from freebsd8 to freebsd10.  Bear this in mind when you throw your
> voice in favor of reverting.
>
>> int callout_reset_sbt_on(struct callout *c, sbintime_t sbt,
>> sbintime_t precision, void (*ftn)(void *), void *arg, int cpu, int
>> flags) { sbintime_t to_sbt, pr; struct callout_cpu *cc; int
>> cancelled, direct;
>>
>> +   cpu = timeout_cpu;   /* XXX test code XXX */
>>
>> cancelled = 0;
>>
>
> Jason or I would have to run this in production, which would be
> problematic I fear.  We never had a deterministic test case that would
> exhibit the reported failure.  We merely "tested in production" and
> saw that panics ceased.  We didn't note a dropoff in our traffic
> either, perhaps we are not as efficient as others in this corner case,
> but we were consistently seeing the spinlock hangs after a day or so
> of traffic.
>
>> And see if he observes a callout spinlock hang or not on his test
>> setup. The patch above should force all callouts to the same thread
>> basically. Then we could maybe see if single threading the callouts
>> has anything to do with solving the spinlock hang.
>>
>> The "rewritten" callout API still has all the features and
>> capabilities the old one had, when used as described in "man 9
>> callout".
>>
>> At the present moment I'm not technically convinced a backout is
>> correct.
>
> Neither am I, to be honest.  Just based on *results*.
>
>>
>> Gleb: I think we would see far better results with high speed
>> internet links using TCP if we could extend the LRO (large receive
>> offload) code to accumulate more than 64KBytes worth of data per
>> call to the TCP stack instead of complaining about some callouts
>> ending up on the same thread! Actually I have a patch for that.
>>
>> --HPS
>>
>>
>>
>
> -BEGIN PGP SIGNATURE-
> Version: GnuPG v2
>
> iQF8BAEBCgBmBQJUvuYrXxSAAC4AKGlzc3Vlci1mcHJAbm90YXRpb25zLm9w
> ZW5wZ3AuZmlmdGhob3JzZW1hbi5uZXRCQUFENDYzMkU3MTIxREU4RDIwOTk3REQx
> MjAxRUZDQTFFNzI3RTY0AAoJEBIB78oecn5kJTMIAMfh6ghV/AwQauY+a44q1hjJ
> WC7E3u69FK0opgSYg71kk6HckbyB+sTWND6HdXnpyrcMLXUt74zlB8c48wbUUV5+
> EwKNYzGNNnDNhoc0WtPMect8e9Y1kBRvSGfHBdVrqATXfPOyZEa+i4lQAXpiFKIt
> nndqVrAH7bJM6143YDpnIg7vaR+8IQnC2ztSP4ogJzh03DZ7zVsg4BsoCPg50eVZ
> kr46cXcE+SP/TsQBsVNVwRJD5NFie6QJdLoTnwkd0XfQGJMIWivNgUcE4tIxqPIf
> nGQ0xMJCotpNLuPtzYzCIurSaDHdHmL6bjkhjTtBm

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-20 Thread K. Macy
Are any other drivers hitting this? e.g. cxgb/cxgbe?

-K

On Tue, Jan 20, 2015 at 3:46 PM, Sean Bruno  wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA512
>
> On 01/20/15 15:40, K. Macy wrote:
>> I think you're working around driver locking bugs by crippling the
>> callout code.
>>
>> -K
>>
>
> We had zero evidence of this.  What leads you down that path?  I'm
> totally open to being wrong, e.g. "yeah, you slowed down things so
> that you don't hit a race condition"
>
> sean
>
>> On Tue, Jan 20, 2015 at 3:35 PM, Sean Bruno
>>  wrote: On 01/20/15 14:30, Hans Petter
>> Selasky wrote:
>>>>> On 01/20/15 22:11, Gleb Smirnoff wrote:
>>>>>> On Tue, Jan 20, 2015 at 09:51:26AM +0200, Konstantin
>>>>>> Belousov wrote: K> > Like stated in the manual page,
>>>>>> callout_reset_curcpu/on() does not work K> > with MPSAFE
>>>>>> callouts any more! K> I.e. you 'fixed' some undeterminate
>>>>>> bugs in callout migration by not K> doing migration at all
>>>>>> anymore. K> K> > K> > You need to use
>>>>>> callout_init_{mtx,rm,rw} and remove the custom locking K> >
>>>>>> inside the callback in the TCP stack to get it working like
>>>>>> before! K> K> No, you need to do this, if you think that
>>>>>> whole callout KPI must be K> rototiled.  It is up to the
>>>>>> person who modifies the KPI, to ensure that K> existing
>>>>>> code is not broken. K> K> As I understand, currently we are
>>>>>> back to the one-cpu callouts. K> Do other people consider
>>>>>> this situation acceptable ?
>>>>>>
>>>>>> I think this isn't acceptable. The commit to a complex
>>>>>> subsystem lacked a review from persons involved in the
>>>>>> system before. The commit to subsystem broke consumers of
>>>>>> the subsystem and this was even done not accidentially, but
>>>>>> due to Hans not caring about it.
>>>>>>
>>>>>> As for me this is enough to request a backout, and let the
>>>>>> change back in only after proper review.
>>>>>>
>>>>>
>>>>> Hi Gleb,
>>>>>
>>>>> Backing out my callout API patch means we will for sure
>>>>> re-introduce an unknown callout spinlock hang, as noted to me
>>>>> by several people. What do you think about that? dram Maybe
>>>>> "Jason Wolfe" CC'ed can add to 10-stable w/o my patches:
>>>>>
>>
>> Jason picked up this patch for work and it resolved our
>> instability issues that had remained unsolved for quite some time
>> as reported to freebsd-net:
>>
>> https://lists.freebsd.org/pipermail/freebsd-net/2015-January/040895.html
>>
>>  This had gone undiagnosed for some time (even with the gracious
>> help of jhb in offline emails, thanks btw!).
>>
>> There's some diagnostics in that email thread that may be of value
>> to you folks for determination of the validity of changing the
>> callout API or at least understanding why we were involved in
>> diagnostics.
>>
>> While I'd sure love to tune performance, the fact that our
>> machines were basically going out to lunch without these changes,
>> probably means that others were seeing it and didn't know what else
>> to do.  As much as I enjoy a good "break out the pitch forks and
>> torches" email thread, this increased stability for us and is
>> allowing us to upgrade from freebsd8 to freebsd10.  Bear this in
>> mind when you throw your voice in favor of reverting.
>>
>>>>> int callout_reset_sbt_on(struct callout *c, sbintime_t sbt,
>>>>> sbintime_t precision, void (*ftn)(void *), void *arg, int
>>>>> cpu, int flags) { sbintime_t to_sbt, pr; struct callout_cpu
>>>>> *cc; int cancelled, direct;
>>>>>
>>>>> +   cpu = timeout_cpu;   /* XXX test code XXX */
>>>>>
>>>>> cancelled = 0;
>>>>>
>>
>> Jason or I would have to run this in production, which would be
>> problematic I fear.  We never had a deterministic test case that
>> would exhibit the reported failure.  We merely "tested in
>> production" and saw that panics ceased.  We didn't note a dropoff
>> in our traffic either

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-20 Thread K. Macy
On Tue, Jan 20, 2015 at 3:48 PM, K. Macy  wrote:
> Are any other drivers hitting this? e.g. cxgb/cxgbe?

The evidence is simply past experience of recurring locking and
control flow bugs in the intel drivers.

-K

>
> -K
>
> On Tue, Jan 20, 2015 at 3:46 PM, Sean Bruno  wrote:
>> -BEGIN PGP SIGNED MESSAGE-
>> Hash: SHA512
>>
>> On 01/20/15 15:40, K. Macy wrote:
>>> I think you're working around driver locking bugs by crippling the
>>> callout code.
>>>
>>> -K
>>>
>>
>> We had zero evidence of this.  What leads you down that path?  I'm
>> totally open to being wrong, e.g. "yeah, you slowed down things so
>> that you don't hit a race condition"
>>
>> sean
>>
>>> On Tue, Jan 20, 2015 at 3:35 PM, Sean Bruno
>>>  wrote: On 01/20/15 14:30, Hans Petter
>>> Selasky wrote:
>>>>>> On 01/20/15 22:11, Gleb Smirnoff wrote:
>>>>>>> On Tue, Jan 20, 2015 at 09:51:26AM +0200, Konstantin
>>>>>>> Belousov wrote: K> > Like stated in the manual page,
>>>>>>> callout_reset_curcpu/on() does not work K> > with MPSAFE
>>>>>>> callouts any more! K> I.e. you 'fixed' some undeterminate
>>>>>>> bugs in callout migration by not K> doing migration at all
>>>>>>> anymore. K> K> > K> > You need to use
>>>>>>> callout_init_{mtx,rm,rw} and remove the custom locking K> >
>>>>>>> inside the callback in the TCP stack to get it working like
>>>>>>> before! K> K> No, you need to do this, if you think that
>>>>>>> whole callout KPI must be K> rototiled.  It is up to the
>>>>>>> person who modifies the KPI, to ensure that K> existing
>>>>>>> code is not broken. K> K> As I understand, currently we are
>>>>>>> back to the one-cpu callouts. K> Do other people consider
>>>>>>> this situation acceptable ?
>>>>>>>
>>>>>>> I think this isn't acceptable. The commit to a complex
>>>>>>> subsystem lacked a review from persons involved in the
>>>>>>> system before. The commit to subsystem broke consumers of
>>>>>>> the subsystem and this was even done not accidentially, but
>>>>>>> due to Hans not caring about it.
>>>>>>>
>>>>>>> As for me this is enough to request a backout, and let the
>>>>>>> change back in only after proper review.
>>>>>>>
>>>>>>
>>>>>> Hi Gleb,
>>>>>>
>>>>>> Backing out my callout API patch means we will for sure
>>>>>> re-introduce an unknown callout spinlock hang, as noted to me
>>>>>> by several people. What do you think about that? dram Maybe
>>>>>> "Jason Wolfe" CC'ed can add to 10-stable w/o my patches:
>>>>>>
>>>
>>> Jason picked up this patch for work and it resolved our
>>> instability issues that had remained unsolved for quite some time
>>> as reported to freebsd-net:
>>>
>>> https://lists.freebsd.org/pipermail/freebsd-net/2015-January/040895.html
>>>
>>>  This had gone undiagnosed for some time (even with the gracious
>>> help of jhb in offline emails, thanks btw!).
>>>
>>> There's some diagnostics in that email thread that may be of value
>>> to you folks for determination of the validity of changing the
>>> callout API or at least understanding why we were involved in
>>> diagnostics.
>>>
>>> While I'd sure love to tune performance, the fact that our
>>> machines were basically going out to lunch without these changes,
>>> probably means that others were seeing it and didn't know what else
>>> to do.  As much as I enjoy a good "break out the pitch forks and
>>> torches" email thread, this increased stability for us and is
>>> allowing us to upgrade from freebsd8 to freebsd10.  Bear this in
>>> mind when you throw your voice in favor of reverting.
>>>
>>>>>> int callout_reset_sbt_on(struct callout *c, sbintime_t sbt,
>>>>>> sbintime_t precision, void (*ftn)(void *), void *arg, int
>>>>>> cpu, int flags) { sbintime_t to_sbt, pr; struct callout_cpu
>>>>>> *cc; int cancelled, direct;
>>>>>&

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-20 Thread K. Macy
On Tue, Jan 20, 2015 at 3:53 PM, Sean Bruno  wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA512
>
> On 01/20/15 15:48, K. Macy wrote:
>> Are any other drivers hitting this? e.g. cxgb/cxgbe?
>>
>> -K
>>
>
> Unkown to me.  Nor am I aware of anyone else who ever hit our panics
> either.  Our environment, and the failure, was only seen in the Intel
> 10GE space (ixgbe).  This is an artifact of our use cases, and hasn't
> been expanded nor tested in our environment with other vendor interfaces.

For an ill characterized problem isolated to one environment this
seems like a workaround that should not be part of the general code
base.

-K


> sean
>
>> On Tue, Jan 20, 2015 at 3:46 PM, Sean Bruno
>>  wrote: On 01/20/15 15:40, K. Macy wrote:
>>>>> I think you're working around driver locking bugs by
>>>>> crippling the callout code.
>>>>>
>>>>> -K
>>>>>
>>
>> We had zero evidence of this.  What leads you down that path?  I'm
>> totally open to being wrong, e.g. "yeah, you slowed down things so
>> that you don't hit a race condition"
>>
>> sean
>>
>>>>> On Tue, Jan 20, 2015 at 3:35 PM, Sean Bruno
>>>>>  wrote: On 01/20/15 14:30, Hans
>>>>> Petter Selasky wrote:
>>>>>>>> On 01/20/15 22:11, Gleb Smirnoff wrote:
>>>>>>>>> On Tue, Jan 20, 2015 at 09:51:26AM +0200, Konstantin
>>>>>>>>> Belousov wrote: K> > Like stated in the manual page,
>>>>>>>>> callout_reset_curcpu/on() does not work K> > with
>>>>>>>>> MPSAFE callouts any more! K> I.e. you 'fixed' some
>>>>>>>>> undeterminate bugs in callout migration by not K>
>>>>>>>>> doing migration at all anymore. K> K> > K> > You need
>>>>>>>>> to use callout_init_{mtx,rm,rw} and remove the custom
>>>>>>>>> locking K> > inside the callback in the TCP stack to
>>>>>>>>> get it working like before! K> K> No, you need to do
>>>>>>>>> this, if you think that whole callout KPI must be K>
>>>>>>>>> rototiled.  It is up to the person who modifies the
>>>>>>>>> KPI, to ensure that K> existing code is not broken.
>>>>>>>>> K> K> As I understand, currently we are back to the
>>>>>>>>> one-cpu callouts. K> Do other people consider this
>>>>>>>>> situation acceptable ?
>>>>>>>>>
>>>>>>>>> I think this isn't acceptable. The commit to a
>>>>>>>>> complex subsystem lacked a review from persons
>>>>>>>>> involved in the system before. The commit to
>>>>>>>>> subsystem broke consumers of the subsystem and this
>>>>>>>>> was even done not accidentially, but due to Hans not
>>>>>>>>> caring about it.
>>>>>>>>>
>>>>>>>>> As for me this is enough to request a backout, and
>>>>>>>>> let the change back in only after proper review.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Hi Gleb,
>>>>>>>>
>>>>>>>> Backing out my callout API patch means we will for
>>>>>>>> sure re-introduce an unknown callout spinlock hang, as
>>>>>>>> noted to me by several people. What do you think about
>>>>>>>> that? dram Maybe "Jason Wolfe" CC'ed can add to
>>>>>>>> 10-stable w/o my patches:
>>>>>>>>
>>>>>
>>>>> Jason picked up this patch for work and it resolved our
>>>>> instability issues that had remained unsolved for quite some
>>>>> time as reported to freebsd-net:
>>>>>
>>>>> https://lists.freebsd.org/pipermail/freebsd-net/2015-January/040895.html
>>>>>
>>>>>
>>>>>
> This had gone undiagnosed for some time (even with the gracious
>>>>> help of jhb in offline emails, thanks btw!).
>>>>>
>>>>> There's some diagnostics in that email thread that may be of
>>>>> value to you folks for determination of the validity of
>>>>> changing the callout AP

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-20 Thread K. Macy
On Tue, Jan 20, 2015 at 4:22 PM, Sean Bruno  wrote:
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA512
>
> On 01/20/15 15:59, K. Macy wrote:
>> On Tue, Jan 20, 2015 at 3:53 PM, Sean Bruno
>>  wrote: On 01/20/15 15:48, K. Macy wrote:
>>>>> Are any other drivers hitting this? e.g. cxgb/cxgbe?
>>>>>
>>>>> -K
>>>>>
>>
>> Unkown to me.  Nor am I aware of anyone else who ever hit our
>> panics either.  Our environment, and the failure, was only seen in
>> the Intel 10GE space (ixgbe).  This is an artifact of our use
>> cases, and hasn't been expanded nor tested in our environment with
>> other vendor interfaces.
>>
>>> For an ill characterized problem isolated to one environment
>>> this seems like a workaround that should not be part of the
>>> general code base.

>>
>>> -K
>>
>
> There was never any indication in our testing that the driver was
> involved in any way.
>
> In our universe, this commit (right or wrong) resolved our panics.  I
> think that there is some room for improvement based on the commentary
> in this thread, but some people do indeed prefer stability over
> performance.  I hope we can come to a middle ground somewhere here.

I would pick stability over performance any day. However, it _seems_
to me, and maybe I simply don't understand some key details, that the
fix consisted of largely single-threading the callout system. And as I
say I may simply not understand the specifics, but this sort of large
scale disabling does not constitute a fix but is a workaround.  It's
more like disabling preemption because it fixes a panic. Yes, it might
"fix" a whole array of bugs that crop up but it could not be seen as a
fix to an otherwise working system.


-K






>
> sean
>
>>
>> sean
>>
>>>>> On Tue, Jan 20, 2015 at 3:46 PM, Sean Bruno
>>>>>  wrote: On 01/20/15 15:40, K. Macy
>>>>> wrote:
>>>>>>>> I think you're working around driver locking bugs by
>>>>>>>> crippling the callout code.
>>>>>>>>
>>>>>>>> -K
>>>>>>>>
>>>>>
>>>>> We had zero evidence of this.  What leads you down that path?
>>>>> I'm totally open to being wrong, e.g. "yeah, you slowed down
>>>>> things so that you don't hit a race condition"
>>>>>
>>>>> sean
>>>>>
>>>>>>>> On Tue, Jan 20, 2015 at 3:35 PM, Sean Bruno
>>>>>>>>  wrote: On 01/20/15 14:30,
>>>>>>>> Hans Petter Selasky wrote:
>>>>>>>>>>> On 01/20/15 22:11, Gleb Smirnoff wrote:
>>>>>>>>>>>> On Tue, Jan 20, 2015 at 09:51:26AM +0200,
>>>>>>>>>>>> Konstantin Belousov wrote: K> > Like stated in
>>>>>>>>>>>> the manual page, callout_reset_curcpu/on() does
>>>>>>>>>>>> not work K> > with MPSAFE callouts any more! K>
>>>>>>>>>>>> I.e. you 'fixed' some undeterminate bugs in
>>>>>>>>>>>> callout migration by not K> doing migration at
>>>>>>>>>>>> all anymore. K> K> > K> > You need to use
>>>>>>>>>>>> callout_init_{mtx,rm,rw} and remove the custom
>>>>>>>>>>>> locking K> > inside the callback in the TCP
>>>>>>>>>>>> stack to get it working like before! K> K> No,
>>>>>>>>>>>> you need to do this, if you think that whole
>>>>>>>>>>>> callout KPI must be K> rototiled.  It is up to
>>>>>>>>>>>> the person who modifies the KPI, to ensure that
>>>>>>>>>>>> K> existing code is not broken. K> K> As I
>>>>>>>>>>>> understand, currently we are back to the
>>>>>>>>>>>> one-cpu callouts. K> Do other people consider
>>>>>>>>>>>> this situation acceptable ?
>>>>>>>>>>>>
>>>>>>>>>>>> I think this isn't acceptable. The commit to a
>>>>>>>>>>>> complex subsystem lacked a review from persons
>>>>>>>>>>>> involved in the system before. The commit t

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-21 Thread K. Macy
> They originally found that things were spinning for way too long.
>
> Hans found something similar and determined/concluded that the
> migration code in callouts was racy-by-design and dramatically
> simplified it and also put very hard constraints on what is a valid
> situation to support migrating from one callwheel to another. Now we
> have fallout which we can either address or back out until the callout
> stuff is again reviewed/fixed.
>
> I don't think it's as alchemic as is being promoted.


Let's not get drawn into semantic debates. To me alchemy implies
voodoo debugging, whereas this is, in part, disabling functionality
that exposes races - which is more like disabling preemption or fine
grained locking.


Normally I would advocate backing this out on the basis of precedence.
That is to say, back it out so that developers will get it right the
first time. However, it appears that hps@ and some others don't
understand what was done wrong. So I'm going to state some basic
premises and then the implied basic guidelines required of a commit
that were not met. If these guidelines are violated by a change in the
future I will agitate for a rapid back out.

Premises:
A) A performance regression is a bug every bit as much as a locking
race. Stability OR performance (where we are talking about _current_
performance) is a false dichotomy.
   - This means that a change that fixes a bug by introducing a
substantial performance regression does not constitute a fix.
B) Existing behavior and performance characteristics should not be
adversely changed unless there is widespread consensus.
  - Sometimes it is necessary to "break" a KPI but that should not be
discovered by svn or scanning back through the mailing lists.

Guidelines:
A) Performance should not measurably regress.
B) If a KPI changes behavior e.g. callout_reset_on being crippled, all
clients need to be updated so that they maintain their current
behavior by the committer changing the KPI. The client code
maintainers aren't responsible for reacting to these types of changes.
That is OK for marginal architectures like sun4v or pc98.
C) If there is a non-zero possibility that these changes break the
client code, committers who have touched that code in the last few
years should be notified.



HPS: Your change failed to meet these guidelines. Some of us are upset
because these guidelines are fairly fundamental for the on-going
viability of FreeBSD. Due to linguistic / time zone / cultural
differences these expectations have not been adequately communicated
to you. You are not in the USB sandbox where others need for your
support outweighs the inconvenience of random breakage.

It sounds like you are making progress towards updating the concerns
that have been voiced. If kib's observations are in fact comprehensive
then adding a callout_init_cpu function and updating all clients so
that their callouts continue to be scheduled on a CPU other than the
BSP will suffice and we can all move on.

Thanks in advance.


-K
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-21 Thread K. Macy
>> HPS: Your change failed to meet these guidelines. Some of us are upset
>> because these guidelines are fairly fundamental for the on-going
>> viability of FreeBSD. Due to linguistic / time zone / cultural
>> differences these expectations have not been adequately communicated
>> to you. You are not in the USB sandbox where others need for your
>> support outweighs the inconvenience of random breakage.
>>
>> It sounds like you are making progress towards updating the concerns
>> that have been voiced. If kib's observations are in fact comprehensive
>> then adding a callout_init_cpu function and updating all clients so
>> that their callouts continue to be scheduled on a CPU other than the
>> BSP will suffice and we can all move on.
>
> Is there some reason that we can’t back things out, break things down into
> smaller pieces and have everything pass through phabric with a wide
> ranging review? Given the fundamental nature of these changes, they
> really need better review and doing it after the fact seems to be to be
> too risky. I’m not debating that this “fixes” some issues, but given the
> performance regression, it sure seems like we may need a different
> solution to be implemented and hashing that out in a branch might be
> the best approach.

Thank you. A more incremental approach would be appreciated by many of
us. To avoid the bystander effect we can permit explicit timeouts for
review-to-commit (72 hours?) so that we don't collectively end up
sandbagging him.


-K
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Re: svn commit: r277213 - in head: share/man/man9 sys/kern sys/ofed/include/linux sys/sys

2015-01-22 Thread K. Macy
>> Sigh, you still do not understand.  It is your duty to identify all pieces
>> which break after your change.  After that, we can argue whether each of
>> them is critical or not to allow the migration. But this must have been
>> done before the KPI change hit the tree.
>>
>
> Hi,
>
> Are you saying that pieces of code that runs completely unlocked using
> "volatile" as only synchronization mechanism is better than what I would
> call a temporary and hopefully short TCP stack performance loss?
>


Hans - The project has long standing expectations about how changes
are made to core subsystems. When you hear "understand" your ego
intercedes - put that aside. I told you this first this afternoon and
others have repeated it several times. When you change a KPI,
consumers are updated at the same time - _period_. This protocol is
not up for debate. I'm glad that others have the presence of mind and
fortitude to insist on this. Your work is appreciated, but whether or
not you agree about this is not relevant.

We're all sorry if this upsets you but this is only a temporary
setback. Channelling this work through phabricator will go a long way
towards smoothing over the current friction. Think about the greater
goal here, not whether this is "done" now or in a week.

-K
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r344487 - in head/sys: conf gnu/gcov

2019-02-25 Thread K. Macy
> We had a brief discussion of this commit within a subset of core.  This
> addition of GPLv2 code is fine as the code is easily removal to a module
> (per kmoore@) should the day come that we're read to evict all GPL code.

I don't execute the ctors until coverage is enabled because I have to
manually find the symbols. The linker doesn't actually generate a ctor
section for functions in text.startup in spite of what Juniper's
linker commit would lead one to believe - presumably they have a
private linker script in addition to a private gcov port.  Thus, it
really could just work fine as a module. Nonetheless, everything to be
profiled needs to be compiled with instrumentation, so separating it
out makes very little sense to me. Although, I suppose ctfconvert +
dtrace module is somewhat analogous.

> The modest increase in activation energy for that task seems worth it
> for the short-term gains of reduced integration cost (this code will
> greatly improve our ZFS-on-Linux test coverage.)
>
> Rod rightly points out that we haven't accepted SPDX tags alone as
> license statements.  The standard GPL v2.0 boiler plate should be added
> to this file along side the tag.

I've copied the full copyright attribution that is in the
corresponding files on Linux. Is there some reason why FreeBSD
requires the files to be inflated with the full license text where the
original lacks it?

>
> An additional issue is that the a warning tag was not added to
> sys/conf/files.  A warning along the lines of:
>
> warning "kernel contains GPLv2 licensed GCOV"
>
> needs to be added.

Yup.

>
> This commit needed more through review.

How would this be achieved:? I had several people on the review and no
one had substantive feedback.

>
> We intend to update our license policy to require core sign off for
> new GPL code to ensure we're not adding new, tightly integrated
> dependencies, to document that we're doing so knowingly, and
> to make sure steps aren't missed.  The current document is at:
> https://www.freebsd.org/internal/software-license.html

-M
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r300113 - in head/sys: conf kern net sys

2016-05-21 Thread K. Macy
On Sat, May 21, 2016 at 3:36 AM, Hans Petter Selasky  wrote:
> On 05/18/16 06:35, Scott Long wrote:
>>
>> Author: scottl
>> Date: Wed May 18 04:35:58 2016
>> New Revision: 300113
>> URL: https://svnweb.freebsd.org/changeset/base/300113
>>
>> Log:
>>   Import the 'iflib' API library for network drivers.  From the author:
>>
>>   "iflib is a library to eliminate the need for frequently duplicated
>> device
>>   independent logic propagated (poorly) across many network drivers."
>>
>>   Participation is purely optional.  The IFLIB kernel config option is
>>   provided for drivers that want to transition between legacy and iflib
>>   modes of operation.  ixl and ixgbe driver conversions will be committed
>>   shortly.  We hope to see participation from the Broadcom and maybe
>>   Chelsio drivers in the near future.
>>
>>   Submitted by:   mm...@nextbsd.org
>>   Reviewed by:gallatin
>>   Differential Revision:  D5211
>>
>> Added:
>>   head/sys/net/ifdi_if.m   (contents, props changed)
>>   head/sys/net/iflib.c   (contents, props changed)
>>   head/sys/net/iflib.h   (contents, props changed)
>>   head/sys/net/mp_ring.c   (contents, props changed)
>>   head/sys/net/mp_ring.h   (contents, props changed)
>> Modified:
>>   head/sys/conf/files
>>   head/sys/conf/options
>>   head/sys/kern/device_if.m
>>   head/sys/kern/kern_mbuf.c
>>   head/sys/kern/subr_taskqueue.c
>>   head/sys/net/if.c
>>   head/sys/net/if_var.h
>>   head/sys/sys/_task.h
>>   head/sys/sys/mbuf.h
>>   head/sys/sys/taskqueue.h
>>
>
> Hi,
>
> Possibly the taskqueue related changes should have been broken out into a
> separate commit, hence they are not related to "iflib".
>
> --HPS

Yes.
-M
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r300854 - in head/sys: netinet netinet6

2016-05-28 Thread K. Macy
On Friday, May 27, 2016, Bjoern A. Zeeb 
wrote:

>
> > On 28 May 2016, at 00:02 , Gleb Smirnoff  wrote:
> >
> > On Fri, May 27, 2016 at 02:27:45PM -0700, Adrian Chadd wrote:
> > A> Hm, doesnt this make sense to do as part of RO_RTFREE?
> >
> > I agree that it looks messy, but for now we just need to fix instapanic.
> >
> > I will either return to this, or may be melifaro's new routing will
> > outperform FLOWTABLE and we can delete it.
>
> This statement makes no sense to me at this point anymore.
> For local connections you have cached routes;  no lookup will be faster.
>
> For forwarding flowtable should not be used anyway.
>
>

Flowtable does, or at least did (I don't know how much the code has been
handicapped in the meantime), stateful handling of flows. Neither inpcb
caching nor melifaro's routing improvements apply. Perhaps obsolete as the
band aid it was being used for.


What you mean is that with L2 caching in the inPCB, flowtable will become
> obsolete?
>
> /bz
> ___
> svn-src-head@freebsd.org  mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org
> "
>
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r300113 - in head/sys: conf kern net sys

2016-05-18 Thread K. Macy
Those are new. Please just patch iflib to use them and let's move on.
-M

On Wed, May 18, 2016 at 4:58 PM, Gleb Smirnoff  wrote:
>   Hi!
>
> On Wed, May 18, 2016 at 04:35:58AM +, Scott Long wrote:
> S> 
> ==
> S> --- head/sys/net/if.cWed May 18 04:04:14 2016(r300112)
> S> +++ head/sys/net/if.cWed May 18 04:35:58 2016(r300113)
> S> @@ -3900,6 +3900,19 @@ if_multiaddr_count(if_t ifp, int max)
> S>  return (count);
> S>  }
> S>
> S> +int
> S> +if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct 
> ifmultiaddr *, int), void *arg)
> S> +{
> S> +struct ifmultiaddr *ifma;
> S> +int cnt = 0;
> S> +
> S> +if_maddr_rlock(ifp);
> S> +TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
> S> +cnt += filter(arg, ifma, cnt);
> S> +if_maddr_runlock(ifp);
> S> +return (cnt);
> S> +}
> S> +
> S>  struct mbuf *
> S>  if_dequeue(if_t ifp)
> S>  {
>
> In my projects/ifnet a similar functions exist:
>
> /*
>  * Traversing through interface address lists.
>  */
> typedef voidifaddr_cb_t(void *, struct sockaddr *, struct sockaddr *,
> struct sockaddr *);
> typedef voidifmaddr_cb_t(void *, struct sockaddr *);
> voidif_foreach_addr(if_t, ifaddr_cb_t, void *);
> voidif_foreach_maddr(if_t, ifmaddr_cb_t, void *);
>
> /*
>  * Methods for drivers to access interface unicast and multicast
>  * addresses.  Driver do not know 'struct ifaddr' neither 'struct 
> ifmultiaddr'.
>  */
> void
> if_foreach_addr(if_t ifp, ifaddr_cb_t cb, void *cb_arg)
> {
> struct ifaddr *ifa;
>
> IF_ADDR_RLOCK(ifp);
> TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
> (*cb)(cb_arg, ifa->ifa_addr, ifa->ifa_dstaddr,
> ifa->ifa_netmask);
> IF_ADDR_RUNLOCK(ifp);
> }
>
> void
> if_foreach_maddr(if_t ifp, ifmaddr_cb_t cb, void *cb_arg)
> {
> struct ifmultiaddr *ifma;
>
> IF_ADDR_RLOCK(ifp);
> TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
> (*cb)(cb_arg, ifma->ifma_addr);
> IF_ADDR_RUNLOCK(ifp);
> }
>
> Do you mind if I adopt head to them instead of if_multi_apply()?
>
> --
> Totus tuus, Glebius.
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"