bridge diff needs testing
The following diff replaces the existing hash table with a red black tree, to the acclaim of network hackers worldwide. This has already received some testing, but needs some more shakedown in order to make sure that nobody gets hosed by this. Folks with more esoteric (read: vether and/or gif) setups are especially encouraged to test, and may receive cookies. - Bert Index: if_bridge.h === RCS file: /cvs/src/sys/net/if_bridge.h,v retrieving revision 1.32 diff -u -p -r1.32 if_bridge.h --- if_bridge.h 28 Oct 2010 13:49:54 - 1.32 +++ if_bridge.h 29 Oct 2010 19:45:38 - @@ -397,7 +397,7 @@ struct bridge_iflist { * Bridge route node */ struct bridge_rtnode { - LIST_ENTRY(bridge_rtnode) brt_next; /* next in list */ + RB_ENTRY(bridge_rtnode) brt_entry; /* entry in rb tree */ struct ifnet *brt_if; /* destination ifs */ u_int8_tbrt_flags; /* address flags */ u_int8_tbrt_age;/* age counter */ @@ -423,7 +423,7 @@ struct bridge_softc { struct timeout sc_brtimeout; /* timeout state */ struct bstp_state *sc_stp;/* stp state */ LIST_HEAD(, bridge_iflist) sc_iflist; /* interface list */ - LIST_HEAD(, bridge_rtnode) sc_rts[BRIDGE_RTABLE_SIZE]; /* hash table */ + RB_HEAD(brt_tree, bridge_rtnode) sc_rts;/* route tree */ LIST_HEAD(, bridge_iflist) sc_spanlist;/* span ports */ }; Index: if_bridge.c === RCS file: /cvs/src/sys/net/if_bridge.c,v retrieving revision 1.186 diff -u -p -r1.186 if_bridge.c --- if_bridge.c 28 Oct 2010 19:00:57 - 1.186 +++ if_bridge.c 29 Oct 2010 19:45:38 - @@ -45,6 +45,7 @@ #include sys/ioctl.h #include sys/errno.h #include sys/kernel.h +#include sys/tree.h #include machine/cpu.h #include net/if.h @@ -135,7 +136,7 @@ int bridge_rtfind(struct bridge_softc *, void bridge_rtage(struct bridge_softc *); void bridge_rttrim(struct bridge_softc *); intbridge_rtdaddr(struct bridge_softc *, struct ether_addr *); -intbridge_rtflush(struct bridge_softc *, int); +void bridge_rtflush(struct bridge_softc *, int); struct ifnet * bridge_rtupdate(struct bridge_softc *, struct ether_addr *, struct ifnet *ifp, int, u_int8_t); struct ifnet * bridge_rtlookup(struct bridge_softc *, @@ -180,6 +181,16 @@ LIST_HEAD(, bridge_softc) bridge_list; struct if_clone bridge_cloner = IF_CLONE_INITIALIZER(bridge, bridge_clone_create, bridge_clone_destroy); +static __inline int brt_cmp(struct bridge_rtnode *, struct bridge_rtnode *); +RB_PROTOTYPE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); +RB_GENERATE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); + +static __inline int +brt_cmp(struct bridge_rtnode *a, struct bridge_rtnode *b) +{ + return (memcmp(a-brt_addr, b-brt_addr, sizeof(a-brt_addr))); +} + /* ARGSUSED */ void bridgeattach(int n) @@ -194,7 +205,7 @@ bridge_clone_create(struct if_clone *ifc { struct bridge_softc *sc; struct ifnet *ifp; - int i, s; + int s; sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); if (!sc) @@ -211,8 +222,7 @@ bridge_clone_create(struct if_clone *ifc timeout_set(sc-sc_brtimeout, bridge_timer, sc); LIST_INIT(sc-sc_iflist); LIST_INIT(sc-sc_spanlist); - for (i = 0; i BRIDGE_RTABLE_SIZE; i++) - LIST_INIT(sc-sc_rts[i]); + RB_INIT(sc-sc_rts); sc-sc_hashkey = arc4random(); ifp = sc-sc_if; snprintf(ifp-if_xname, sizeof ifp-if_xname, %s%d, ifc-ifc_name, @@ -560,7 +570,7 @@ bridge_ioctl(struct ifnet *ifp, u_long c if ((error = suser(curproc, 0)) != 0) break; - error = bridge_rtflush(sc, req-ifbr_ifsflags); + bridge_rtflush(sc, req-ifbr_ifsflags); break; case SIOCBRDGSADDR: if ((error = suser(curproc, 0)) != 0) @@ -1003,7 +1013,7 @@ bridge_output(struct ifnet *ifp, struct struct ifnet *dst_if; struct ether_addr *dst; struct bridge_softc *sc; - int s, error, len; + int s, error; #ifdef IPSEC struct m_tag *mtag; #endif /* IPSEC */ @@ -1109,33 +1119,12 @@ bridge_output(struct ifnet *ifp, struct used = 1; mc = m; } else { - struct mbuf *m1, *m2, *mx; - - m1 = m_copym2(m, 0, ETHER_HDR_LEN, - M_DONTWAIT); - if (m1 == NULL) { - sc-sc_if.if_oerrors++; - continue; -
pf ip reassembly test awesomization
There's no need to walk the entire fragment list to determine if we have all the bytes of the unfragmented packet if we keep a running count on how much we've seen already. Please to be testing as we need to make sure that our userbase remains unmolested. - Bert Index: pf_norm.c === RCS file: /cvs/src/sys/net/pf_norm.c,v retrieving revision 1.132 diff -u -p -r1.132 pf_norm.c --- pf_norm.c 23 Apr 2011 10:00:36 - 1.132 +++ pf_norm.c 16 May 2011 15:35:20 - @@ -97,6 +97,7 @@ struct pf_fragment { TAILQ_ENTRY(pf_fragment) frag_next; u_int32_t fr_timeout; u_int16_t fr_maxlen; /* maximum length of single fragment */ + u_int16_t fr_qlen;/* length of current queue */ TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; }; @@ -322,6 +323,7 @@ pf_fillup_fragment(struct pf_fragment_cm *(struct pf_fragment_cmp *)frag = *key; frag-fr_timeout = time_second; frag-fr_maxlen = frent-fe_len; + frag-fr_qlen = frent-fe_len; TAILQ_INIT(frag-fr_queue); RB_INSERT(pf_frag_tree, pf_frag_tree, frag); @@ -401,6 +403,9 @@ pf_fillup_fragment(struct pf_fragment_cm pf_nfrents--; } + /* Adjust queue length */ + frag-fr_qlen += frent-fe_len; + if (prev == NULL) TAILQ_INSERT_HEAD(frag-fr_queue, frent, fr_next); else @@ -419,8 +424,7 @@ pf_fillup_fragment(struct pf_fragment_cm int pf_isfull_fragment(struct pf_fragment *frag) { - struct pf_frent *frent, *next; - u_int16_toff, total; + u_int16_ttotal; /* Check if we are completely reassembled */ if (TAILQ_LAST(frag-fr_queue, pf_fragq)-fe_mff) @@ -431,22 +435,14 @@ pf_isfull_fragment(struct pf_fragment *f TAILQ_LAST(frag-fr_queue, pf_fragq)-fe_len; /* Check if we have all the data */ - off = 0; - for (frent = TAILQ_FIRST(frag-fr_queue); frent; frent = next) { - next = TAILQ_NEXT(frent, fr_next); - - off += frent-fe_len; - if (off total (next == NULL || next-fe_off != off)) { - DPFPRINTF(LOG_NOTICE, - missing fragment at %d, next %d, total %d, - off, next == NULL ? -1 : next-fe_off, total); - return (0); - } - } - DPFPRINTF(LOG_NOTICE, %d %d?, off, total); - if (off total) + if (frag-fr_qlen total) { + DPFPRINTF(LOG_NOTICE, + missing fragment: need %d have %d, total, frag-fr_qlen); return (0); - KASSERT(off == total); + } + + DPFPRINTF(LOG_NOTICE, %d %d?, frag-fr_qlen, total); + KASSERT(frag-fr_qlen == total); return (1); }
Re: bridge diff needs testing
On Wed, Jun 22, 2011 at 01:21:28PM +0200, Camiel Dobbelaar wrote: Hi Bret, one comment from inspection, is the part below related to the RB tree? Or a seperate optimization? On 22-6-2011 12:21, Bret S. Lambert wrote: @@ -1109,33 +1119,12 @@ bridge_output(struct ifnet *ifp, struct used = 1; mc = m; } else { - struct mbuf *m1, *m2, *mx; - - m1 = m_copym2(m, 0, ETHER_HDR_LEN, - M_DONTWAIT); - if (m1 == NULL) { - sc-sc_if.if_oerrors++; - continue; - } - m2 = m_copym2(m, ETHER_HDR_LEN, + mc = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); - if (m2 == NULL) { - m_freem(m1); + if (mc == NULL) { sc-sc_if.if_oerrors++; continue; } - - for (mx = m1; mx-m_next != NULL; mx = mx-m_next) - /*EMPTY*/; - mx-m_next = m2; - - if (m1-m_flags M_PKTHDR) { - len = 0; - for (mx = m1; mx != NULL; mx = mx-m_next) - len += mx-m_len; - m1-m_pkthdr.len = len; - } - mc = m1; } error = bridge_ifenqueue(sc, dst_if, mc); Ack; had sent an older diff before I had ripped this out. Please ignore, and use the following updated diff. Apologies for the noise. Index: if_bridge.h === RCS file: /cvs/src/sys/net/if_bridge.h,v retrieving revision 1.34 diff -u -p -r1.34 if_bridge.h --- if_bridge.h 20 Nov 2010 14:23:09 - 1.34 +++ if_bridge.h 22 Jun 2011 12:46:52 - @@ -396,7 +396,7 @@ struct bridge_iflist { * Bridge route node */ struct bridge_rtnode { - LIST_ENTRY(bridge_rtnode) brt_next; /* next in list */ + RB_ENTRY(bridge_rtnode) brt_entry; /* entry in rb tree */ struct ifnet *brt_if; /* destination ifs */ u_int8_tbrt_flags; /* address flags */ u_int8_tbrt_age;/* age counter */ @@ -422,7 +422,7 @@ struct bridge_softc { struct timeout sc_brtimeout; /* timeout state */ struct bstp_state *sc_stp;/* stp state */ LIST_HEAD(, bridge_iflist) sc_iflist; /* interface list */ - LIST_HEAD(, bridge_rtnode) sc_rts[BRIDGE_RTABLE_SIZE]; /* hash table */ + RB_HEAD(brt_tree, bridge_rtnode) sc_rts;/* route tree */ LIST_HEAD(, bridge_iflist) sc_spanlist;/* span ports */ }; Index: if_bridge.c === RCS file: /cvs/src/sys/net/if_bridge.c,v retrieving revision 1.188 diff -u -p -r1.188 if_bridge.c --- if_bridge.c 4 Nov 2010 23:07:15 - 1.188 +++ if_bridge.c 22 Jun 2011 12:46:52 - @@ -45,6 +45,7 @@ #include sys/ioctl.h #include sys/errno.h #include sys/kernel.h +#include sys/tree.h #include machine/cpu.h #include net/if.h @@ -135,7 +136,7 @@ int bridge_rtfind(struct bridge_softc *, void bridge_rtage(struct bridge_softc *); void bridge_rttrim(struct bridge_softc *); intbridge_rtdaddr(struct bridge_softc *, struct ether_addr *); -intbridge_rtflush(struct bridge_softc *, int); +void bridge_rtflush(struct bridge_softc *, int); struct ifnet * bridge_rtupdate(struct bridge_softc *, struct ether_addr *, struct ifnet *ifp, int, u_int8_t); struct ifnet * bridge_rtlookup(struct bridge_softc *, @@ -180,6 +181,16 @@ LIST_HEAD(, bridge_softc) bridge_list; struct if_clone bridge_cloner = IF_CLONE_INITIALIZER(bridge, bridge_clone_create, bridge_clone_destroy); +static __inline int brt_cmp(struct bridge_rtnode *, struct bridge_rtnode *); +RB_PROTOTYPE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); +RB_GENERATE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); + +static __inline int +brt_cmp(struct bridge_rtnode *a, struct bridge_rtnode *b) +{ + return (memcmp(a-brt_addr, b-brt_addr, sizeof(a-brt_addr))); +} + /* ARGSUSED */ void bridgeattach(int n) @@ -193,7 +204,7 @@ bridge_clone_create(struct if_clone *ifc { struct bridge_softc *sc; struct ifnet *ifp; - int i, s; + int s; sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT
disgusting routing/network macro cleanup...now with booting kernels
Cleanup of some horrible macros pulled into the network code from the horrible radix code. Now boots and runs! And sends email! Hi, otto@! Index: net/radix.c === RCS file: /cvs/src/sys/net/radix.c,v retrieving revision 1.28 diff -u -p -r1.28 radix.c --- net/radix.c 22 Aug 2010 17:02:04 - 1.28 +++ net/radix.c 4 Apr 2011 18:29:51 - @@ -413,9 +413,9 @@ rn_addmask(void *n_arg, int search, int if (mlen = skip) return (mask_rnhead-rnh_nodes); if (skip 1) - Bcopy(rn_ones + 1, addmask_key + 1, skip - 1); + bcopy(rn_ones + 1, addmask_key + 1, skip - 1); if ((m0 = mlen) skip) - Bcopy(netmask + skip, addmask_key + skip, mlen - skip); + bcopy(netmask + skip, addmask_key + skip, mlen - skip); /* * Trim trailing zeroes. */ @@ -428,23 +428,23 @@ rn_addmask(void *n_arg, int search, int return (mask_rnhead-rnh_nodes); } if (m0 last_zeroed) - Bzero(addmask_key + m0, last_zeroed - m0); + bzero(addmask_key + m0, last_zeroed - m0); *addmask_key = last_zeroed = mlen; x = rn_search(addmask_key, rn_masktop); if (Bcmp(addmask_key, x-rn_key, mlen) != 0) x = 0; if (x || search) return (x); - R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); + x = malloc(max_keylen + 2 * sizeof(*x), M_RTABLE, M_DONTWAIT); if ((saved_x = x) == 0) return (0); - Bzero(x, max_keylen + 2 * sizeof (*x)); + bzero(x, (unsigned)(max_keylen + 2 * sizeof(*x))); netmask = cp = (caddr_t)(x + 2); - Bcopy(addmask_key, cp, mlen); + bcopy(addmask_key, cp, mlen); x = rn_insert(cp, mask_rnhead, maskduplicated, x); if (maskduplicated) { log(LOG_ERR, rn_addmask: mask impossibly already in tree\n); - Free(saved_x); + free(saved_x, M_RTABLE); return (x); } /* @@ -500,7 +500,7 @@ rn_new_radix_mask(struct radix_node *tt, log(LOG_ERR, Mask for route not entered\n); return (0); } - Bzero(m, sizeof *m); + bzero(m, sizeof *m); m-rm_b = tt-rn_b; m-rm_flags = tt-rn_flags; if (tt-rn_flags RNF_NORMAL) @@ -1003,7 +1003,7 @@ rn_inithead(void **head, int off) if (*head) return (1); - R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh)); + rnh = malloc(sizeof(*rnh), M_RTABLE, M_DONTWAIT); if (rnh == 0) return (0); *head = rnh; @@ -1015,7 +1015,7 @@ rn_inithead0(struct radix_node_head *rnh { struct radix_node *t, *tt, *ttt; - Bzero(rnh, sizeof (*rnh)); + bzero(rnh, sizeof (*rnh)); t = rn_newpair(rn_zeros, off, rnh-rnh_nodes); ttt = rnh-rnh_nodes + 2; t-rn_r = ttt; @@ -1050,10 +1050,10 @@ rn_init() rn_init: radix functions require max_keylen be set\n); return; } - R_Malloc(rn_zeros, char *, 3 * max_keylen); + rn_zeros = malloc(3 * max_keylen, M_RTABLE, M_DONTWAIT); if (rn_zeros == NULL) panic(rn_init); - Bzero(rn_zeros, 3 * max_keylen); + bzero(rn_zeros, 3 * max_keylen); rn_ones = cp = rn_zeros + max_keylen; addmask_key = cplim = rn_ones + max_keylen; while (cp cplim) Index: net/radix.h === RCS file: /cvs/src/sys/net/radix.h,v retrieving revision 1.16 diff -u -p -r1.16 radix.h --- net/radix.h 28 Jun 2010 18:50:37 - 1.16 +++ net/radix.h 4 Apr 2011 18:29:51 - @@ -98,7 +98,7 @@ extern struct radix_mask { m = rn_mkfreelist; \ rn_mkfreelist = (m)-rm_mklist; \ } else \ - R_Malloc(m, struct radix_mask *, sizeof (*(m)));\ + m = malloc(sizeof(*(m)), M_RTABLE, M_DONTWAIT); \ } while (0) #define MKFree(m) do { \ @@ -132,11 +132,6 @@ struct radix_node_head { }; #ifdef _KERNEL -#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) -#define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) -#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n)); -#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_DONTWAIT)) -#define Free(p) free((caddr_t)p, M_RTABLE); void rn_init(void); intrn_inithead(void **, int); Index: net/route.c === RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.130 diff -u -p -r1.130 route.c --- net/route.c 4 Apr 2011
Re: NFS writes lock up system with -o tcp,-w32768
On Wed, Mar 30, 2011 at 09:54:45PM +0200, Claudio Jeker wrote: On Wed, Mar 30, 2011 at 08:36:45PM +0200, Walter Haidinger wrote: Am 30.03.2011 15:23, schrieb Claudio Jeker: Buffers below 8k are stupid. For TCP just use 32k or even 64k. 512byte buffers are silly. They get internally rounded up since the smallest packet seems to be 512bytes data plus header. This will give you TCP send and recv buffers of around 1200bytes. No wonder it is slow as hell. Throughput isn't the issue. The system gets unusable with sizes 2048. The machine freezes, it takes a couple of seconds for the next shell prompt to appear, like under really heavy load (I'd say way 30). Of course bufsizes that small make no sense and your patch eliminates the lock ups, but the they show there is still some bug. I'd expect slow nfs transfers but not the behavior as if under heavy load. (*) NFS is a strange beast and I guess running with to small buffers results in such side effects. This has nothing todo with the buffer scaling but more with the way NFS works. NFS has enough bugs to open a special exhibit at the zoo. I have no idea if I'll ever have enough courage to dive back into it again. This is just to let to know, maybe you want to have a further look. I'm not interested. Maybe someone else likes to dig deep into NFS. I guess there is a reason why the default is 8k. Why did I test with small buffer sizes too? Well, I got another email which said about the mount options, obviously regarding the buffer sizes: When you jackfuck that knob with other values, what is the result? Troubleshooting isn't only for others, son! A reminder that this is an OpenBSD list... ;-) Luckily I always make sure to have my asbestos on when dealing with! -- :wq Claudio
move bridge to rb-tree vice hash
Please test to make sure this breaks no setups and/or pleases you aesthetically as it does me. - Bert Index: net/if_bridge.c === RCS file: /cvs/src/sys/net/if_bridge.c,v retrieving revision 1.188 diff -u -p -r1.188 if_bridge.c --- net/if_bridge.c 4 Nov 2010 23:07:15 - 1.188 +++ net/if_bridge.c 29 Mar 2011 19:51:56 - @@ -45,6 +45,7 @@ #include sys/ioctl.h #include sys/errno.h #include sys/kernel.h +#include sys/tree.h #include machine/cpu.h #include net/if.h @@ -135,7 +136,7 @@ int bridge_rtfind(struct bridge_softc *, void bridge_rtage(struct bridge_softc *); void bridge_rttrim(struct bridge_softc *); intbridge_rtdaddr(struct bridge_softc *, struct ether_addr *); -intbridge_rtflush(struct bridge_softc *, int); +void bridge_rtflush(struct bridge_softc *, int); struct ifnet * bridge_rtupdate(struct bridge_softc *, struct ether_addr *, struct ifnet *ifp, int, u_int8_t); struct ifnet * bridge_rtlookup(struct bridge_softc *, @@ -180,6 +181,16 @@ LIST_HEAD(, bridge_softc) bridge_list; struct if_clone bridge_cloner = IF_CLONE_INITIALIZER(bridge, bridge_clone_create, bridge_clone_destroy); +static __inline int brt_cmp(struct bridge_rtnode *, struct bridge_rtnode *); +RB_PROTOTYPE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); +RB_GENERATE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); + +static __inline int +brt_cmp(struct bridge_rtnode *a, struct bridge_rtnode *b) +{ + return (memcmp(a-brt_addr, b-brt_addr, sizeof(a-brt_addr))); +} + /* ARGSUSED */ void bridgeattach(int n) @@ -193,7 +204,7 @@ bridge_clone_create(struct if_clone *ifc { struct bridge_softc *sc; struct ifnet *ifp; - int i, s; + int s; sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); if (!sc) @@ -210,8 +221,7 @@ bridge_clone_create(struct if_clone *ifc timeout_set(sc-sc_brtimeout, bridge_timer, sc); LIST_INIT(sc-sc_iflist); LIST_INIT(sc-sc_spanlist); - for (i = 0; i BRIDGE_RTABLE_SIZE; i++) - LIST_INIT(sc-sc_rts[i]); + RB_INIT(sc-sc_rts); sc-sc_hashkey = arc4random(); ifp = sc-sc_if; snprintf(ifp-if_xname, sizeof ifp-if_xname, %s%d, ifc-ifc_name, @@ -559,7 +569,7 @@ bridge_ioctl(struct ifnet *ifp, u_long c if ((error = suser(curproc, 0)) != 0) break; - error = bridge_rtflush(sc, req-ifbr_ifsflags); + bridge_rtflush(sc, req-ifbr_ifsflags); break; case SIOCBRDGSADDR: if ((error = suser(curproc, 0)) != 0) @@ -1774,151 +1784,46 @@ struct ifnet * bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, struct ifnet *ifp, int setflags, u_int8_t flags) { - struct bridge_rtnode *p, *q; - u_int32_t h; - int dir; - - h = bridge_hash(sc, ea); - p = LIST_FIRST(sc-sc_rts[h]); - if (p == LIST_END(sc-sc_rts[h])) { + struct bridge_rtnode *brt, tmp; + + bcopy(ea, tmp.brt_addr, sizeof(*ea)); + + if ((brt = RB_FIND(brt_tree, sc-sc_rts, tmp)) == NULL) { + if (sc-sc_brtcnt = sc-sc_brtmax) - goto done; - p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT); - if (p == NULL) - goto done; + goto fail; + if ((brt = malloc(sizeof(*brt), M_DEVBUF, M_NOWAIT)) == NULL) + goto fail; - bcopy(ea, p-brt_addr, sizeof(p-brt_addr)); - p-brt_if = ifp; - p-brt_age = 1; + bcopy(ea, brt-brt_addr, sizeof(brt-brt_addr)); + brt-brt_if = ifp; + brt-brt_age = 1; if (setflags) - p-brt_flags = flags; + brt-brt_flags = flags; else - p-brt_flags = IFBAF_DYNAMIC; + brt-brt_flags = IFBAF_DYNAMIC; - LIST_INSERT_HEAD(sc-sc_rts[h], p, brt_next); + RB_INSERT(brt_tree, sc-sc_rts, brt); sc-sc_brtcnt++; - goto want; } - do { - q = p; - p = LIST_NEXT(p, brt_next); - - dir = memcmp(ea, q-brt_addr, sizeof(q-brt_addr)); - if (dir == 0) { - if (setflags) { - q-brt_if = ifp; - q-brt_flags = flags; - } else if (!(q-brt_flags IFBAF_STATIC)) - q-brt_if = ifp; - - if (q-brt_if == ifp) - q-brt_age = 1; - ifp = q-brt_if; - goto want; - } - - if (dir 0) { - if (sc-sc_brtcnt = sc-sc_brtmax) - goto done; -
Re: SCHED_ASSERT_UNLOCKED is considered harmful in the _kernel_lock()
Well, it's definitely a race, and the potential deadlock is handled in mi_switch(), so my vote is for good intention, bad judgement on the assertions in kern_lock.c On Fri, Nov 05, 2010 at 05:52:23PM +0100, Mike Belopuhov wrote: hi, we've finally got an interesting panic that shed some light on the 'kernel diagnostic assertion __mp_lock_held(sched_lock) == 0 failed' panics that plagued some heavily loaded systems. apparently, there's a window in mi_switch (right after cpu_switchto) that has sched_lock held but kernel_lock released, thus allowing other cpus to take advantage of the kernel_lock: cpu_switchto(p, nextproc); } else { p-p_stat = SONPROC; } clear_resched(curcpu()); SCHED_ASSERT_LOCKED(); /* * To preserve lock ordering, we need to release the sched lock * and grab it after we grab the big lock. * In the future, when the sched lock isn't recursive, we'll * just release it here. */ #ifdef MULTIPROCESSOR __mp_unlock(sched_lock); #endif unfortunately they can't do this because we check for a sched_lock being held: void _kernel_lock(void) { SCHED_ASSERT_UNLOCKED(); __mp_lock(kernel_lock); } it should be safe to take kernel_lock there because mi_switch: 1) doesn't do anything nasty there; 2) figures out whether process should take a kernel_lock later on: #ifdef MULTIPROCESSOR /* * Reacquire the kernel_lock now. We do this after we've * released the scheduler lock to avoid deadlock, and before * we reacquire the interlock and the scheduler lock. */ if (p-p_flag P_BIGLOCK) __mp_acquire_count(kernel_lock, hold_count); __mp_acquire_count(sched_lock, sched_count + 1); #endif opinions? should we just remove an assertion? or am i trippin' on some high quality drugs? (: ddb log session: ddb{0} tr Debugger(d08cc9e6,e08b7e34,d08aaf94,e08b7e34,d0a44e28) at Debugger+0x4 panic(d08aaf94,d08346ee,d08a80e0,d08a83ed,16a) at panic+0x5d __assert(d08346ee,d08a83ed,16a,d08a80e0,0) at __assert+0x2e _kernel_lock(1,0,d09d15cc,7a120,0) at _kernel_lock+0x48 trap() at trap+0x5fb --- trap (number -547639296) --- Bad frame pointer: 0xd0ad48a0 0: ddb{0} show panic kernel diagnostic assertion __mp_lock_held(sched_lock) == 0 failed: file .. /../../../kern/kern_lock.c, line 362 ddb{0} show ps No such command ddb{0} ps PID PPID PGRPUID S FLAGS WAIT COMMAND 2111 1 2111 0 3 0x2040180 selectsendmail 12050 1 12050 0 3 0x2004080 ttyin getty 3065 1 3065 0 3 0x2004080 ttyin getty 2937 1 2937 0 3 0x2004080 ttyin getty 15153 1 15153 0 3 0x2004080 ttyin getty 24343 1 24343 0 3 0x2004080 ttyin getty 631 1631 0 3 0x2004080 ttyin getty 18404 1 18404 0 3 0x280 selectcron 7380 1 7380 0 3 0x2000180 selectinetd 23123 1 23123 0 3 0x280 selectsshd 8900 27266 27156 83 3 0x2000180 poll ntpd 27266 27156 27156 83 3 0x2000180 poll ntpd 27156 1 27156 0 3 0x280 poll ntpd 22867 2 2 74 3 0x2000180 bpf pflogd 2 1 2 0 3 0x280 netio pflogd 15707 20626 20626 73 3 0x2000180 poll syslogd 20626 1 20626 0 3 0x288 netio syslogd 29035 1 29035 77 3 0x2000180 poll dhclient 19499 1 27610 0 3 0x280 poll dhclient 7362 1 7362 77 3 0x2000180 poll dhclient 22676 1 27610 0 3 0x280 poll dhclient 19 0 0 0 3 0x2100200 aiodoned aiodoned 18 0 0 0 3 0x2100200 syncerupdate 17 0 0 0 3 0x2100200 cleaner cleaner 16 0 0 0 30x100200 reaperreaper 15 0 0 0 3 0x2100200 pgdaemon pagedaemon 14 0 0 0 3 0x2100200 bored crypto 13 0 0 0 3 0x2100200 pftm pfpurge 12 0 0 0 3 0x2100200 usbtskusbtask 11 0 0 0 3 0x2100200 usbatsk usbatsk 10 0 0 0 3 0x2100200 acpi0 acpi0 9 0 0 0 7 0x40100200idle5 8 0 0 0 7 0x40100200idle4 7 0
Re: fold -b1 segmentation fault
On Thu, Oct 21, 2010 at 05:57:24PM +0100, Jason McIntyre wrote: in case anyone's interested, and as reported in a recent freebsd pr: $ fold -b1 Segmentation fault (core dumped) no fix is provided. jmc Number: 151592 Category: misc Synopsis: 'fold' segfaults on argument processing Confidential: no Severity: non-critical Priority: low Responsible:freebsd-bugs State: open Quarter: Keywords: Date-Required: Class: sw-bug Submitter-Id: current-users Arrival-Date: Wed Oct 20 03:50:08 UTC 2010 Closed-Date: Last-Modified: Originator: Marcus Reid Release:8.1-STABLE Organization: Environment: FreeBSD austin.sea.netifice.com 8.1-STABLE FreeBSD 8.1-STABLE #0: Mon Sep 20 23:53:47 PDT 2010 r...@austin.sea.netifice.com:/usr/obj/usr/src/sys/FARK amd64 Description: The 'fold' utility reads past the end of a buffer if arguments are incorrectly specified. If you pass an argument to '-b' that happens to be in the character set 0123456789, line 101 reads past the end of argv[optind] and causes a segmentation fault. How-To-Repeat: Simply run 'fold -b1' Fix: I don't really see what the author is intending to do in that case statement where it breaks. Release-Note: Audit-Trail: Unformatted: Fix appears to be to let getopt handle it, since allowing 0-9 as an argument is not documented: Index: fold.c === RCS file: /cvs/src/usr.bin/fold/fold.c,v retrieving revision 1.12 diff -u -p -r1.12 fold.c --- fold.c 27 Oct 2009 23:59:38 - 1.12 +++ fold.c 21 Oct 2010 17:26:24 - @@ -57,7 +57,7 @@ main(int argc, char *argv[]) const char *errstr; width = -1; - while ((ch = getopt(argc, argv, 0123456789bsw:)) != -1) + while ((ch = getopt(argc, argv, bsw:)) != -1) switch (ch) { case 'b': count_bytes = 1; @@ -70,21 +70,6 @@ main(int argc, char *argv[]) if (errstr != NULL) errx(1, illegal width value, %s: %s, errstr, optarg); - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if (width == -1) { - p = argv[optind - 1]; - if (p[0] == '-' p[1] == ch !p[2]) - w = ++p; - else - w = argv[optind] + 1; - - width = strtonum(w, 1, INT_MAX, errstr); - if (errstr != NULL) - errx(1, illegal width value, %s: %s, - errstr, optarg); - } break; default: (void)fprintf(stderr,
Re: Testers needed for strict locking diff; esp i386, amd64, softraid
On Thu, Sep 23, 2010 at 10:49:01PM -0700, Matthew Dempsky wrote: I'd like to commit this. I've received positive reports from a few amd64 users and an i386 and softraid user, and all of the locking bugs exposed so far have already been fixed. It's already exposed bugs in limited testing, so casting a wider net (i.e., getting it into snaps) with it makes sense to me. It also reminds me that I at one point intended to figure out how to support enforcing dependencies between mutexes... I plan to remove the #define panic() hacks and let future locking problems actually panic; if anyone thinks they should stay in for a bit longer, let me know. ok? On Mon, Sep 20, 2010 at 08:00:27PM -0700, Matthew Dempsky wrote: I'd really appreciate if people could test the diffs below and report back to me. This adds some warnings about lock API misuse that has already caught two bugs in DRM. Eventually, I want these to be panics instead, but using just warnings for now should make it easier for people to test. Please apply the diff below to an up-to-date -current kernel and check dmesg for any stack backtraces. If you see any, please send them to me. I'd especially like reports from both i386 and amd64, and also from softraid users. More details on the diff: this adds per-processor mutex lock counting to i386 and amd64, and updates assertwaitok() to check that this value is 0. They also now check that the process that calls mtx_leave() is the same that called mtx_enter(). rw_exit*() are changed to call rw_assert_{rd,wr}lock() as appropriate. Finally, mi_switch() also calls assertwaitok() now. Index: kern/subr_xxx.c === RCS file: /cvs/src/sys/kern/subr_xxx.c,v retrieving revision 1.10 diff -u -p -r1.10 subr_xxx.c --- kern/subr_xxx.c 21 Sep 2010 01:09:10 - 1.10 +++ kern/subr_xxx.c 21 Sep 2010 01:45:59 - @@ -153,6 +153,9 @@ blktochr(dev_t dev) return (NODEV); } +#include ddb/db_output.h +#define panic(x...) do { printf(x); db_stack_dump(); } while (0); + /* * Check that we're in a context where it's okay to sleep. */ @@ -160,4 +163,9 @@ void assertwaitok(void) { splassert(IPL_NONE); +#ifdef __HAVE_CPU_MUTEX_LEVEL + if (curcpu()-ci_mutex_level != 0) + panic(assertwaitok: non-zero mutex count: %d, + curcpu()-ci_mutex_level); +#endif } Index: kern/kern_rwlock.c === RCS file: /cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.15 diff -u -p -r1.15 kern_rwlock.c --- kern/kern_rwlock.c 13 Aug 2009 23:12:15 - 1.15 +++ kern/kern_rwlock.c 21 Sep 2010 01:45:59 - @@ -107,6 +107,8 @@ rw_exit_read(struct rwlock *rwl) { unsigned long owner = rwl-rwl_owner; + rw_assert_rdlock(rwl); + if (__predict_false((owner RWLOCK_WAIT) || rw_cas(rwl-rwl_owner, owner, owner - RWLOCK_READ_INCR))) rw_exit(rwl); @@ -117,6 +119,8 @@ rw_exit_write(struct rwlock *rwl) { unsigned long owner = rwl-rwl_owner; + rw_assert_wrlock(rwl); + if (__predict_false((owner RWLOCK_WAIT) || rw_cas(rwl-rwl_owner, owner, 0))) rw_exit(rwl); @@ -236,6 +240,11 @@ rw_exit(struct rwlock *rwl) int wrlock = owner RWLOCK_WRLOCK; unsigned long set; + if (wrlock) + rw_assert_wrlock(rwl); + else + rw_assert_rdlock(rwl); + do { owner = rwl-rwl_owner; if (wrlock) @@ -249,6 +258,10 @@ rw_exit(struct rwlock *rwl) wakeup(rwl); } +#include ddb/db_output.h +#define panic(x...) do { printf(x); db_stack_dump(); } while (0); + +#ifdef DIAGNOSTIC void rw_assert_wrlock(struct rwlock *rwl) { @@ -272,3 +285,4 @@ rw_assert_unlocked(struct rwlock *rwl) if (rwl-rwl_owner != 0L) panic(%s: lock held, rwl-rwl_name); } +#endif Index: kern/sched_bsd.c === RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.23 diff -u -p -r1.23 sched_bsd.c --- kern/sched_bsd.c30 Jun 2010 22:38:17 - 1.23 +++ kern/sched_bsd.c21 Sep 2010 01:45:59 - @@ -356,6 +356,7 @@ mi_switch(void) int sched_count; #endif + assertwaitok(); KASSERT(p-p_stat != SONPROC); SCHED_ASSERT_LOCKED(); Index: arch/amd64/include/cpu.h === RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.55 diff -u -p -r1.55 cpu.h --- arch/amd64/include/cpu.h11 Aug 2010 21:22:44 - 1.55 +++ arch/amd64/include/cpu.h21 Sep 2010 01:46:00 - @@ -87,6 +87,10 @@ struct cpu_info { int
Re: m_pad: ghostbuster's dream
On Wed, Sep 22, 2010 at 06:28:38PM +0200, Mike Belopuhov wrote: m_pad was introduced with an ipsec package import in openbsd in 1997. m_inject was introduced in 1999 but this code wasn't changed. m_pad is equivalent to m_inject with an offset equal to the actual data length. Doesn't break anything here, but additional tests won't harm. No objection in principle (consolidation of code in mbufs is one of my currently-stalled-by-dayjob projects); comment inline. OK? Index: netinet/ip_esp.c === RCS file: /home/cvs/src/sys/netinet/ip_esp.c,v retrieving revision 1.112 diff -u -p -r1.112 ip_esp.c --- netinet/ip_esp.c 22 Sep 2010 13:40:05 - 1.112 +++ netinet/ip_esp.c 22 Sep 2010 15:47:38 - @@ -932,9 +932,10 @@ esp_output(struct mbuf *m, struct tdb *t * Add padding -- better to do it ourselves than use the crypto engine, * although if/when we support compression, we'd have to do that. */ - pad = (u_char *) m_pad(m, padding + alen); + mo = m_inject(m, m-m_pkthdr.len, padding + alen, M_DONTWAIT); + pad = mtod(mo, u_char *); Sure, but this code creates a NULL pointer dereference; mtod() isn't a function that does anything smart, as is evidenced by the explanation in the mbuf man page: #define mtod(m,t) ((t)((m)-m_data)) if (pad == NULL) { - DPRINTF((esp_output(): m_pad() failed for SA %s/%08x\n, + DPRINTF((esp_output(): m_inject failed for SA %s/%08x\n, ipsp_address(tdb-tdb_dst), ntohl(tdb-tdb_spi))); return ENOBUFS; } @@ -1178,77 +1179,4 @@ checkreplaywindow32(u_int32_t seq, u_int *bitmap |= (((u_int32_t) 1) diff); return 0; -} - -/* - * m_pad(m, n) pads m with n bytes at the end. The packet header - * length is updated, and a pointer to the first byte of the padding - * (which is guaranteed to be all in one mbuf) is returned. - */ - -caddr_t -m_pad(struct mbuf *m, int n) -{ - struct mbuf *m0, *m1; - int len, pad; - caddr_t retval; - - if (n = 0) { /* No stupid arguments. */ - DPRINTF((m_pad(): pad length invalid (%d)\n, n)); - m_freem(m); - return NULL; - } - - len = m-m_pkthdr.len; - pad = n; - m0 = m; - - while (m0-m_len len) { - len -= m0-m_len; - m0 = m0-m_next; - } - - if (m0-m_len != len) { - DPRINTF((m_pad(): length mismatch (should be %d instead of - %d)\n, m-m_pkthdr.len, - m-m_pkthdr.len + m0-m_len - len)); - - m_freem(m); - return NULL; - } - - /* Check for zero-length trailing mbufs, and find the last one. */ - for (m1 = m0; m1-m_next; m1 = m1-m_next) { - if (m1-m_next-m_len != 0) { - DPRINTF((m_pad(): length mismatch (should be %d - instead of %d)\n, m-m_pkthdr.len, - m-m_pkthdr.len + m1-m_next-m_len)); - - m_freem(m); - return NULL; - } - - m0 = m1-m_next; - } - - if ((m0-m_flags M_EXT) || - m0-m_data + m0-m_len + pad = (m0-m_dat[MLEN])) { - /* Add an mbuf to the chain. */ - MGET(m1, M_DONTWAIT, MT_DATA); - if (m1 == 0) { - m_freem(m0); - DPRINTF((m_pad(): cannot append\n)); - return NULL; - } - - m0-m_next = m1; - m0 = m1; - m0-m_len = 0; - } - - retval = m0-m_data + m0-m_len; - m0-m_len += pad; - m-m_pkthdr.len += pad; - - return retval; } Index: netinet/ip_ipsp.h === RCS file: /home/cvs/src/sys/netinet/ip_ipsp.h,v retrieving revision 1.144 diff -u -p -r1.144 ip_ipsp.h --- netinet/ip_ipsp.h 9 Jul 2010 16:58:06 - 1.144 +++ netinet/ip_ipsp.h 22 Sep 2010 15:49:32 - @@ -624,9 +624,6 @@ extern int tcp_signature_tdb_input(struc extern int tcp_signature_tdb_output(struct mbuf *, struct tdb *, struct mbuf **, int, int); -/* Padding */ -extern caddr_t m_pad(struct mbuf *, int); - /* Replay window */ extern int checkreplaywindow32(u_int32_t, u_int32_t, u_int32_t *, u_int32_t, u_int32_t *, int);
Re: m_pad: ghostbuster's dream
On Thu, Sep 23, 2010 at 11:12:48AM +0200, Mike Belopuhov wrote: On Thu, Sep 23, 2010 at 09:57 +0200, Bret S. Lambert wrote: No objection in principle (consolidation of code in mbufs is one of my currently-stalled-by-dayjob projects); comment inline. OK? Index: netinet/ip_esp.c === RCS file: /home/cvs/src/sys/netinet/ip_esp.c,v retrieving revision 1.112 diff -u -p -r1.112 ip_esp.c --- netinet/ip_esp.c 22 Sep 2010 13:40:05 - 1.112 +++ netinet/ip_esp.c 22 Sep 2010 15:47:38 - @@ -932,9 +932,10 @@ esp_output(struct mbuf *m, struct tdb *t * Add padding -- better to do it ourselves than use the crypto engine, * although if/when we support compression, we'd have to do that. */ - pad = (u_char *) m_pad(m, padding + alen); + mo = m_inject(m, m-m_pkthdr.len, padding + alen, M_DONTWAIT); + pad = mtod(mo, u_char *); Sure, but this code creates a NULL pointer dereference; mtod() isn't a function that does anything smart, as is evidenced by the explanation in the mbuf man page: #define mtod(m,t) ((t)((m)-m_data)) /* Yeurk! */ new diff. thanks for looking through! It looks sane enough to me, although you might also take a look at m_pulldown to ensure X contiguous bytes at offset Y in an mbuf chain, as, IIRC, that has the possibility of not requiring a new mbuf allocation if there truly is enough space in an existing mbuf. But, it's your bikeshed, you can choose whether it's grape- or strawberry-flavored. Index: netinet/ip_esp.c === RCS file: /home/cvs/src/sys/netinet/ip_esp.c,v retrieving revision 1.112 diff -u -p -r1.112 ip_esp.c --- netinet/ip_esp.c 22 Sep 2010 13:40:05 - 1.112 +++ netinet/ip_esp.c 23 Sep 2010 09:05:40 - @@ -932,12 +932,13 @@ esp_output(struct mbuf *m, struct tdb *t * Add padding -- better to do it ourselves than use the crypto engine, * although if/when we support compression, we'd have to do that. */ - pad = (u_char *) m_pad(m, padding + alen); - if (pad == NULL) { - DPRINTF((esp_output(): m_pad() failed for SA %s/%08x\n, + mo = m_inject(m, m-m_pkthdr.len, padding + alen, M_DONTWAIT); + if (mo == NULL) { + DPRINTF((esp_output(): m_inject failed for SA %s/%08x\n, ipsp_address(tdb-tdb_dst), ntohl(tdb-tdb_spi))); return ENOBUFS; } + pad = mtod(mo, u_char *); /* Self-describing or random padding ? */ if (!(tdb-tdb_flags TDBF_RANDOMPADDING)) @@ -1178,77 +1179,4 @@ checkreplaywindow32(u_int32_t seq, u_int *bitmap |= (((u_int32_t) 1) diff); return 0; -} - -/* - * m_pad(m, n) pads m with n bytes at the end. The packet header - * length is updated, and a pointer to the first byte of the padding - * (which is guaranteed to be all in one mbuf) is returned. - */ - -caddr_t -m_pad(struct mbuf *m, int n) -{ - struct mbuf *m0, *m1; - int len, pad; - caddr_t retval; - - if (n = 0) { /* No stupid arguments. */ - DPRINTF((m_pad(): pad length invalid (%d)\n, n)); - m_freem(m); - return NULL; - } - - len = m-m_pkthdr.len; - pad = n; - m0 = m; - - while (m0-m_len len) { - len -= m0-m_len; - m0 = m0-m_next; - } - - if (m0-m_len != len) { - DPRINTF((m_pad(): length mismatch (should be %d instead of - %d)\n, m-m_pkthdr.len, - m-m_pkthdr.len + m0-m_len - len)); - - m_freem(m); - return NULL; - } - - /* Check for zero-length trailing mbufs, and find the last one. */ - for (m1 = m0; m1-m_next; m1 = m1-m_next) { - if (m1-m_next-m_len != 0) { - DPRINTF((m_pad(): length mismatch (should be %d - instead of %d)\n, m-m_pkthdr.len, - m-m_pkthdr.len + m1-m_next-m_len)); - - m_freem(m); - return NULL; - } - - m0 = m1-m_next; - } - - if ((m0-m_flags M_EXT) || - m0-m_data + m0-m_len + pad = (m0-m_dat[MLEN])) { - /* Add an mbuf to the chain. */ - MGET(m1, M_DONTWAIT, MT_DATA); - if (m1 == 0) { - m_freem(m0); - DPRINTF((m_pad(): cannot append\n)); - return NULL; - } - - m0-m_next = m1; - m0 = m1; - m0-m_len = 0; - } - - retval = m0-m_data + m0-m_len; - m0-m_len += pad; - m-m_pkthdr.len += pad; - - return retval; } Index: netinet/ip_ipsp.h === RCS file: /home/cvs/src/sys
Re: revised kernel perf control
On Mon, Sep 13, 2010 at 04:15:29PM -0400, Ted Unangst wrote: On Fri, Sep 10, 2010 at 4:34 PM, Alexander Hall ha...@openbsd.org wrote: I wasn't aware of any problems with suspend. My laptop doesn't resume reliably anyway, so it's not something I tested for. But it's just running some code in a timeout. If timeouts are still running during suspend, I'd say that's somebody else's bug. ISTR that the problem was that during the later part of a suspend, only one cpu is active, while the throttling affects all of them. I don't know enough about that to tell anything though. Well, anyhow, with this diff, and one cpu pegged, my thinkpad did not survive a resume. While idle, though, all seems fine. OK, I'm willing to believe there is a problem, but I still think that at the point where running setperf would be harmful, timeouts should be disabled. Barring that, how am I supposed to check if there is an imminent suspend? Register a device with suspend resume hooks which disables/enables throttling?
mbuf cleanup
Finally getting around to finishing a c2k10 diff. This no longer a) panics nor b) drops ipsec traffic on my test box. The code to split a single mbuf is used in both m_inject and m_split, so roll that into its own function, m_splitm, and make them wrappers around the new function. There's at least one other place it could be used, but this will do for now. As said above, this affects ipsec setups, it also affects A-MSDU wireless traffic; tests of both and (off-list) reports will earn you my love. - Bert Index: kern/uipc_mbuf.c === RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.143 diff -u -p -r1.143 uipc_mbuf.c --- kern/uipc_mbuf.c15 Jul 2010 09:45:09 - 1.143 +++ kern/uipc_mbuf.c11 Sep 2010 17:07:07 - @@ -110,6 +110,8 @@ struct pool mclpools[MCLPOOLS]; intm_clpool(u_int); +struct mbuf *m_splitm(struct mbuf *, int, int); + int max_linkhdr; /* largest link-level header */ int max_protohdr; /* largest protocol header */ int max_hdr; /* largest link+protocol header */ @@ -1064,130 +1066,85 @@ m_getptr(struct mbuf *m, int loc, int *o } /* - * Inject a new mbuf chain of length siz in mbuf chain m0 at - * position len0. Returns a pointer to the first injected mbuf, or - * NULL on failure (m0 is left undisturbed). Note that if there is - * enough space for an object of size siz in the appropriate position, + * Inject a new mbuf chain of length len in mbuf chain m at + * position off. Returns a pointer to the first injected mbuf, or + * NULL on failure (m is left undisturbed). Note that if there is + * enough space for an object of size len in the appropriate position, * no memory will be allocated. Also, there will be no data movement in - * the first len0 bytes (pointers to that will remain valid). + * the first off bytes (pointers to that will remain valid). * - * XXX It is assumed that siz is less than the size of an mbuf at the moment. + * XXX It is assumed that len is less than the size of an mbuf at the moment. */ struct mbuf * -m_inject(struct mbuf *m0, int len0, int siz, int wait) +m_inject(struct mbuf *m, int off, int len, int wait) { - struct mbuf *m, *n, *n2 = NULL, *n3; - unsigned len = len0, remain; + struct mbuf *n, *o, *p; + int off1; - if ((siz = MHLEN) || (len0 = 0)) - return (NULL); - for (m = m0; m len m-m_len; m = m-m_next) - len -= m-m_len; - if (m == NULL) + if (len MHLEN || len = 0) return (NULL); - remain = m-m_len - len; - if (remain == 0) { - if ((m-m_next) (M_LEADINGSPACE(m-m_next) = siz)) { - m-m_next-m_len += siz; - if (m0-m_flags M_PKTHDR) - m0-m_pkthdr.len += siz; - m-m_next-m_data -= siz; - return m-m_next; - } - } else { - n2 = m_copym2(m, len, remain, wait); - if (n2 == NULL) - return (NULL); - } - MGET(n, wait, MT_DATA); - if (n == NULL) { - if (n2) - m_freem(n2); + if ((n = m_getptr(m, off, off1)) == NULL) + return (NULL); + + if ((o = m_get(wait, MT_DATA)) == NULL) + return (NULL); + + if ((p = m_splitm(n, off1, wait)) == NULL) { + m_freem(o); return (NULL); } - n-m_len = siz; - if (m0-m_flags M_PKTHDR) - m0-m_pkthdr.len += siz; - m-m_len -= remain; /* Trim */ - if (n2) { - for (n3 = n; n3-m_next != NULL; n3 = n3-m_next) - ; - n3-m_next = n2; - } else - n3 = n; - for (; n3-m_next != NULL; n3 = n3-m_next) - ; - n3-m_next = m-m_next; - m-m_next = n; - return n; + o-m_len = len; + o-m_next = p; + n-m_next = o; + + if (m-m_flags M_PKTHDR) + m-m_pkthdr.len += len; + + return (o); } /* - * Partition an mbuf chain in two pieces, returning the tail -- - * all but the first len0 bytes. In case of failure, it returns NULL and - * attempts to restore the chain to its original state. + * Create a pair of mbufs, leaving the chain intact. */ struct mbuf * -m_split(struct mbuf *m0, int len0, int wait) +m_splitm(struct mbuf *m, int off, int wait) { - struct mbuf *m, *n; - unsigned len = len0, remain, olen; + struct mbuf *n; - for (m = m0; m len m-m_len; m = m-m_next) - len -= m-m_len; - if (m == NULL) + if (off m-m_len) return (NULL); - remain = m-m_len - len; - if (m0-m_flags M_PKTHDR) { - MGETHDR(n, wait, m0-m_type); - if (n == NULL) -
Re: generating new host key...
On Wed, Sep 08, 2010 at 11:39:59AM -0400, Ted Unangst wrote: On Tue, Sep 7, 2010 at 7:18 PM, Alexander Hall ha...@openbsd.org wrote: $ which true false /usr/bin/true /usr/bin/false while those should be available to /etc/rc, I'd prefer not using them. -5 points for using which. :) $ whence -v true true is a shell builtin I happen to think that explicit true and false values make things easier to read, without as much [ ] noise. Truly, you do not grasp the simple elegance of a banana-shaped bikeshed.
Re: sbt/sdmmc locking update
On Sat, Aug 21, 2010 at 05:21:20PM -0700, Matthew Dempsky wrote: On Sat, Aug 21, 2010 at 10:04 AM, Bret S. Lambert blamb...@openbsd.org wrote: This diff changes the locking in sbt and sdmmc from the old-and-busted lockmgr locks to the new-hotness rwlock locks. I don't have the hardware to test this on, so anybody with sbt/sdmmc is encouraged to give it a spin. Why not simply change the SDMMC_LOCK/SDMMC_UNLOCK/SDMMC_ASSERT_LOCKED macros and leave the .c files untouched (other than the lock init)? A version that did that was offered, but I was convinced off-list that my original desire to not obfuscate the locking was correct. Also, I'd prefer rw_exit_write() be used instead of rw_exit() when you know you're holding a write lock. As I believe you said before. My bikeshed, therefore it will be banana-shaped.
Re: Add timeout_add_abstv(9)
On Fri, Aug 20, 2010 at 12:27:39PM -0700, Matthew Dempsky wrote: On Fri, Aug 20, 2010 at 11:47 AM, Bret S. Lambert bret.lamb...@gmail.com wrote: And this one, like that one, lacks context. The hzto/tvtohz diff was just a const correctness fix. I didn't think it needed more context on its own. What's the ultimate goal? Just to remove some calls to hzto? timeout(9) suggests avoiding using timeout_add(9) directly in favor of higher-level functions, and I think it's worth adding utility functions for common use patterns. I just noticed that a number of remaining timeout_add(9) calls were of the form timeout_add(..., hzto(tv)). From an abstraction point of view, I don't think it makes sense for so much code to needlessly know about ticks. So...yes, it *was* just to remove calls to hzto. And you're adding another interface for it, which just seems grotesque and excessive. IMO, the correct way to get rid of this is to use the macros provided in time.h which do math on timevals, and then stuff that derived value into a timeout_add_tv. Also realize that there were subtle issues with hz-tv conversions when I started converting to the timeout_add_foo functions, which need to be resolved before we can do these conversions, as the conversions in sys/kern/kern_clock.c do something differently, and not doing it in that manner ends up whacking out drivers which implicitly depend upon that behavior. Please read those, and join me in my WTF.
Re: Add timeout_add_abstv(9)
Replied to quickly just now and missed the following... From an abstraction point of view, I don't think it makes sense for so much code to needlessly know about ticks. Yes, thus the conversions to actual time values. And, again, a mention that a discussion needs to be had concerning issues raised by those who have need of more specific behaviors than the current timeout API provides (for the purposes of POSIX-specified thread behavior, IIRC, was what guenther@ needed).
Re: defining ports LOCKDIR
On Wed, Jun 16, 2010 at 02:02:37PM +0200, Antoine Jacoutot wrote: On Wed, 16 Jun 2010, Marc Espie wrote: 3 days is a bit short on some arch. /var/tmp gives you 7 days, but isn't cleared at boot... daily could be set to ignore those... after all, it already ignores some other stuff. Sure, I was just pointing that one needed to be careful ;-) What about /var/run? cleared on boot, isn't ratfucked by /etc/daily. -- Antoine
Re: defining ports LOCKDIR
On Wed, Jun 16, 2010 at 02:15:37PM +0200, Marc Espie wrote: On Wed, Jun 16, 2010 at 02:11:47PM +0200, Bret S. Lambert wrote: On Wed, Jun 16, 2010 at 02:02:37PM +0200, Antoine Jacoutot wrote: On Wed, 16 Jun 2010, Marc Espie wrote: 3 days is a bit short on some arch. /var/tmp gives you 7 days, but isn't cleared at boot... daily could be set to ignore those... after all, it already ignores some other stuff. Sure, I was just pointing that one needed to be careful ;-) What about /var/run? cleared on boot, isn't ratfucked by /etc/daily. !root... bitch, bitch, bitch! :p
Re: WIP: packet inspection in PF
Is there some reason that divert sockets (``man divert'') can't do this for you? On Sun, Jun 13, 2010 at 03:27:57AM +0400, Vadim Jukov wrote: Hello, tech@, especially PF hackers! This is a work-in-progress patch that implements direct packet inspection in PF. This is needed in the cases when traffic could not be easily detected by other mechanisms. The actual example is new UDP-based protocol of uTorrent program that spams networks heavily, and could be detected only by comparing value at the offset 0x40 with 0x7FFFAB. The main reason I publish uncompleted work is that I want to receive some clues, particularily: 1. I detect beginning of actual data as pd-tot_len - pd-p_len - is it right (method to do so)? 2. I use m_data, m_len and m_next to loop through mbuf chain - is it right (method to do so)? Currently, it compiles, runs, but doesn't work - please do not actually run this patch unless you want to duplicate my work. :) Thanks in advance. -- Best wishes, Vadim Zhukov Index: share/man/man5/pf.conf.5 === RCS file: /cvs/src/share/man/man5/pf.conf.5,v retrieving revision 1.476 diff -u -r1.476 pf.conf.5 --- share/man/man5/pf.conf.5 19 May 2010 13:51:37 - 1.476 +++ share/man/man5/pf.conf.5 12 Jun 2010 23:16:15 - @@ -434,6 +434,33 @@ rule that is used when a packet does not match any rules does not allow IP options. .Pp +.It Xo +.Ar inspect Aq Ar value +.Ar at Aq Ar offset +.Xc +Tests packet contents at the +.Ar offset +to be equal to +.Ar value . +Note that offset starts after protocol header. +.Ar value +can be specified as plain strings, or as hexadecimal raw strings (i.e., +starting with 0x). +In the latter case you can embed any special characters. +Maximum length of +.Ar value is 64 characters. +.Pp +.It Xo +.Ar inspect Aq Ar mask +.Ar maskop Aq Ar value +.Ar at Aq Ar offset +.Xc +Same as previous, but also allows to specify a mask to applied to +the data from packet before comparing to +.Ar value . +Two operations supported are logical and and logical exclusive or. +They're specified using and ^ characters, respectively. +.Pp .It Ar divert-packet Aq Ar port Used to send matching packets to .Xr divert 4 @@ -2643,7 +2670,8 @@ nat-to ( redirhost | { redirhost-list } ) [ portspec ] [ pooltype ] [ static-port ] | [ fastroute | route ] | - [ received-on ( interface-name | interface-group ) ] + [ received-on ( interface-name | interface-group ) ] | + inspect scrubopts = scrubopt [ [ , ] scrubopts ] scrubopt = no-df | min-ttl number | max-mss number | @@ -2786,6 +2814,8 @@ upperlimit-sc = upperlimit sc-spec sc-spec= ( bandwidth-spec | ( bandwidth-spec number bandwidth-spec ) ) +inspect= inspect [ inspect-op ] string at number +inspect-op = string ( | ^ ) include= include filename .Ed .Sh FILES Index: sys/net/pf.c === RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.691 diff -u -r1.691 pf.c --- sys/net/pf.c 7 May 2010 13:33:16 - 1.691 +++ sys/net/pf.c 12 Jun 2010 23:16:15 - @@ -230,6 +230,8 @@ struct pf_state_key_cmp *, u_int, struct mbuf *); int pf_src_connlimit(struct pf_state **); int pf_check_congestion(struct ifqueue *); +int pf_inspect(struct pf_pdesc *, struct mbuf *, + struct pf_rule *); int pf_match_rcvif(struct mbuf *, struct pf_rule *); extern struct pool pfr_ktable_pl; @@ -2271,6 +2273,54 @@ } int +pf_inspect(struct pf_pdesc *pd, struct mbuf *m, struct pf_rule *r) { + u_int32_t at, i, mpos, pos; + charcv; + + if (r-inspect_at + r-inspect_len pd-p_len) + return (0); + at = r-inspect_at + (pd-tot_len - pd-p_len); + + for (pos = 0; pos + m-m_len at;) { + pos += m-m_len; + m = m-m_next; + if (m == NULL) + /* XXX: Should not be reached */ + return (0); + } + mpos = at - pos; + + for (i = 0; i r-inspect_len; i++, mpos++) { + while (mpos = m-m_len) { + pos += m-m_len; + m = m-m_next; + if (m == NULL) + /* XXX: Should not be reached */ + return (0); + mpos = 0; + } + switch (r-inspect_op) { + case PF_INSOP_CMP: + cv = m-m_data[mpos]; + break; + case PF_INSOP_AND: + cv = m-m_data[mpos]
Re: WIP: packet inspection in PF
On Sun, Jun 13, 2010 at 12:41:01PM +0400, Vadim Zhukov wrote: Hm-m-m, could you explain better, please? I don't see the way to do such filtering with diverting, excluding writing a proxy app listening all the traffic. Why do you assume I'm excluding a proxy app? 2010/6/13, Bret S. Lambert bret.lamb...@gmail.com: Is there some reason that divert sockets (``man divert'') can't do this for you? On Sun, Jun 13, 2010 at 03:27:57AM +0400, Vadim Jukov wrote: Hello, tech@, especially PF hackers! This is a work-in-progress patch that implements direct packet inspection in PF. This is needed in the cases when traffic could not be easily detected by other mechanisms. The actual example is new UDP-based protocol of uTorrent program that spams networks heavily, and could be detected only by comparing value at the offset 0x40 with 0x7FFFAB. The main reason I publish uncompleted work is that I want to receive some clues, particularily: 1. I detect beginning of actual data as pd-tot_len - pd-p_len - is it right (method to do so)? 2. I use m_data, m_len and m_next to loop through mbuf chain - is it right (method to do so)? Currently, it compiles, runs, but doesn't work - please do not actually run this patch unless you want to duplicate my work. :) Thanks in advance. -- Best wishes, Vadim Zhukov Index: share/man/man5/pf.conf.5 === RCS file: /cvs/src/share/man/man5/pf.conf.5,v retrieving revision 1.476 diff -u -r1.476 pf.conf.5 --- share/man/man5/pf.conf.5 19 May 2010 13:51:37 - 1.476 +++ share/man/man5/pf.conf.5 12 Jun 2010 23:16:15 - @@ -434,6 +434,33 @@ rule that is used when a packet does not match any rules does not allow IP options. .Pp +.It Xo +.Ar inspect Aq Ar value +.Ar at Aq Ar offset +.Xc +Tests packet contents at the +.Ar offset +to be equal to +.Ar value . +Note that offset starts after protocol header. +.Ar value +can be specified as plain strings, or as hexadecimal raw strings (i.e., +starting with 0x). +In the latter case you can embed any special characters. +Maximum length of +.Ar value is 64 characters. +.Pp +.It Xo +.Ar inspect Aq Ar mask +.Ar maskop Aq Ar value +.Ar at Aq Ar offset +.Xc +Same as previous, but also allows to specify a mask to applied to +the data from packet before comparing to +.Ar value . +Two operations supported are logical and and logical exclusive or. +They're specified using and ^ characters, respectively. +.Pp .It Ar divert-packet Aq Ar port Used to send matching packets to .Xr divert 4 @@ -2643,7 +2670,8 @@ nat-to ( redirhost | { redirhost-list } ) [ portspec ] [ pooltype ] [ static-port ] | [ fastroute | route ] | - [ received-on ( interface-name | interface-group ) ] + [ received-on ( interface-name | interface-group ) ] | + inspect scrubopts = scrubopt [ [ , ] scrubopts ] scrubopt = no-df | min-ttl number | max-mss number | @@ -2786,6 +2814,8 @@ upperlimit-sc = upperlimit sc-spec sc-spec= ( bandwidth-spec | ( bandwidth-spec number bandwidth-spec ) ) +inspect= inspect [ inspect-op ] string at number +inspect-op = string ( | ^ ) include= include filename .Ed .Sh FILES Index: sys/net/pf.c === RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.691 diff -u -r1.691 pf.c --- sys/net/pf.c 7 May 2010 13:33:16 - 1.691 +++ sys/net/pf.c 12 Jun 2010 23:16:15 - @@ -230,6 +230,8 @@ struct pf_state_key_cmp *, u_int, struct mbuf *); intpf_src_connlimit(struct pf_state **); intpf_check_congestion(struct ifqueue *); +intpf_inspect(struct pf_pdesc *, struct mbuf *, + struct pf_rule *); intpf_match_rcvif(struct mbuf *, struct pf_rule *); extern struct pool pfr_ktable_pl; @@ -2271,6 +2273,54 @@ } int +pf_inspect(struct pf_pdesc *pd, struct mbuf *m, struct pf_rule *r) { + u_int32_t at, i, mpos, pos; + charcv; + + if (r-inspect_at + r-inspect_len pd-p_len) + return (0); + at = r-inspect_at + (pd-tot_len - pd-p_len); + + for (pos = 0; pos + m-m_len at;) { + pos += m-m_len; + m = m-m_next; + if (m == NULL) + /* XXX: Should not be reached */ + return (0); + } + mpos = at - pos; + + for (i = 0; i r-inspect_len; i++, mpos++) { + while (mpos = m-m_len) { + pos += m-m_len; + m = m-m_next; + if (m == NULL
Re: Source Overview
On Mon, Apr 19, 2010 at 04:15:02PM +, Thordur Bjornsson wrote: And if you value your sanity, stay out of anything resembling filesystems. This is a lie. Hacking on filesystems, and the VFS layer in general is a very rewarding experince, just ask Bob. NFS for example, has been a source of joy for OpenBSD developers for years! Yes, if by joy you mean anal hemmoraging. 2) ?Is there something like an openbsd janitors project where newbies can start contributing small patches? similar to the Linux janitors project? Not at all. The philosophy behind not having one is that it's considered dangerous to farm out work to the inexperienced (and this exact topic has been brought up before, usually by people whining that we didn't make them feel special enough by not having one). Also it leads to people doing KNF style diffs, just to do KNF style diffs. Noone learns anything. Most KNF style diffs you see coming from developers is due to them having to read some code, and they cleaned up a little while doing so. While KNF is great, doing KNF just for the sake of doing KNF is hardly ever worth it IMHO.
Re: Source Overview
Or, in short, we need to not deter people straight away, and accept that perhaps sometimes decent programmers start from ones that make lots of mistakes. Perhaps a ports TODO similar to the NetBSD ports TODO might help; it doesn't require quite the same level of kernel or userspace hacking and provides very visible feedback and thanks once completed. But you're also missing a big point: if someone can't figure out what needs to be fixed, or is incapable of mustering enough motivation to do the work, then they're not as useful to us as someone who can do one or the other (or hopefully both). tedu@ once said that there's an entrance exam to get an account; this happens to be part of it, IMO. PS - put this back on tech@ where it was originally, misc@ has enough noise without this going there as well
Re: Source Overview
Surely people can move on from the low hanging fruit of a port that needs a ./configure make install, to minor code or makefile changes, to specific new functionality to from the scratch coding. Not to sound like a dick, but this illustrates some of what I'm saying: If I hold up one corner and a student cannot come back to me with the other three, I do not go on with the lesson. - Confucius, the Analects
Re: Atheros AR5424 rev 0x01 hal error
On Tue, Apr 20, 2010 at 10:55:18AM -0400, Adam M. Dutko wrote: When I attempt to use my wireless card (AR5424) I get the following: ath0: unable to reset hardware; hal status 3520208968 The number at the end changes when I try again. That smells like an uninitialized value for status, to me; try setting it to something known to be stupid and incorrect at the top of the function. I believe I've traced where the error message is thrown to a section of code in src/sys/dev/ic/ath.c: if (!ath_hal_reset(ah, ic-ic_opmode, hchan, AH_TRUE, status)) { printf(%s: unable to reset hardware; hal status %u\n, ifp-if_xname, status); error = EIO; goto done; } I'm going to recompile with ATH_DEBUG=10 and see if I can get more information. I'm also trying to trace down the ath_hal_reset function to see if I can't find something in that area. Does anyone that's worked on a similar error have any pointers? Thanks.
Re: Source Overview
On Mon, Apr 19, 2010 at 09:59:06AM -0400, Adam M. Dutko wrote: The obvious answer to this questions is Just read the source... but I still want to ask if someone is aware of a good overview of the OpenBSD source code? I've watched several presentations by Ted Unangst, Jason Dixon and co. and there seems to be a good amount of information spread across the web, but not a single canonical reference besides the source code (yes, I know source is very important). I know the base of code is very very large, so maybe instead of the whole repository, how about important parts/subsystems? Are any of you aware of such a document or documents? Are there areas that are easier for relative newbies to start in versus other areas? (eg. wireless drivers vs. SMP) Is there something like an openbsd janitors project where newbies can start? Thanks in advance. http://www.openbsd.org/papers/ has some good pointers; specifically claudio@'s presentations on the network internals which have call graphs, and jsg@'s introduction to drivers. man 9 style is a good introduction to the conventions used in the source tree. I'm going to assume that you mean an overview as to which directories mean what inside the kernel, since outside of the kernel, it's rather self-evident (e.g., src/usr.sbin/foo is for /usr/sbin/foo): src/sys/ kern/ - generic stuffs (signals, scheduling, vnodes, syscalls) net/- generic net stuffs (interface handling, pf, routing) netinet{,6}/- IPv{4,6} stuffs net*/ - non-IP network stuffs sys/- system header files arch/ - architecture-dependant code dev/- hardware drivers stuffs crypto/ - kernel crypto stuffs *fs/- filesystem code nfs/- the source of all horror conf/ - kernel configuration stuffs lib/- libc routines ported to the kernel, mostly scsi/ - self-explanatory uvm/- virtual memory stuffs altq/ - also self-explanatory compat/ - compatibility routines for other OSes ddb/- kernel debugger stuffs I may have missed something (or be slightly incorrect; hopefully someone smarter than me can help clean this up if I'm too far off). Hopefully this is useful for somebody. -Adam
Re: Source Overview
On Mon, Apr 19, 2010 at 11:48:02AM -0400, Adam M. Dutko wrote: On Mon, Apr 19, 2010 at 10:57 AM, Bret S. Lambert blamb...@openbsd.orgwrote: ... snip ... Hopefully this is useful for somebody. It is, thank you. With regard to the other questions I peppered everyone with... :-) 1) Are there areas that are easier for relative newbies to start in versus other areas? I know this depends on a lot of things, to include experience. Hypothetically, someone that has some C experience, but not a lot of kernel (and subsystem) experience. Is it better to start from the bottom up like bootstrap to init? or is it better to start with memory management? network drivers? What is usually the best area from a learning and future utility perspective? If you're going after userland stuff, the advice I've given (and where I started) is to read libc; it's the basis of userland programming, and illustrates some of the tricks you can do with C. For the kernel side, I'd say start looking at the system calls (cd /usr/src/sys/kern grep -n ^sys_ *.c). Everything else is just going deeper into the rabbit hole. And if you value your sanity, stay out of anything resembling filesystems. 2) Is there something like an openbsd janitors project where newbies can start contributing small patches? similar to the Linux janitors project? Not at all. The philosophy behind not having one is that it's considered dangerous to farm out work to the inexperienced (and this exact topic has been brought up before, usually by people whining that we didn't make them feel special enough by not having one). Read, understand, find an issue, post a patch, get told why it's wrong, fix it, repeat. - Bert
Re: iflag for sed
On Tue, Feb 02, 2010 at 04:47:49PM +1100, Damien Miller wrote: On Tue, 2 Feb 2010, Ted Unangst wrote: Ordinarily, extensions to standard utilities are bad, but I think it's worth considering when the flag is easily understood and provides a substantial benefit. I think case insenstive matching in sed qualifies. The diff below actually implements this behavior two different ways. Option 1 is to add /i to substitution patterns. Option 2 adds -i to the command line. +1 on option 1. Case insensitive matching would be very handy, but I don't like the idea of adding a non-standard commandline flag that isn't supported somewhere else. Agreed, especially since the -i flag means something entirely different in the non-standard implementation we're imitating here.
Re: UBC?
On Sun, Jan 31, 2010 at 01:18:59PM +, Stuart Henderson wrote: On 2010/01/31 09:50, David Gwynne wrote: On Sun, Jan 31, 2010 at 09:41:14AM +1000, David Gwynne wrote: pmap_enter in this situation should fail, not panic. the error would be handled properly as the stack unwinds up to ami_allocmem. i can change ami_allocmem to take a NOWAIT etc as an argument rather than just assume it, so callers with process context (like the sensor refresh shown below) can sleep waiting for mappings. here's such a change. i havent even compiled this, let alone tested it. no problems noticed here so far with ami0 at pci3 dev 14 function 0 Symbios Logic MegaRAID SATA 4x/8x rev 0x07: apic 2 int 4 (irq 9) ami0: LSI 3008, 32b, FW 814B, BIOS vH431, 128MB RAM ami0: 1 channels, 0 FC loops, 1 logical drives scsibus0 at ami0: 40 targets scsibus1 at ami0: 16 targets I know I'm kind of the peanut gallery in sys/dev, but we've already got named flags which mean something to malloc(9), and it increases legibility to use them, IMO, so I've modified dlg's diff to use M_{,NO}WAIT where appropriate. I have no ami hardware, so I can't test this, but it does compile :-\ Index: ami.c === RCS file: /cvs/src/sys/dev/ic/ami.c,v retrieving revision 1.198 diff -u -p -r1.198 ami.c --- ami.c 6 Dec 2009 12:31:10 - 1.198 +++ ami.c 31 Jan 2010 13:29:55 - @@ -124,7 +124,7 @@ voidami_write(struct ami_softc *, bus_ void ami_copyhds(struct ami_softc *, const u_int32_t *, const u_int8_t *, const u_int8_t *); -struct ami_mem *ami_allocmem(struct ami_softc *, size_t); +struct ami_mem *ami_allocmem(struct ami_softc *, size_t, int); void ami_freemem(struct ami_softc *, struct ami_mem *); intami_alloc_ccbs(struct ami_softc *, int); @@ -256,31 +256,32 @@ ami_write(struct ami_softc *sc, bus_size } struct ami_mem * -ami_allocmem(struct ami_softc *sc, size_t size) +ami_allocmem(struct ami_softc *sc, size_t size, int wait) { struct ami_mem *am; int nsegs; - am = malloc(sizeof(struct ami_mem), M_DEVBUF, M_NOWAIT|M_ZERO); + am = malloc(sizeof(struct ami_mem), M_DEVBUF, M_ZERO | wait); if (am == NULL) return (NULL); am-am_size = size; + wait = (wait == M_NOWAIT ? BUS_DMA_NOWAIT : 0); if (bus_dmamap_create(sc-sc_dmat, size, 1, size, 0, - BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, am-am_map) != 0) + wait | BUS_DMA_ALLOCNOW, am-am_map) != 0) goto amfree; if (bus_dmamem_alloc(sc-sc_dmat, size, PAGE_SIZE, 0, am-am_seg, 1, - nsegs, BUS_DMA_NOWAIT) != 0) + nsegs, wait) != 0) goto destroy; if (bus_dmamem_map(sc-sc_dmat, am-am_seg, nsegs, size, am-am_kva, - BUS_DMA_NOWAIT) != 0) + wait) != 0) goto free; if (bus_dmamap_load(sc-sc_dmat, am-am_map, am-am_kva, size, NULL, - BUS_DMA_NOWAIT) != 0) + wait) != 0) goto unmap; memset(am-am_kva, 0, size); @@ -337,7 +338,8 @@ ami_alloc_ccbs(struct ami_softc *sc, int return (1); } - sc-sc_ccbmem_am = ami_allocmem(sc, sizeof(struct ami_ccbmem) * nccbs); + sc-sc_ccbmem_am = ami_allocmem(sc, + sizeof(struct ami_ccbmem) * nccbs, M_NOWAIT); if (sc-sc_ccbmem_am == NULL) { printf(: unable to allocate ccb dmamem\n); goto free_ccbs; @@ -413,14 +415,14 @@ ami_attach(struct ami_softc *sc) paddr_t pa; int s; - am = ami_allocmem(sc, NBPG); + am = ami_allocmem(sc, NBPG, M_NOWAIT); if (am == NULL) { printf(: unable to allocate init data\n); return (1); } pa = htole32(AMIMEM_DVA(am)); - sc-sc_mbox_am = ami_allocmem(sc, sizeof(struct ami_iocmd)); + sc-sc_mbox_am = ami_allocmem(sc, sizeof(struct ami_iocmd), M_NOWAIT); if (sc-sc_mbox_am == NULL) { printf(: unable to allocate mbox\n); goto free_idata; @@ -1928,7 +1930,7 @@ ami_mgmt(struct ami_softc *sc, u_int8_t } if (size) { - if ((am = ami_allocmem(sc, size)) == NULL) { + if ((am = ami_allocmem(sc, size, M_WAITOK)) == NULL) { error = ENOMEM; goto memerr; }
Re: UBC?
On Fri, Jan 29, 2010 at 11:17:07AM -0500, Donald Allen wrote: Any developers care to comment on the state of the unified buffer cache? I did some searching and turned up threads here and there in the mail archives. It looks like an attempt was made to add it around 8 years ago, some bad things happened, and the change got backed out. Looking at the code, it looks like there is some conditional ubc code that appears not to be included in the current release. Have I got this right -- OpenBSD doesn't provide a unified buffer cache at this time? There is no spoon. /Don Allen
Re: UBC?
On Fri, Jan 29, 2010 at 01:47:09PM -0700, Bob Beck wrote: Well, to be fair, he was asking for $buzzword. So we could load him up with some Customer-Facing Enterprise Extranet Bundles, served over XML in a proactive win-win paradigm. Then his disk reads would be all like zoom!!! Or, for brevity's sake: there is no spoon. http://professionalsuperhero.com/ That link isn't in iCal format; I can't open it in my scheduler.
Re: Add support to AR5424
On Mon, Jan 25, 2010 at 06:18:33PM +, Luis Henriques wrote: On Mon, Jan 25, 2010 at 01:52:12PM +, Stuart Henderson wrote: On 2010/01/25 00:50, Luis Henriques wrote: Re-sending with cvs diff -upRN Wha, you mean I have to dismantle my AspireOne again and put the minipcie back in to test it? But last time I did that I had to drill out some of the screws :-) (only joking, I can do this on the eee instead, it's far easier to dismantle! and this is great news). Please also send URLs of where the borrowed code is from so the licensing is totally clear. Here's the link to the Linux ath5k driver: http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.32.y.git;a=tree;f=drivers/net/wireless/ath/ath5k;h=d625eae851dd0c1e42e52a182d8408ebee855377;hb=a2febcd43d859a48672ad922990bd27e5628271f (not 100% sure if this is the exact same version I used, can check that later if it actually matters.) File main.c (in directory ..) sets the license to Dual BSD/GPL. IIRC, it's more dependent upon the licence in the individual files, not the note that satisfies the Linux kernel's just utterly weird tell me your license so I can refuse to load you bits. But, the files in there, where they aren't (c) reyk@, appear to be either straight-up BSD-with-GPL option, or apparently cut-n-paset from OpenBSD's license.template(!) (as in the file you say is dual- licensed), so I think you're on solid footing here. And the link to the NetBSD driver: http://cvsweb.netbsd.org/bsdweb.cgi/src/sys/external/isc/atheros_hal/dist/?only_with_tag=MAIN
Re: kernel hacking
On Thu, Dec 10, 2009 at 02:49:51PM -0300, Robert Yuri wrote: I'll learn just reading kernel code ? so, many night you need to understand it ? Absolutely: $ pwd /usr/src/sys/kern $ wc -l * | tail -n 1 64300 total It's going to take you many nights just to *read* it. Not to mention the namecache-induced vomit breaks.
Dynamic SysV Message Queue Implementation
The following diff makes SysV message queues and changes them from static, boot-time to dynamic, run-time memory allocation. This makes writing sysctls which modify the limits trivial, no longer requiring you to recompile your kernel to do so. Also makes the code completely machine-independent, instead of allocating the memory before the vm system is up. The only thing that I'm aware of that uses this (thanks to todd@) is squid with diskd. If you have that setup, please give this a test to make sure no edge cases have crept in. - Bert Index: kern/sysv_msg.c === RCS file: /cvs/src/sys/kern/sysv_msg.c,v retrieving revision 1.21 diff -u -p -r1.21 sysv_msg.c --- kern/sysv_msg.c 2 Jun 2009 12:11:16 - 1.21 +++ kern/sysv_msg.c 3 Aug 2009 06:54:28 - @@ -1,6 +1,20 @@ /* $OpenBSD: sysv_msg.c,v 1.21 2009/06/02 12:11:16 guenther Exp $ */ /* $NetBSD: sysv_msg.c,v 1.19 1996/02/09 19:00:18 christos Exp $ */ - +/* + * Copyright (c) 2009 Bret S. Lambert blamb...@openbsd.org + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ /* * Implementation of SVID messages * @@ -21,108 +35,69 @@ */ #include sys/param.h -#include sys/systm.h -#include sys/kernel.h -#include sys/proc.h -#include sys/msg.h +#include sys/types.h #include sys/malloc.h - +#include sys/mbuf.h #include sys/mount.h +#include sys/msg.h +#include sys/pool.h +#include sys/proc.h +#include sys/queue.h #include sys/syscallargs.h +#include sys/sysctl.h +#include sys/systm.h +#include sys/uio.h -#ifdef MSG_DEBUG -#defineDPRINTF(x) printf x -#else -#defineDPRINTF(x) -#endif - -int nfree_msgmaps; /* # of free map entries */ -short free_msgmaps;/* head of linked list of free map entries */ -struct msg *free_msghdrs; /* list of free msg headers */ -char *msgpool; /* MSGMAX byte long msg buffer pool */ -struct msgmap *msgmaps;/* MSGSEG msgmap structures */ -struct msg *msghdrs; /* MSGTQL msg headers */ -struct msqid_ds *msqids; /* MSGMNI msqid_ds struct's */ - -void msg_freehdr(struct msg *); +struct que *que_create(key_t, struct ucred *, int); +struct que *que_lookup(int); +struct que *que_key_lookup(key_t); +void que_wakeall(void); +void que_free(struct que *); +struct msg *msg_create(struct que *); +void msg_free(struct msg *); +void msg_enqueue(struct que *, struct msg *, struct proc *); +void msg_dequeue(struct que *, struct msg *, struct proc *); +struct msg *msg_lookup(struct que *, int); +int msg_copyin(struct msg *, const char *, size_t, struct proc *); +int msg_copyout(struct msg *, char *, size_t *, struct proc *); + +struct pool sysvmsgpl; +struct msginfo msginfo; + +TAILQ_HEAD(, que) msg_queues; + +int num_ques; +int num_msgs; +int sequence; +int maxmsgs; void msginit(void) { - int i; - - /* -* msginfo.msgssz should be a power of two for efficiency reasons. -* It is also pretty silly if msginfo.msgssz is less than 8 -* or greater than about 256 so ... -*/ - - i = 8; - while (i 1024 i != msginfo.msgssz) - i = 1; - - if (i != msginfo.msgssz) - panic(msginfo.msgssz %d not a small power of 2, msginfo.msgssz); - if (msginfo.msgseg 32767) - panic(msginfo.msgseg %d 32767, msginfo.msgseg); - - if (msgmaps == NULL) - panic(msgmaps is NULL); - - for (i = 0; i msginfo.msgseg; i++) { - if (i 0) - msgmaps[i-1].next = i; - msgmaps[i].next = -1; /* implies entry is available */ - } - free_msgmaps = 0; - nfree_msgmaps = msginfo.msgseg; - - if (msghdrs == NULL) - panic(msghdrs is NULL); - - for (i = 0; i msginfo.msgtql; i++) { - msghdrs[i].msg_type = 0; - if (i 0) - msghdrs[i-1].msg_next = msghdrs[i]; - msghdrs[i].msg_next = NULL; - } - free_msghdrs = msghdrs[0]; - - if (msqids == NULL) - panic(msqids is NULL); - - for (i = 0; i msginfo.msgmni; i++) { - msqids[i].msg_qbytes = 0; /* implies entry is available
NFS server cleanup diffs, 2 of 3
Index: nfs/nfs_serv.c === RCS file: /cvs/src/sys/nfs/nfs_serv.c,v retrieving revision 1.56 diff -u -p -r1.56 nfs_serv.c --- nfs/nfs_serv.c 14 Jun 2008 22:44:07 - 1.56 +++ nfs/nfs_serv.c 15 Jun 2008 04:36:23 - @@ -,7 +2211,7 @@ nfsrv_rmdir(nfsd, slp, procp, mrq) int v3 = (nfsd-nd_flag ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; + struct vnode *vp, *dirp = NULL; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; @@ -2241,7 +2230,7 @@ nfsrv_rmdir(nfsd, slp, procp, mrq) procp); else { vrele(dirp); - dirp = (struct vnode *)0; + dirp = NULL; } } if (error) { @@ -2255,31 +2244,35 @@ nfsrv_rmdir(nfsd, slp, procp, mrq) vp = nd.ni_vp; if (vp-v_type != VDIR) { error = ENOTDIR; - goto out; + goto abort; } /* * No rmdir . please. */ if (nd.ni_dvp == vp) { error = EINVAL; - goto out; + goto abort; } /* * The root of a mounted filesystem cannot be deleted. */ - if (vp-v_flag VROOT) + if (vp-v_flag VROOT) { error = EBUSY; -out: - if (!error) { - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); + goto abort; } + + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, nd.ni_cnd); + goto out; + +abort: + /* the following code is only reached via error cases above */ + VOP_ABORTOP(nd.ni_dvp, nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vput(vp); +out: if (dirp) { diraft_ret = VOP_GETATTR(dirp, diraft, cred, procp); vrele(dirp);