DTrace "sched" provider - reviews?
I've taken a stab at implementing the DTrace "sched" provider, for most of the probes listed here which made sense: http://dtrace.org/guide/chp-sched.html#tbl-sched Please note that this was my first foray into the scheduling code, as well as an early effort at implementing a dtrace provider; it's entirely possible I got big honking things flat-out wrong. This has been tested to a limited degree, to the effect of making sure probes fire (though I'm not sure I've tested all of them), and also that they seem to make some amount of sense. I haven't yet put them through any great paces. Any and all feedback welcome. Index: sys/kern/kern_runq.c === RCS file: /cvsroot/src/sys/kern/kern_runq.c,v retrieving revision 1.44 diff -u -r1.44 kern_runq.c --- sys/kern/kern_runq.c 7 Oct 2015 00:32:34 - 1.44 +++ sys/kern/kern_runq.c 1 Feb 2016 21:57:28 - @@ -29,7 +29,9 @@ #include __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.44 2015/10/07 00:32:34 christos Exp $"); +#ifdef _KERNEL_OPT #include "opt_dtrace.h" +#endif #include #include @@ -47,6 +49,7 @@ #include #include #include +#include /* * Priority related definitions. 
@@ -127,6 +130,13 @@ struct lwp *curthread; #endif +SDT_PROVIDER_DECLARE(sched); + +SDT_PROBE_DEFINE3(sched, kernel, , dequeue, "struct lwp *", +"struct proc *", "struct cpuinfo *"); +SDT_PROBE_DEFINE4(sched, kernel, , enqueue, "struct lwp *", +"struct proc *", "struct cpuinfo *", "int"); + void runq_init(void) { @@ -254,6 +264,7 @@ if (eprio > spc->spc_maxpriority) spc->spc_maxpriority = eprio; + SDT_PROBE4(sched, kernel, , enqueue, l, l->l_proc, ci, 0); sched_newts(l); /* @@ -296,6 +307,7 @@ ci_rq->r_mcount--; q_head = sched_getrq(ci_rq, eprio); + SDT_PROBE3(sched, kernel, , dequeue, l, l->l_proc, NULL); TAILQ_REMOVE(q_head, l, l_runq); if (TAILQ_EMPTY(q_head)) { u_int i; Index: sys/kern/kern_sleepq.c === RCS file: /cvsroot/src/sys/kern/kern_sleepq.c,v retrieving revision 1.50 diff -u -r1.50 kern_sleepq.c --- sys/kern/kern_sleepq.c 5 Sep 2014 05:57:21 - 1.50 +++ sys/kern/kern_sleepq.c 1 Feb 2016 21:57:28 - @@ -34,6 +34,10 @@ * interfaces. */ +#ifdef _KERNEL_OPT +#include "opt_dtrace.h" +#endif + #include __KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.50 2014/09/05 05:57:21 matt Exp $"); @@ -48,6 +52,7 @@ #include #include #include +#include /* * for sleepq_abort: @@ -62,6 +67,12 @@ #define IPL_SAFEPRI 0 #endif +SDT_PROVIDER_DECLARE(sched); + +SDT_PROBE_DEFINE2(sched, kernel, , wakeup, "struct lwp *", +"struct proc *"); +SDT_PROBE_DEFINE(sched, kernel, , sleep); + static int sleepq_sigtoerror(lwp_t *, int); /* General purpose sleep table, used by mtsleep() and condition variables. 
*/ @@ -143,6 +154,7 @@ /* Update sleep time delta, call the wake-up handler of scheduler */ l->l_slpticksum += (hardclock_ticks - l->l_slpticks); + SDT_PROBE2(sched, kernel, , wakeup, l, l->l_proc); sched_wakeup(l); /* Look for a CPU to wake up */ @@ -217,6 +229,7 @@ /* Save the time when thread has slept */ l->l_slpticks = hardclock_ticks; + SDT_PROBE0(sched, kernel, , sleep); sched_slept(l); } Index: sys/kern/kern_synch.c === RCS file: /cvsroot/src/sys/kern/kern_synch.c,v retrieving revision 1.309 diff -u -r1.309 kern_synch.c --- sys/kern/kern_synch.c 13 Oct 2015 00:25:51 - 1.309 +++ sys/kern/kern_synch.c 1 Feb 2016 21:57:28 - @@ -71,9 +71,11 @@ #include __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.309 2015/10/13 00:25:51 pgoyette Exp $"); +#ifdef _KERNEL_OPT #include "opt_kstack.h" #include "opt_perfctrs.h" #include "opt_dtrace.h" +#endif #define __MUTEX_PRIVATE @@ -96,6 +98,7 @@ #include #include #include +#include #include @@ -105,6 +108,14 @@ int dtrace_vtime_active=0; dtrace_vtime_switch_func_t dtrace_vtime_switch_func; +SDT_PROVIDER_DEFINE(sched); + +SDT_PROBE_DEFINE2(sched, kernel, , off__cpu, "struct lwp *", +"struct proc *"); +SDT_PROBE_DEFINE(sched, kernel, , on__cpu); +SDT_PROBE_DEFINE(sched, kernel, , preempt); +SDT_PROBE_DEFINE(sched, kernel, , remain__cpu); + static void sched_unsleep(struct lwp *, bool); static void sched_changepri(struct lwp *, pri_t); static void sched_lendpri(struct lwp *, pri_t); @@ -296,6 +307,7 @@ KASSERT(l->l_stat == LSONPROC); l->l_kpriority = false; l->l_nivcsw++; + SDT_PROBE0(sched, kernel, , preempt); (void)mi_switch(l); KERNEL_LOCK(l->l_biglocks, l); } @@ -638,6 +650,7 @@ if (l != newl) { struct lwp *prevlwp; + SDT_PROBE2(sched, kernel, , off__cpu, newl, newl->l_proc); /* Release all locks, but leave the current LWP locked */ if (l->l_mutex == spc->spc_mutex) { /* @@ -731,6 +744,7 @@ pmap_activate(l); uvm_emap_switch(l);
Re: dtrace by default
On 08/20/15 10:52 AM, Taylor R Campbell wrote: I'd like to enable dtrace by default on i386, amd64, and arm -- that is, setting MKDTRACE=yes in make for the userland tools, and enabling options KDTRACE_HOOKS in the kernel for the hooks. The overhead of KDTRACE_HOOKS in the kernel is a predicted not-taken branch in a handful of places. I have not measured the speed impact but I expect it to be very small. All logic related to dtrace is relegated to a module, so unless you load dtrace.kmod, there's negligible memory impact. The overhead of MKDTRACE=yes in the userland is about 15 MB of dtrace tools and libraries on amd64. The benefits are manifold: low-overhead flexible diagnostics for performance and debugging issues in the field with no kernel rebuild or reboot. In the past few months while trying to help people debug things, my first question has often been `do you have dtrace?'. Objections? This has now (finally) been done. Userland bits are built for i386, amd64 and earm*, and i386 and amd64 GENERIC have KDTRACE_HOOKS. I left arm kernels alone for the moment, as that's a little more complicated to get right. +j
Re: wapbl + discard panic
On 8/10/15 10:05 PM, Taylor R Campbell wrote: Date: Mon, 10 Aug 2015 21:31:07 -0700 From: Jeff Rizzo r...@tastylime.net I'm guessing at the moment the answer is "don't do that", but it looks like wapbl and discard aren't playing nice together: panic: kernel diagnostic assertion "rw_lock_held(&wl->wl_rwlock)" Try attached patch? With that patch, the system comes up fine. Thanks! +j
wapbl + discard panic
I'm guessing at the moment the answer is "don't do that", but it looks like wapbl and discard aren't playing nice together: panic: kernel diagnostic assertion "rw_lock_held(&wl->wl_rwlock)" failed: file "/home/riz/src/sys/kern/vfs_wapbl.c", line 1715 Stopped in pid 0.54 (system) at netbsd:cpu_Debugger+0x4: bx r14 db{0}> bt 0xbfaffe34: netbsd:vpanic+0xc 0xbfaffe4c: netbsd:__udivmoddi4 0xbfaffe74: netbsd:wapbl_jlock_assert+0x54 0xbfaffe9c: netbsd:wapbl_add_buf+0x3c 0xbfaffebc: netbsd:bdwrite+0xc4 0xbfafff1c: netbsd:ffs_blkfree_cg+0x190 0xbfafff4c: netbsd:ffs_blkfree_td+0x60 0xbfafff7c: netbsd:ffs_discardcb+0x64 0xbfafffac: netbsd:workqueue_worker+0x84 db{0}>
Re: NFS writes being corrupted?
On 8/4/15 1:13 PM, Jeff Rizzo wrote: On 8/4/15 4:20 AM, Robert Swindells wrote: David Holland wrote: Does that size vary with the NFS block size? Yep. Reducing blocksize to 8192 makes it barf on 8192+ byte files. Also is it using UDP or TCP ? TCP, but I just confirmed UDP has the problem too. The symptoms make me think of scrambled mbufs, if anything... My guess is that the panics that wiz and I saw in the checksum code on amd64 were also due to scrambled mbufs. My cubietruck seems fine using awge(4), I have built a fair number of packages over NFS recently. Robert Swindells Looks like awge(4) is seeing output errors: Name Mtu Network Address Ipkts Ierrs Opkts Oerrs Colls awge0 1500 Link00:1e:06:c3:49:c1 189582 0 134261 222 0 Not sure of what variety, though. The oerrs go up when reading a large file (90M) which checksums properly, but DON'T go up when writing/reading an 8k file which gets corrupted. +j I finally got around to sticking a USB interface (urtwn0) in and testing NFS over that... it was PAINFULLY SLOW - took well over a minute to copy a 4MB test file. But, the test file copied with no corruption! This would seem to indicate a problem with the particular interface (awge0), perhaps specific to the odroid-c1, as opposed to some l2 cache controller issue, which is kind of where I was leaning before. However, my banana pi has awge0 as well, but does not exhibit this corruption. Any suggestions what to try/test next gratefully accepted - I would really love to get nfs working on this board. +j
Re: NFS writes being corrupted?
On 8/4/15 4:20 AM, Robert Swindells wrote: David Holland wrote: Does that size vary with the NFS block size? Yep. Reducing blocksize to 8192 makes it barf on 8192+ byte files. Also is it using UDP or TCP ? TCP, but I just confirmed UDP has the problem too. The symptoms make me think of scrambled mbufs, if anything... My guess is that the panics that wiz and I saw in the checksum code on amd64 were also due to scrambled mbufs. My cubietruck seems fine using awge(4), I have built a fair number of packages over NFS recently. Robert Swindells Looks like awge(4) is seeing output errors: Name Mtu Network Address Ipkts Ierrs Opkts Oerrs Colls awge0 1500 Link00:1e:06:c3:49:c1 189582 0 134261 222 0 Not sure of what variety, though. The oerrs go up when reading a large file (90M) which checksums properly, but DON'T go up when writing/reading an 8k file which gets corrupted. +j
NFS writes being corrupted?
I got my odroid-c1 back online yesterday with -current, and noticed that anything I copied to an NFS-mounted volume would get silently corrupted. (sha1 from the NFS client and on the NFS server read the same, though) I'm about 80% sure this was working around 7.99.9, but for a number of reasons it's complicated for me to check older builds, and in any event odroid-c1 support is fairly new. I noticed some changes to the NFS code on 15 July ( http://mail-index.netbsd.org/source-changes/2015/07/15/msg067309.html ), but backing these out does not change the behavior. Has anyone else seen problems with NFS? Or with odroid-c1 or awge(4) in general? dmesg below in case it gives any hints: Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 The NetBSD Foundation, Inc. All rights reserved. Copyright (c) 1982, 1986, 1989, 1991, 1993 The Regents of the University of California. All rights reserved. NetBSD 7.99.20 (ODROID-iscsi) #3: Sun Aug 2 21:06:09 PDT 2015 r...@cassava.tastylime.net:/scratch/evbarm7/obj/sys/arch/evbarm/compile/ODROID-iscsi total memory = 1024 MB avail memory = 1007 MB sysctl_createv: sysctl_create(machine_arch) returned 17 timecounter: Timecounters tick every 10.000 msec mainbus0 (root) cpu0 at mainbus0 core 0: 1512 MHz Cortex-A5 r0p1 (Cortex V7A core) cpu0: DC enabled IC enabled WB disabled EABT branch prediction enabled cpu0: sctlr: 0xc51c7d cpu0: actlr: 0x6041 cpu0: revidr: 0x410fc051 cpu0: mpidr: 0x8200 cpu0: isar: [0]=0x10 [1]=0x13112111 [2]=0x21232041 [3]=0x2131, [4]=0x11142, [5]=0 cpu0: mmfr: [0]=0x100103 [1]=0x4000 [2]=0x123 [3]=0x102211 cpu0: pfr: [0]=0x1231 [1]=0x11 cpu0: 32KB/32B 2-way L1 VIPT Instruction cache cpu0: 32KB/32B 4-way write-back-locking-C L1 PIPT Data cache cpu0: 512KB/32B 8-way write-back L2 PIPT Unified cache vfp0 at cpu0: NEON MPE (VFP 3.0+), rounding, NaN propagation, denormals vfp0: mvfr: [0]=0x10110222 [1]=0x cpu1 at mainbus0 core 1 cpu2 at 
mainbus0 core 2 cpu3 at mainbus0 core 3 armperiph0 at mainbus0 armgic0 at armperiph0: Generic Interrupt Controller, 256 sources (245 valid) armgic0: 32 Priorities, 224 SPIs, 5 PPIs, 16 SGIs a9tmr0 at armperiph0: A5 Global 64-bit Timer (378 MHz) a9tmr0: interrupting on irq 27 a9wdt0 at armperiph0: A5 Watchdog Timer, default period is 12 seconds arml2cc0 at armperiph0: ARM PL310 r3p3 L2 Cache Controller (disabled) arml2cc0: cache enabled amlogicio0 at mainbus0 amlogiccom0 at amlogicio0 port 0: console amlogiccom0: interrupting at irq 122 amlogicgpio0 at amlogicio0: GPIO controller gpio0 at amlogicgpio0 (GPIOX): 22 pins gpio1 at amlogicgpio0 (GPIOY): 15 pins gpio2 at amlogicgpio0 (GPIODV): 30 pins gpio3 at amlogicgpio0 (GPIOH): 6 pins gpio4 at amlogicgpio0 (GPIOAO): 14 pins gpio5 at amlogicgpio0 (BOOT): 19 pins gpio6 at amlogicgpio0 (CARD): 7 pins genfb0 at amlogicio0: switching to framebuffer console genfb0: framebuffer at 0xc9e0, size 1280x720, depth 16, stride 2560 wsdisplay0 at genfb0 kbdmux 1: console (default, vt100 emulation) wsmux1: connecting to wsdisplay0 wsdisplay0: screen 1-3 added (default, vt100 emulation) amlogicrng0 at amlogicio0 dwctwo0 at amlogicio0 port 0: USB controller dwctwo1 at amlogicio0 port 1: USB controller awge0 at amlogicio0: Gigabit Ethernet Controller awge0: interrupting on irq 40 awge0: Ethernet address: 00:1e:06:c3:49:c1 rgephy0 at awge0 phy 0: RTL8169S/8110S/8211 1000BASE-T media interface, rev. 6 rgephy0: 10baseT, 10baseT-FDX, 100baseTX, 100baseTX-FDX, 1000baseT-FDX, auto rgephy1 at awge0 phy 1: RTL8169S/8110S/8211 1000BASE-T media interface, rev. 
6 rgephy1: 10baseT, 10baseT-FDX, 100baseTX, 100baseTX-FDX, 1000baseT-FDX, auto amlogicsdhc0 at amlogicio0 port 1: SDHC controller amlogicsdhc0: interrupting on irq 110 amlogicrtc0 at amlogicio0: RTC battery not present or discharged usb0 at dwctwo0: USB revision 2.0 usb1 at dwctwo1: USB revision 2.0 timecounter: Timecounter clockinterrupt frequency 100 Hz quality 0 timecounter: Timecounter a9tmr0 frequency 37800 Hz quality 500 cpu2: 1512 MHz Cortex-A5 r0p1 (Cortex V7A core) cpu2: DC enabled IC enabled WB disabled EABT branch prediction enabled cpu2: sctlr: 0xc51c7d cpu2: actlr: 0x6041 cpu2: revidr: 0x410fc051 cpu2: mpidr: 0x8202 cpu2: isar: [0]=0x10 [1]=0x13112111 [2]=0x21232041 [3]=0x2131, [4]=0x11142, [5]=0 cpu2: mmfr: [0]=0x100103 [1]=0x4000 [2]=0x123 [3]=0x102211 cpu2: pfr: [0]=0x1231 [1]=0x11 cpu2: 32KB/32B 2-way L1 VIPT Instruction cache cpu2: 32KB/32B 4-way write-back-locking-C L1 PIPT Data cache cpu2: 512KB/32B 8-way write-back L2 PIPT Unified cache vfp2 at cpu2: NEON MPE (VFP 3.0+), rounding, NaN propagation, denormals vfp2: mvfr: [0]=0x10110222 [1]=0x cpu3: 1512 MHz Cortex-A5 r0p1 (Cortex V7A core) cpu3: DC enabled IC enabled WB disabled EABT branch prediction enabled cpu3: sctlr: 0xc51c7d cpu3: actlr: 0x6041 cpu3: revidr: 0x410fc051 cpu3: mpidr: 0x8203
Re: NFS writes being corrupted?
On 8/3/15 10:15 AM, Martin Husemann wrote: On Mon, Aug 03, 2015 at 09:02:19AM -0700, Jeff Rizzo wrote: I'm about 80% sure this was working around 7.99.9, but for a number of reasons it's complicated for me to check older builds, and in any event odroid-c1 support is fairly new. I noticed some changes to the NFS code on 15 July ( http://mail-index.netbsd.org/source-changes/2015/07/15/msg067309.html ), but backing these out does not change the behavior. What kind of differences do you see? Truncation to a multiple of page size? Last partial page filled with zeroes? Random corruption? Do you get identical content back when reading on the client directly after write? Typical culprit would be cache ops/pmap issues. Martin I need to look deeper, but a quick test writing lines of ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz Shows that corruption starts when the file is exactly 65536 bytes long (with an 8192 byte page size), with anything that size or longer getting corrupted. It seems to be randomly garbled - same size, same bytes, but shuffled around. When I was narrowing it down, I sometimes saw random corruption inserted at larger file sizes - at one point I saw short strings of NUL and the string posix2_upe (which would appear to be a symbol?) inserted at seemingly-random spots. +j
Re: Guidelines for choosing MACHINE MACHINE_ARCH?
On 6/24/15 7:13 AM, matthew green wrote: David Holland writes: I think keeping evb* for boards makes sense, though. i dunno. i don't see what it adds. in particular, evb means evaluation board, and there are heaps of things in evb* that are *not* evaluation boards, but stuff that might once have been. i wish we'd just collapse as much as possible back to plain old MACHINE=MACHINE_ARCH=whatever. i just don't see any value or validity in evb. is ERLITE an evaluation board? what about the RPI or CUBIE* systems? they come pretty complete AFAICT, designed as end-user systems, not what we used to consider as being evaluation boards. I agree that evb* is confusing and increasingly meaningless and would like to see us transition away from it. +j
Re: Interested in working on 'Rewriting Kernfs and Procfs'
On 7/27/14 10:10 PM, Sanchit Gupta wrote: Hi, I am Sanchit Gupta, an Undergraduate at University of Illinois Urbana Champaign. I am currently an intern at Vmware (Kernel Team). My mentor is Matthew Green, one of the members of NetBSD core group. I am really interested in operating systems and wanted to work on some of the projects under NetBSD to gain some experience, and also to get to know other developers out there who are also into systems programming. I know that I am a beginner at this, but it would be great if I could learn under the guidance of some of your developers. I wanted to get involved with development on NetBSD by working on Kernfs and Procfs. I would be grateful if you could get me hooked up with the team that is currently working on this issue. This (tech-kern) is the mailing list where you want to pose your questions. People generally respond very well to specific technical questions. (sometimes less well to open-ended nontechnical questions, which is why nobody has responded for a week, at least not on-list) As far as I know, nobody is working on kernfs or procfs, so if you have ideas, please share them! Also, I wanted to know whether the ZFS porting was completed or not as I am also interested in working on that project. ZFS is definitely in need of work. It builds, but is an older version and has many lurking bugs. Taylor Campbell (riastradh@) did some work on it back in 2012 which improved things (see http://mail-index.netbsd.org/source-changes/2012/10/15/msg037932.html ), but much remains to be done. Thanks, Sanchit +j
Re: Net BSD Improved Caching Project
On 10/30/13 3:19 PM, David Palzer wrote: Hi, I sent an email to you about a week ago expressing interest in working on the improved caching algorithm for Net BSD. Is that still available or is someone already working on it? - David Palzer David- Perhaps a link to where this project is described, so we can be sure we're talking about the same one? In general, as well, except for certain programs such as Google's Summer of Code (which is not currently running), if you don't see mailing list traffic about it, feel free to hop in and start contributing design ideas! +j
Why do we need lua in-tree again? Yet another call for actual evidence, please. (was Re: Moving Lua source codes)
On 10/14/13 1:46 PM, Marc Balmer wrote: It is entirely plausible to me that we could benefit from using Lua in base, or sysinst, or maybe even in the kernel. But that argument must be made by showing evidence of real, working code that has compelling benefits, together with confidence in its robustness -- not by saying that if we let users do it then it will happen. There is real world, real working code. In userland and in kernel space. There are developers waiting for the kernel parts to be committed, so they can continue their work as well. *Where* is this code? The pattern I see happening over and over again is: NetBSD Community: Please show us the real working code that needs this mbalmer: the code is there! (pointer to actual code not in evidence) I do not doubt that something exists, but the onus is on the person proposing the import to convince the skeptics, or at least to make an actual effort. I see lots of handwaving, and little actual code. YEARS after the import of lua into the main tree, I see very little in-tree evidence of its use. In fact, what I see is limited to: 1) evidence of lua bindings for netpgp. 2) evidence of some tests in external/bsd/lutok 3) the actual lua src in external/mit/lua 4) gpio and sqlite stuff in liblua 5) some lua bindings in libexec/httpd (bozohttpd) 6) two example files in share/examples/lua 7) the luactl/lua module/lua(4) stuff you imported yesterday Am I missing something major here? The only actual usage I see is netpgp and httpd; the rest is all in support of lua itself. I do not see evidence that anyone is actually using lua in such a way that requires it in-tree. When you originally proposed importing lua back in 2010, you talked a lot about how uses would materialize. It's now been 3 years, and I just don't see them. If I am wrong about this, I would love some solid pointers to evidence of my wrongness. 
Now you're using very similar arguments for bringing lua into the kernel; I would very much like to see some real, practical, *useful* code demonstrating just why this is a good thing. Beyond the 'gee, whiz' factor, I just don't see it. +j
ibcs2 syscalls.master problem
The last time sys/compat/ibcs2/syscalls.master was edited [1] (July 2010), the dependent files were not regenerated. There was at least one typo (fixed), but there are also duplicate syscall names, which cause the generated files to break the i386 build. Can someone who knows what's what fix this, so the resulting files work? I did notice that FreeBSD's ibcs2 emulation has more info on at least one of the syscalls. Thanks, +j [1] http://mail-index.netbsd.org/source-changes/2010/07/23/msg011989.html
Re: MI boot args revamp?
On 12/29/12 1:12 PM, Greg Troxel wrote: I would like to have a way to pass a string composed of the same flags (we can continue to use our existing -a, -s and other flags) in a consistent manner from one platform to another, to be able to adjust driver options, kernel options, whatever, and to be able to expect it to be similar whether I'm on amd64, macppc, evbppc, evbarm, or whatever. Are you talking about the UI of how the strings are written and what they mean or how the bootloader stage that interacts with the user/prom communicates this to the kernel? For platforms with existing conventions, I don't see how we can interact with native bootloaders without meeting their interface. There are always going to be exceptions; certain platforms (especially older ones) are not flexible enough to do everything we want the way we want it. What I _would_ like to get to is "this is the recommended goal to shoot for". +j
Re: iscsi initiator?
} } I had no trouble finding iscsi-target(8) and targets(5), supporting } target mode. But I've been unable to find initiator support in 5.1. } Did I just miss something, or is the Wikipedia page wrong, or what? } } The only initiator available for 5.x is a refuse-based userland initiator. I had thought it was installed by default in 5.1, but I guess not. It can be found in src/dist/iscsi/src on 5.x, and should be buildable from there. (it *is* installed in 6.0, which makes less sense, because 6.0 also has an in-kernel initiator) +j
Re: Assistance working with Marvell SoC NAND controller
On 11/3/12 9:35 AM, Robert Swindells wrote: Jeff Rizzo wrote: I'm trying to write a driver for the NAND controller on the Marvell Kirkwood (and I believe Orion as well) SoC (specifically the 88F6281, as used in my Sheevaplug), using the datasheet here: Have you got a copy of the NetBSD source tree that we got from Marvell, it might provide some extra clues ? No, I don't have that. I'm sure it would be useful! :) I'm getting an error on startup: mvsocnand0 at mvsoc0 unit 0 offset 0x10418-0x10473: Marvell SoC NAND controller nand0 at mvsocnand0NAND chip is write protected! Robert Swindells OK, I can probably fix this - but in the meantime, I've gotten a (mostly) working driver from someone else that I'm cleaning up and merging with my code. More soon. +j
Assistance working with Marvell SoC NAND controller
I'm trying to write a driver for the NAND controller on the Marvell Kirkwood (and I believe Orion as well) SoC (specifically the 88F6281, as used in my Sheevaplug), using the datasheet here: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf and also poking around Linux and u-boot source for examples of how they do things. From what I can tell, with some help from ahoka@ (who wrote nand_samsung.c, already in-tree), I have a driver attaching and getting *some* information from the NAND: mvsocnand0 at mvsoc0 unit 0 offset 0x10418-0x10473: Marvell SoC NAND controller nand0 at mvsocnand0: Legacy NAND Flash nand0: manufacturer id: 0xec (Samsung), device id: 0xdc nand0: page size: 2048 bytes, spare size: 64 bytes, block size: 65536 bytes nand0: LUN size: 0 blocks, LUNs: 0, total storage size: 384 MB nand0: column cycles: 0, row cycles: 0 flash0 at nand0: NAND flash partition size 1024 KB, offset 0, read only flash0: erase size 64 KB, page size 2048 bytes, write size 2048 bytes flash1 at nand0: NAND flash partition size 4096 KB, offset 0x10, read only flash1: erase size 64 KB, page size 2048 bytes, write size 2048 bytes flash2 at nand0: NAND flash partition size 507 MB, offset 0x50 flash2: erase size 64 KB, page size 2048 bytes, write size 2048 bytes My driver-so-far is here: http://www.tastylime.net/netbsd/mvsocnand.diff , and I added the following to SHEEVAPLUG: options NAND_DEBUG options NAND_VERBOSE mvsocnand*at mvsoc? offset ? irq ? nand0 at nandbus? flash0 at nand0 offset 0x00 size 0x0010 readonly 1 # uboot? flash1 at nand0 offset 0x0010 size 0x0040 readonly 1 # uimage flash2 at nand0 offset 0x0050 size 0x1fb0 # chfs? I haven't implemented all the functions yet, trying to get what I have so far working first. 
I think I'm not quite understanding how the command latch/address latch bit is supposed to work - I've done mvsocnand_command() and mvsocnand_address() based on what u-boot does, the datasheet, and ahoka's omap2_nand.c, but reading data from the NAND flash doesn't actually seem to work, and I'm hoping someone with more experience can spot something wrong that I'm doing... Thanks for any and all assistance, +j
KAUTH_PROCESS_SCHEDULER_*AFFINITY restricted to root in default secmodel?
I've just had my first occasion to play with the processor affinity code, via porting some code from Linux. It was very straightforward, but there's one glaring difference: Linux doesn't (by default, anyway) require root to use their sched_setaffinity(), while we do require root (by default) for pthread_setaffinity_np(). I don't pretend to understand the security ramifications regarding processor affinity; I do wonder, however, whether it warrants requiring elevated privilege (and possible exposure via other code in the process which doesn't require root for normal operation) to prevent users from pinning their own code to a particular cpu by default. Are we sure we've made the right (default) tradeoff here? For my own use, I know I can tweak the secmodel to permit KAUTH_PROCESS_SCHEDULER_SETAFFINITY. (And now I'm going to research how to actually do it. :) Thanks, +j
Re: extended attributes
On 6/9/11 9:37 AM, Emmanuel Dreyfus wrote: On Tue, Jun 07, 2011 at 08:07:02AM +0000, Emmanuel Dreyfus wrote: [autocreate extended attribute backend] Such a behavior could be triggered by a new kernel option such as UFS_EXTATTR_AUTOCREATE. It could hold the default size for autocreated attributes. (e.g.: options UFS_EXTATTR_AUTOCREATE=1024 to get 1024 bytes long attributes). Here is a patch that implements this option. Is it reasonable to commit it? I can't comment on the content of the patch itself, but I am definitely in favor of moving toward default support for extended attributes, and this _seems_ like a good start. Thank you for working on this. +j
Re: wedges on vnd(4) patch
On Mon, Jun 21, 2010 at 02:45:42PM +0000, Christos Zoulas wrote: - declare the variables on top to avoid braces in case statements. - KNF continuation lines (indent-by-four) and lose the ()'s around return Updated patch below. As an aside, this is clearly more KNF, but I have to say it's harder to follow (not that this is all that complex) with the variable declaration so far separated from its usage. But I suppose that's another conversation for another day. :) +j -- Index: sys/dev/vnd.c === RCS file: /cvsroot/src/sys/dev/vnd.c,v retrieving revision 1.208 diff -p -r1.208 vnd.c *** sys/dev/vnd.c 2 Mar 2010 21:32:29 - 1.208 --- sys/dev/vnd.c 21 Jun 2010 18:51:36 - *** extern struct cfdriver vnd_cd; *** 254,259 --- 254,261 static struct vnd_softc *vnd_spawn(int); int vnd_destroy(device_t); + static struct dkdriver vnddkdriver = { vndstrategy, minphys }; + void vndattach(int num) { *** vnd_attach(device_t parent, device_t sel *** 282,288 sc->sc_comp_buff = NULL; sc->sc_comp_decombuf = NULL; bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); ! disk_init(&sc->sc_dkdev, device_xname(self), NULL); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); } --- 284,290 sc->sc_comp_buff = NULL; sc->sc_comp_decombuf = NULL; bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); ! disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); } *** vndioctl(dev_t dev, u_long cmd, void *da *** 1019,1024 --- 1021,1028 #ifdef __HAVE_OLD_DISKLABEL struct disklabel newlabel; #endif + struct dkwedge_info *dkw; + struct dkwedge_list *dkwl; #ifdef DEBUG if (vnddebug & VDB_FOLLOW) *** unlock_and_exit: *** 1501,1506 --- 1505,1537 VOP_UNLOCK(vnd->sc_vp, 0); return error; + case DIOCAWEDGE: + dkw = (void *) data; + + if ((flag & FWRITE) == 0) + return EBADF; + + /* If the ioctl happens here, the parent is us. 
*/ + strlcpy(dkw->dkw_parent, device_xname(vnd->sc_dev), + sizeof(dkw->dkw_parent)); + return dkwedge_add(dkw); + + case DIOCDWEDGE: + dkw = (void *) data; + + if ((flag & FWRITE) == 0) + return EBADF; + + /* If the ioctl happens here, the parent is us. */ + strlcpy(dkw->dkw_parent, device_xname(vnd->sc_dev), + sizeof(dkw->dkw_parent)); + return dkwedge_del(dkw); + + case DIOCLWEDGES: + dkwl = (void *) data; + + return dkwedge_list(&vnd->sc_dkdev, dkwl, l); + + default: return ENOTTY; }
wedges on vnd(4) patch
I had occasion to use wedges on a vnd(4), only to be reminded that they're not currently supported. Anyone see a problem with this patch? I've given it light testing, and haven't come across any issues yet... +j Index: sys/dev/vnd.c === RCS file: /cvsroot/src/sys/dev/vnd.c,v retrieving revision 1.208 diff -p -r1.208 vnd.c *** sys/dev/vnd.c 2 Mar 2010 21:32:29 - 1.208 --- sys/dev/vnd.c 21 Jun 2010 04:24:56 - *** extern struct cfdriver vnd_cd; *** 254,259 --- 254,261 static struct vnd_softc *vnd_spawn(int); int vnd_destroy(device_t); + static struct dkdriver vnddkdriver = { vndstrategy, minphys }; + void vndattach(int num) { *** vnd_attach(device_t parent, device_t sel *** 282,288 sc->sc_comp_buff = NULL; sc->sc_comp_decombuf = NULL; bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); ! disk_init(&sc->sc_dkdev, device_xname(self), NULL); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); } --- 284,290 sc->sc_comp_buff = NULL; sc->sc_comp_decombuf = NULL; bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); ! disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver); if (!pmf_device_register(self, NULL, NULL)) aprint_error_dev(self, "couldn't establish power handler\n"); } *** unlock_and_exit: *** 1501,1506 --- 1503,1541 VOP_UNLOCK(vnd->sc_vp, 0); return error; + case DIOCAWEDGE: + { + struct dkwedge_info *dkw = (void *) data; + + if ((flag & FWRITE) == 0) + return (EBADF); + + /* If the ioctl happens here, the parent is us. */ + strlcpy(dkw->dkw_parent, device_xname(vnd->sc_dev), + sizeof(dkw->dkw_parent)); + return (dkwedge_add(dkw)); + } + + case DIOCDWEDGE: + { + struct dkwedge_info *dkw = (void *) data; + + if ((flag & FWRITE) == 0) + return (EBADF); + + /* If the ioctl happens here, the parent is us. 
*/ + strlcpy(dkw->dkw_parent, device_xname(vnd->sc_dev), + sizeof(dkw->dkw_parent)); + return (dkwedge_del(dkw)); + } + + case DIOCLWEDGES: + { + struct dkwedge_list *dkwl = (void *) data; + + return (dkwedge_list(&vnd->sc_dkdev, dkwl, l)); + } + + default: return ENOTTY; }