Re: savecore: first and last dump headers disagree on /dev/ad0b

2003-10-07 Thread Tor Egge

> Hello.
> -CURRENT as of yesterday can't save kernel dump:
> 
>   savecore: first and last dump headers disagree on /dev/ad0b
>   savecore: unsaved dumps found but not saved
> 
> Is this a known issue? 

Yes.

I had the same problem on my development machine at the end of August and
ended up using the enclosed patch to get working dumps.

- Tor Egge
Index: sys/dev/ata/ata-all.c
===
RCS file: /home/ncvs/src/sys/dev/ata/ata-all.c,v
retrieving revision 1.187
diff -u -r1.187 ata-all.c
--- sys/dev/ata/ata-all.c   27 Aug 2003 15:27:56 -  1.187
+++ sys/dev/ata/ata-all.c   31 Aug 2003 22:31:33 -
@@ -109,9 +109,11 @@
 ch->device[MASTER].channel = ch;
 ch->device[MASTER].unit = ATA_MASTER;
 ch->device[MASTER].mode = ATA_PIO;
+ch->device[MASTER].dumping = 0;
 ch->device[SLAVE].channel = ch;
 ch->device[SLAVE].unit = ATA_SLAVE;
 ch->device[SLAVE].mode = ATA_PIO;
+ch->device[SLAVE].dumping = 0;
 ch->dev = dev;
 ch->state = ATA_IDLE;
 bzero(&ch->queue_mtx, sizeof(struct mtx));
Index: sys/dev/ata/ata-all.h
===
RCS file: /home/ncvs/src/sys/dev/ata/ata-all.h,v
retrieving revision 1.65
diff -u -r1.65 ata-all.h
--- sys/dev/ata/ata-all.h   25 Aug 2003 11:13:04 -  1.65
+++ sys/dev/ata/ata-all.h   31 Aug 2003 22:56:14 -
@@ -227,6 +227,7 @@
 
 intcmd;/* last cmd executed */
 intmode;   /* transfermode */
+intdumping;/* panic dump in progress */
 void   (*setmode)(struct ata_device *atadev, int mode);
 };
 
Index: sys/dev/ata/ata-disk.c
===
RCS file: /home/ncvs/src/sys/dev/ata/ata-disk.c,v
retrieving revision 1.159
diff -u -r1.159 ata-disk.c
--- sys/dev/ata/ata-disk.c  25 Aug 2003 09:01:49 -  1.159
+++ sys/dev/ata/ata-disk.c  31 Aug 2003 23:18:40 -
@@ -336,6 +336,26 @@
 if (!adp)
return ENXIO;
 
+/* Some chipsets must be configured for PIO before dump starts. */
+if (adp->device->dumping == 0) {
+   adp->device->dumping = 1;
+   adp->device->setmode(adp->device, ATA_PIO_MAX);
+}
+if (length == 0) {
+   int error = 0;
+   /* Commit dump to media */
+   if (adp->device->param != NULL &&
+   adp->device->param->support.command2 & ATA_SUPPORT_FLUSHCACHE) {
+   error = ata_controlcmd(adp->device, ATA_FLUSHCACHE, 0, 0, 0);
+   if (error != 0)
+   ata_prtdev(adp->device, "Flush cache failed\n");
+   else
+   ata_prtdev(adp->device, "Flush cache succeeded\n");
+   } else
+   ata_prtdev(adp->device, "Flush cache skipped\n");
+   return error;
+}
+
 bzero(&request, sizeof(struct ata_request));
 request.device = adp->device;
 request.data = virtual;
@@ -352,13 +372,15 @@
 
 if (adp->device->channel->hw.transaction(&request) == ATA_OP_FINISHED)
return EIO;
-while (request.bytecount > request.donecount) {
+while (adp->device->channel->running == &request) {
DELAY(20);
-   adp->device->channel->running = &request;
adp->device->channel->hw.interrupt(adp->device->channel);
-   adp->device->channel->running = NULL;
if (request.status & ATA_S_ERROR)
return EIO;
+}
+if (request.bytecount > request.donecount) {
+   printf("Short write?");
+   return EIO;
 }
 return 0;
 }
Index: sys/dev/ata/ata-queue.c
===
RCS file: /home/ncvs/src/sys/dev/ata/ata-queue.c,v
retrieving revision 1.4
diff -u -r1.4 ata-queue.c
--- sys/dev/ata/ata-queue.c 28 Aug 2003 08:22:53 -  1.4
+++ sys/dev/ata/ata-queue.c 31 Aug 2003 22:35:30 -
@@ -106,9 +106,36 @@
 ata_controlcmd(struct ata_device *atadev, u_int8_t command, u_int16_t feature,
   u_int64_t lba, u_int16_t count)
 {
-struct ata_request *request = ata_alloc_request();
+struct ata_request *request;
 int error = ENOMEM;
 
+/* Handle calls from addump */
+if (atadev->dumping) {
+   struct ata_request request2;
+   request = &request2;
+   bzero(request, sizeof(struct ata_request));
+   request->device = atadev;
+   request->u.ata.command = command;
+   request->u.ata.lba = lba;
+   request->u.ata.count = count;
+   request->u.ata.feature = feature;
+   request->flags = ATA_R_CONTROL;
+   request->timeout = 5;
+   if (atadev->

Re: HEADSUP: UMA not reentrant / possible memory leak

2003-07-29 Thread Tor . Egge
> The indication of this is that the g_bio zone does not return to
> zero USED as it should.

It looks like z->uz_cachefree is slightly out of date (updated in
zone_timeout() every 20 seconds) and often too low (not taking the
z->uz_full_bucket list into account).

The enclosed patch recalculates the number of free elements on the
buckets instead of using z->uz_cachefree.

- Tor Egge

Index: sys/vm/uma_core.c
===
RCS file: /home/ncvs/src/sys/vm/uma_core.c,v
retrieving revision 1.63
diff -u -r1.63 uma_core.c
--- sys/vm/uma_core.c   28 Jul 2003 02:29:07 -  1.63
+++ sys/vm/uma_core.c   30 Jul 2003 01:05:37 -
@@ -2092,6 +2092,10 @@
char *tmpbuf, *offset;
uma_zone_t z;
char *p;
+   int cpu;
+   int cachefree;
+   uma_bucket_t bucket;
+   uma_cache_t cache;
 
cnt = 0;
mtx_lock(&uma_mtx);
@@ -2112,8 +2116,27 @@
LIST_FOREACH(z, &uma_zones, uz_link) {
if (cnt == 0)   /* list may have changed size */
break;
+   for (cpu = 0; cpu < maxcpu; cpu++) {
+   if (CPU_ABSENT(cpu))
+   continue;
+   CPU_LOCK(cpu);
+   }
ZONE_LOCK(z);
-   totalfree = z->uz_free + z->uz_cachefree;
+   cachefree = 0;
+   for (cpu = 0; cpu < maxcpu; cpu++) {
+   if (CPU_ABSENT(cpu))
+   continue;
+   cache = &z->uz_cpu[cpu];
+   if (cache->uc_allocbucket != NULL)
+   cachefree += cache->uc_allocbucket->ub_ptr + 1;
+   if (cache->uc_freebucket != NULL)
+   cachefree += cache->uc_freebucket->ub_ptr + 1;
+   CPU_UNLOCK(cpu);
+   }
+   LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
+   cachefree += bucket->ub_ptr + 1;
+   }
+   totalfree = z->uz_free + cachefree;
len = snprintf(offset, linesize,
"%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
z->uz_name, z->uz_size,
___
[EMAIL PROTECTED] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "[EMAIL PROTECTED]"


Re: 3dmd broken

2003-05-29 Thread Tor . Egge
> Hi,
> 
> I installed 3dmd the webtool for 3ware Raid controllers, I left the 
> config untouched or set whatever port, the prog starts but doesnt bind 
> any port, thus no connection possible, is it broken ?
> 
> im on 5.1b
> 
> id be interested if anyone running it with 5.1b

If you unpack the following snippet

---
begin 644 3dmd.bpatch.144284
6#[IL)`0-N/L```#-@'+KA=)T`C'`PP``
`
end
---

using uudecode, you should get the following checksum:

MD5 (3dmd.bpatch.144284) = b1116c4846262ee3d523db05b82717c3

To apply:

   cp -p 3dmd 3dmd.FCS
   dd if=3dmd.bpatch.144284 of=3dmd seek=144284 bs=1 conv=notrunc

After which you should have the following checksums:

MD5 (3dmd.FCS) = e6b0212a2b7ce6f4892fea7751c8d711
MD5 (3dmd) = 8e2254774524dcc88d31acb8c4882779

The above patch changes 3dmd to supply the RFTHREAD argument to rfork().

An alternative fix is to apply the enclosed patch, which adds tracking
of (process leader, file descriptor table) tuples.

- Tor Egge
Index: sys/kern/kern_fork.c
===
RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.198
diff -u -r1.198 kern_fork.c
--- sys/kern/kern_fork.c13 May 2003 20:35:59 -  1.198
+++ sys/kern/kern_fork.c18 May 2003 15:01:14 -
@@ -139,13 +139,6 @@
/* Don't allow kernel only flags. */
if ((uap->flags & RFKERNELONLY) != 0)
return (EINVAL);
-   /* 
-* Don't allow sharing of file descriptor table unless
-* RFTHREAD flag is supplied
-*/
-   if ((uap->flags & (RFPROC | RFTHREAD | RFFDG | RFCFDG)) ==
-   RFPROC)
-   return(EINVAL);
error = fork1(td, uap->flags, 0, &p2);
if (error == 0) {
td->td_retval[0] = p2 ? p2->p_pid : 0;
@@ -209,6 +202,7 @@
int ok;
static int pidchecked = 0;
struct filedesc *fd;
+   struct filedesc_to_leader *fdtol;
struct proc *p1 = td->td_proc;
struct thread *td2;
struct kse *ke2;
@@ -419,15 +413,40 @@
/*
 * Copy filedesc.
 */
-   if (flags & RFCFDG)
+   if (flags & RFCFDG) {
fd = fdinit(td->td_proc->p_fd);
-   else if (flags & RFFDG) {
+   fdtol = NULL;
+   } else if (flags & RFFDG) {
FILEDESC_LOCK(p1->p_fd);
fd = fdcopy(td->td_proc->p_fd);
FILEDESC_UNLOCK(p1->p_fd);
-   } else
+   fdtol = NULL;
+   } else {
fd = fdshare(p1->p_fd);
-
+   if (p1->p_fdtol == NULL)
+   p1->p_fdtol =
+   filedesc_to_leader_alloc(NULL,
+NULL,
+p1->p_leader);
+   if ((flags & RFTHREAD) != 0) {
+   /*
+* Shared file descriptor table and
+* shared process leaders.
+*/
+   fdtol = p1->p_fdtol;
+   FILEDESC_LOCK(p1->p_fd);
+   fdtol->fdl_refcount++;
+   FILEDESC_UNLOCK(p1->p_fd);
+   } else {
+   /* 
+* Shared file descriptor table, and
+* different process leaders 
+*/
+   fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
+p1->p_fd,
+p2);
+   }
+   }
/*
 * Make a proc table entry for the new process.
 * Start by zeroing the section of proc that is zero-initialized,
@@ -506,6 +525,7 @@
if (p2->p_textvp)
VREF(p2->p_textvp);
p2->p_fd = fd;
+   p2->p_fdtol = fdtol;
PROC_UNLOCK(p1);
PROC_UNLOCK(p2);
 
Index: sys/kern/kern_descrip.c
===
RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.196
diff -u -r1.196 kern_descrip.c
--- sys/kern/kern_descrip.c 15 May 2003 21:13:08 -  1.196
+++ sys/kern/kern_descrip.c 20 May 2003 13:42:01 -
@@ -73,6 +73,8 @@
 #include 
 
 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
+static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
+"file desc to leader structures");
 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
 
 static uma_zone_t file_zone;
@@ -456,6 +458,7 @@
struct file *fp;
struct file *delfp;
int error, newfd;
+   int h

Re: Panics instead of Hard Locks

2002-11-10 Thread Tor . Egge
> 
> Since going from a SMP to nonSMP kernel the Hard Locks don't
> seem to be happening.  However I'm getting panics.
> 
> I've gotten 4 'sleeping thread owns a mutex' panics and one each
> of 'Assertion i != 0 failed at ../../../kern/subr_witness.c:669'
> and 'Duplicate free of item 0xc3895cc0 from zone 0xc0ea63c0(VMSPACE)'

The 'Duplicate free' can be caused by a race between swapout_procs()
and kern_exit()+wait1().

The enclosed patch might help.

Disabling swapping (sysctl vm.swap_enabled=0) can also help.

- Tor Egge

Index: sys/kern/kern_exit.c
===
RCS file: /home/ncvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.184
diff -u -r1.184 kern_exit.c
--- sys/kern/kern_exit.c15 Oct 2002 00:14:32 -  1.184
+++ sys/kern/kern_exit.c10 Nov 2002 17:58:39 -
@@ -285,7 +285,7 @@
 * Can't free the entire vmspace as the kernel stack
 * may be mapped within that space also.
 */
-   if (--vm->vm_refcnt == 0) {
+   if (vm->vm_refcnt == 1) {
if (vm->vm_shm)
shmexit(p);
pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map),
Index: sys/vm/vm_map.c
===
RCS file: /home/ncvs/src/sys/vm/vm_map.c,v
retrieving revision 1.271
diff -u -r1.271 vm_map.c
--- sys/vm/vm_map.c 9 Nov 2002 21:26:49 -   1.271
+++ sys/vm/vm_map.c 10 Nov 2002 17:59:40 -
@@ -314,11 +317,9 @@
struct vmspace *vm;
 
GIANT_REQUIRED;
-   if (p == p->p_vmspace->vm_freer) {
-   vm = p->p_vmspace;
-   p->p_vmspace = NULL;
-   vmspace_dofree(vm);
-   }
+   vm = p->p_vmspace;
+   p->p_vmspace = NULL;
+   vmspace_free(vm);
 }
 
 /*



Re: What is going on?

2002-06-26 Thread Tor . Egge

> 
> sure enough:
> ref4# grep
> /usr/libexec/ld-elf.so.1: grep: Shared object has no run-time symbol table
> ref4# 
> 
> 
> huh?
> 
> freshly cvsup'd sources..


Too many pages were prefaulted in pmap_object_init_pt, thus the wrong
physical page was entered in the pmap for the virtual address where
the .dynamic section data was supposed to be.


Index: sys/i386/i386/pmap.c
===
RCS file: /home/ncvs/src/sys/i386/i386/pmap.c,v
retrieving revision 1.325
diff -u -r1.325 pmap.c
--- sys/i386/i386/pmap.c26 Jun 2002 20:32:51 -  1.325
+++ sys/i386/i386/pmap.c27 Jun 2002 03:04:51 -
@@ -2493,7 +2504,7 @@
((objpgs > 0) && (p != NULL));
p = TAILQ_NEXT(p, listq)) {
 
-   if (p->pindex < pindex || p->pindex - pindex > psize) {
+   if (p->pindex < pindex || p->pindex - pindex >= psize) {
continue;
    }
tmpidx = p->pindex - pindex;

- Tor Egge

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: Crash after world/kernel upgrade

2002-06-11 Thread Tor . Egge

> System crashed after updating today.
> During the start of system services, in specific
> at the start of sendmail the system crashes with
> the new kernel. :/

There are some problems with the inpcb locking:

  - attempts to destroy held lock in in_pcbdetach.

  - typo in unlocking (causing recursive lock instead)

  - lack of inet6 support for inpcb locking, e.g. no
handling of locks in in6_pcbdetach.
 
I had to comment out INET6 from my kernel config file and apply the
enclosed patch to get my machine to boot today.

- Tor Egge



Index: sys/netinet/in_pcb.c
===
RCS file: /home/ncvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.106
diff -u -r1.106 in_pcb.c
--- sys/netinet/in_pcb.c10 Jun 2002 20:05:36 -  1.106
+++ sys/netinet/in_pcb.c11 Jun 2002 16:13:29 -
@@ -573,6 +573,11 @@
rtfree(inp->inp_route.ro_rt);
ip_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
+   /* XXX: Kludge: Unlock inp before crashing */
+   if (mtx_owned(&inp->inp_mtx)) {
+   printf("Warning: INP_LOCK held in in_pcbdetach\n");
+   INP_UNLOCK(inp);
+   }
INP_LOCK_DESTROY(inp);
uma_zfree(ipi->ipi_zone, inp);
 }
@@ -741,7 +746,7 @@
}
INP_UNLOCK(inp);
}
-   INP_INFO_RLOCK(pcbinfo);
+   INP_INFO_RUNLOCK(pcbinfo);
 }
 
 /*



Re: memset() broken in gcc-3.1 on i386's

2002-06-03 Thread Tor . Egge

> Actually, it broke fsck_ffs.
> 
> Workaround to avoid the known broken case:

The brokenness in ix86_expand_clrstr is quite visible when you
compare the function with ix86_expand_movstr.

- Tor Egge



Index: contrib/gcc/config/i386/i386.c
===
RCS file: /home/ncvs/src/contrib/gcc/config/i386/i386.c,v
retrieving revision 1.9
diff -u -r1.9 i386.c
--- contrib/gcc/config/i386/i386.c  9 May 2002 22:42:39 -   1.9
+++ contrib/gcc/config/i386/i386.c  4 Jun 2002 00:18:49 -
@@ -9432,7 +9432,7 @@
 gen_rtx_SUBREG (SImode, zeroreg, 0)));
   if (TARGET_64BIT && (align <= 4 || count == 0))
{
- rtx label = ix86_expand_aligntest (destreg, 2);
+ rtx label = ix86_expand_aligntest (countreg, 4);
  emit_insn (gen_strsetsi (destreg,
   gen_rtx_SUBREG (SImode, zeroreg, 0)));
  emit_label (label);
@@ -9443,7 +9443,7 @@
 gen_rtx_SUBREG (HImode, zeroreg, 0)));
   if (align <= 2 || count == 0)
{
- rtx label = ix86_expand_aligntest (destreg, 2);
+ rtx label = ix86_expand_aligntest (countreg, 2);
  emit_insn (gen_strsethi (destreg,
   gen_rtx_SUBREG (HImode, zeroreg, 0)));
  emit_label (label);
@@ -9454,7 +9454,7 @@
 gen_rtx_SUBREG (QImode, zeroreg, 0)));
   if (align <= 1 || count == 0)
{
- rtx label = ix86_expand_aligntest (destreg, 1);
+ rtx label = ix86_expand_aligntest (countreg, 1);
  emit_insn (gen_strsetqi (destreg,
   gen_rtx_SUBREG (QImode, zeroreg, 0)));
  emit_label (label);



Re: Hang on flushing buffers w/today's -CURRENT, SMP system

2002-02-08 Thread Tor . Egge

> h  so what is the difference between your kernel and mine that works?
> 
> just out of curiosity, have you tried a very latest -current? 
> do you have your own config? how does GENERIC behave?
> (what kind of disks do you have?)

It looks like a call to setrunqueue() was incorrectly dropped in 
the latest version of kern_shutdown.c.

Index: kern_shutdown.c
===
RCS file: /home/ncvs/src/sys/kern/kern_shutdown.c,v
retrieving revision 1.118
diff -u -r1.118 kern_shutdown.c
--- kern_shutdown.c 7 Feb 2002 20:58:44 -   1.118
+++ kern_shutdown.c 9 Feb 2002 01:11:18 -
@@ -272,6 +272,7 @@
DROP_GIANT();
for (subiter = 0; subiter < 50 * iter; subiter++) {
mtx_lock_spin(&sched_lock);
+   setrunqueue(curthread);
curthread->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch(); /* Allow interrupt threads to run 
*/
mtx_unlock_spin(&sched_lock);


- Tor Egge

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: boot() called on cpu #1 - hang

2001-09-08 Thread Tor . Egge

> Hello,
> 
> on a 5.0-current i386-SMP system of today I am still getting on about
> every second reboot the message:
> 
> boot() called on cpu #1
> W

Try applying the enclosed patch.

- Tor Egge




Index: vm_machdep.c
===
RCS file: /home/ncvs/src/sys/i386/i386/vm_machdep.c,v
retrieving revision 1.169
diff -u -r1.169 vm_machdep.c
--- vm_machdep.c4 Sep 2001 08:36:46 -   1.169
+++ vm_machdep.c4 Sep 2001 19:58:38 -
@@ -424,8 +433,13 @@
 {
 
cpu_reset_proxy_active = 1;
+   wbinvd();
while (cpu_reset_proxy_active == 1)
;/* Wait for other cpu to see that we've started */
+   cpu_reset_proxy_active = 3;
+   wbinvd();
+   while (cpu_reset_proxy_active == 3)
+   ;  /* Wait for other cpu to enable interrupts */
stop_cpus((1<


Re: linux threads and fclose lock problem

2001-08-28 Thread Tor . Egge

> The following test program hangs on current from 8/20/2001.
> The program hangs in the fprintf to the function testThread(),
> instead of running to completion. If the call in main to fclose()
> of an unrelated file descriptor is removed the program runs to 
> completion.
> 
> From tracing code, it appears that fclose.c locks the file, does some stuff,
> and then *tries* to unlock the file.  But while _flockfile is called,
> _funlockfile is *not*.  (The source for fclose.c calls FUNLOCKFILE(fp) -
> don't know where FUNLOCKFILE is defined.)

_funlockfile is called, but fp->_file is -1 so _funlockfile just
returned.  Under RELENG_4, fp->_file is set to -1 after the
FUNLOCKFILE() statement.

A patch for disabling uthread_file.c in the linuxthreads port for
FreeBSD 5.0-CURRENT is enclosed.  This causes the libc version of
_flockfile to be used.

- Tor Egge



? work
Index: files/uthread_file.c
===
RCS file: /home/ncvs/ports/devel/linuxthreads/files/uthread_file.c,v
retrieving revision 1.3
diff -u -r1.3 uthread_file.c
--- files/uthread_file.c4 Jan 2000 00:05:22 -   1.3
+++ files/uthread_file.c29 Aug 2001 01:12:32 -
@@ -45,6 +45,8 @@
 #include "spinlock.h"
 #include "restart.h"
 
+#if __FreeBSD__ == 4
+
 /*
  * Weak symbols for externally visible functions in this file:
  */
@@ -396,3 +398,5 @@
}
_SPINUNLOCK(&hash_lock);
 }
+
+#endif



Re: Interruptable hang starting init in today's -CURRENT

2001-07-22 Thread Tor . Egge


> Interestingly, "sysctl -a -N" spits out names, but then seems to fall
> into a rut:
> 
[]
> net.inet.udp.getcred
> net.inet.accf.unloadable
> net.inet.accf.373
> net.inet.accf.373
> net.inet.accf.373
[]

> Looks as if it's looping with no termination conditions being matched.

When I got the same problem on my -current machine today, I found that
net.inet.accf and net.inet.raw had the same oid.

The system booted normally after changing the start oid for
dynamically assigned sysctl entries from 100 to 256.

- Tor Egge



Index: sys/kern/kern_sysctl.c
===
RCS file: /home/ncvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.110
diff -u -r1.110 kern_sysctl.c
--- sys/kern/kern_sysctl.c  2001/06/22 19:54:38 1.110
+++ sys/kern/kern_sysctl.c  2001/07/22 09:33:11
@@ -110,10 +110,10 @@
/*
 * If this oid has a number OID_AUTO, give it a number which
 * is greater than any current oid.  Make sure it is at least
-* 100 to leave space for pre-assigned oid numbers.
+* 256 to leave space for pre-assigned oid numbers.
 */
if (oidp->oid_number == OID_AUTO) {
-   static int newoid = 100;
+   static int newoid = 256;
 
oidp->oid_number = newoid++;
if (newoid == 0x7fff)



Re: kernel with SSE is unstable

2001-07-17 Thread Tor . Egge


> Good.
> 
> I want all use of the cpu number removed.  It seems to be just to avoid
> alignment problems that shouldn't happen in practice (the save area
> should always be suitably aligned if it isn't already, and I think it
> is already).

The pcb_save area has the proper alignment but the dummy variable used
in npxinit might not have the proper alignment when on the stack.

The enclosed patch should be a step in the right direction.

- Tor Egge



Index: sys/i386/isa/npx.c
===
RCS file: /home/ncvs/src/sys/i386/isa/npx.c,v
retrieving revision 1.105
diff -u -r1.105 npx.c
--- sys/i386/isa/npx.c  2001/07/16 06:00:23 1.105
+++ sys/i386/isa/npx.c  2001/07/16 16:54:13
@@ -564,7 +564,7 @@
 npxinit(control)
u_short control;
 {
-   union savefpu dummy;
+   static union savefpu dummy;
critical_t savecrit;
 
if (!npx_exists)
@@ -926,30 +926,21 @@
 fpusave(addr)
union savefpu *addr;
 {
-   static struct savexmm svxmm[MAXCPU];
-   u_char oncpu = PCPU_GET(cpuid);

if (!cpu_fxsr)
fnsave(addr);
-   else {
-   fxsave(&svxmm[oncpu]);
-   bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm));
-   }
+   else
+   fxsave(addr);
 }
 
 static void
 fpurstor(addr)
union savefpu *addr;
 {
-   static struct savexmm svxmm[MAXCPU];
-   u_char oncpu = PCPU_GET(cpuid);
-
if (!cpu_fxsr)
frstor(addr);
-   else {
-   bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm));
-   fxrstor(&svxmm[oncpu]);
-   }
+   else
+   fxrstor(addr);
 }
 
 #ifdef I586_CPU_XXX



Re: kernel with SSE is unstable

2001-07-15 Thread Tor . Egge

> In my system kernel(WITH SSE) falls when I use commands netstat and swapinfo.
> kernel without SSE works fine.

I got a very similar panic when trying an UP kernel with SSE enabled.

mi_switch() sets curproc->p_oncpu to NOCPU before calling
cpu_switch().  cpu_switch() might call npxsave() which calls fpusave
with NOCPU as the 'oncpu' argument.

A suggested patch is enclosed.

- Tor Egge



Index: sys/i386/isa/npx.c
===
RCS file: /home/ncvs/src/sys/i386/isa/npx.c,v
retrieving revision 1.104
diff -u -r1.104 npx.c
--- sys/i386/isa/npx.c  2001/07/12 12:21:53 1.104
+++ sys/i386/isa/npx.c  2001/07/15 16:23:02
@@ -160,8 +160,8 @@
 #endif
 static int npx_probe   __P((device_t dev));
 static int npx_probe1  __P((device_t dev));
-static voidfpusave __P((union savefpu *, u_char));
-static voidfpurstor__P((union savefpu *, u_char));
+static voidfpusave __P((union savefpu *));
+static voidfpurstor__P((union savefpu *));
 #ifdef I586_CPU_XXX
 static longtimezero__P((const char *funcname,
 void (*func)(void *buf, size_t len)));
@@ -579,7 +579,7 @@
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
-   fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
+   fpusave(&PCPU_GET(curpcb)->pcb_save);
start_emulating();
critical_exit(savecrit);
 }
@@ -881,7 +881,7 @@
 * fnsave are broken, so our treatment breaks fnclex if it is the
 * first FPU instruction after a context switch.
 */
-   fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
+   fpurstor(&PCPU_GET(curpcb)->pcb_save);
critical_exit(s);
 
return (1);
@@ -916,18 +916,18 @@
 {
 
stop_emulating();
-   fpusave(addr, curproc->p_oncpu);
+   fpusave(addr);
 
start_emulating();
PCPU_SET(npxproc, NULL);
 }
 
 static void
-fpusave(addr, oncpu)
+fpusave(addr)
union savefpu *addr;
-   u_char oncpu;
 {
static struct savexmm svxmm[MAXCPU];
+   u_char oncpu = PCPU_GET(cpuid);

if (!cpu_fxsr)
fnsave(addr);
@@ -938,11 +938,11 @@
 }
 
 static void
-fpurstor(addr, oncpu)
+fpurstor(addr)
union savefpu *addr;
-   u_char oncpu;
 {
static struct savexmm svxmm[MAXCPU];
+   u_char oncpu = PCPU_GET(cpuid);
 
if (!cpu_fxsr)
frstor(addr);



Re: ps 'D' state - ?

2001-06-16 Thread Tor . Egge

> netch@iv:~>ps 218 -l
>   UID   PID  PPID CPU PRI NI   VSZ  RSS WCHAN  STAT  TT   TIME COMMAND
> 0   218 1   0   8  0  1120  176 nanslp DWs   ??0:02.31 diskcheckd:
> 
> Are `select' and `nanosleep' disk uninterruptable waits? ;|

No.  The ps command gave wrong output.

- Tor Egge



Index: bin/ps/print.c
===
RCS file: /home/ncvs/src/bin/ps/print.c,v
retrieving revision 1.47
diff -u -r1.47 print.c
--- bin/ps/print.c  2001/05/03 11:49:44 1.47
+++ bin/ps/print.c  2001/06/16 21:13:16
@@ -176,7 +176,7 @@
 
v = ve->var;
flag = k->ki_p->ki_flag;
-   sflag = k->ki_p->ki_flag;
+   sflag = k->ki_p->ki_sflag;
cp = buf;
 
switch (k->ki_p->ki_stat) {



Re: anyone seen these outside of alpha? or on non-SMP?

2001-06-08 Thread Tor . Egge

> Why can't a filesystem hacker back it out until his return?  Things are
> not getting better and this is tripping up more and more people.

The enclosed patch might help somewhat against the "active pagedep"
panics introduced in revision 1.98 of ffs_softdep.c.  Instead of a
panic, a message is printed and the pagedep structure isn't freed (it
will be freed later by free_newdirblk()).

- Tor Egge



Index: sys/ufs/ffs/ffs_softdep.c
===
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.98
diff -u -r1.98 ffs_softdep.c
--- sys/ufs/ffs/ffs_softdep.c   2001/06/05 01:49:37 1.98
+++ sys/ufs/ffs/ffs_softdep.c   2001/06/07 18:30:16
@@ -1932,14 +1932,16 @@
WORKLIST_INSERT(&inodedep->id_bufwait,
&dirrem->dm_list);
}
+   
+   WORKLIST_REMOVE(&pagedep->pd_list);
if ((pagedep->pd_state & NEWBLOCK) != 0) {
-   FREE_LOCK(&lk);
-   panic("deallocate_dependencies: "
- "active pagedep");
+   /* XXX: Wait for newdirblk to be freed */
+   printf("deallocate_dependencies: "
+  "active pagedep\n");
+   } else {
+   LIST_REMOVE(pagedep, pd_hash);
+   WORKITEM_FREE(pagedep, D_PAGEDEP);
}
-   WORKLIST_REMOVE(&pagedep->pd_list);
-   LIST_REMOVE(pagedep, pd_hash);
-   WORKITEM_FREE(pagedep, D_PAGEDEP);
continue;
 
case D_ALLOCINDIR:



Re: anyone seen these outside of alpha? or on non-SMP?

2001-06-06 Thread Tor . Egge


> My guess would be that the inode in question is a directory inode,
> and that there are temp files there, or a lot of open files, but
> that is just a ballpark guess.

Correct.  A sample program to reproduce this problem is enclosed.
When a diradd dependency that causes a newdirblk dependency to be
allocated is made obsolete in newdirrem(), the pagedep structure is
likely to be freed without first removing the newdirblk dependency
that still points to the pagedep structure.

- Tor Egge




#!/bin/sh
#
# Sample reproducer: creates directory "a" in the current directory, adds
# entries a/000 .. a/00f one at a time, removes the last one again, and
# prints selected "vmstat -m" lines after every step so the counts can be
# compared from step to step.
#
# NOTE(review): "rm -rf a" below deletes any existing "a" in the current
# directory -- run this from a scratch directory.

# Print the "vmstat -m" lines that start (after optional leading spaces)
# with mkdir, newdirblk, dirrem, diradd or pagedep.
dovmstat() {
  vmstat -m |  awk '/^ *(mkdir|newdirblk|dirrem|diradd|pagedep)/ { print }'
}

# Baseline snapshot, then start from a clean state.
dovmstat
rm -rf a
# Sync once a second until the second column of the "dirrem" line reaches 0.
# NOTE(review): column 2 is presumably the in-use count -- confirm against
# the local "vmstat -m" output format.
dirrems=`vmstat -m |  awk '/^ *dirrem/ { print $2 }'`
while test $dirrems -gt 0
do
  sync
  sleep 1
  dirrems=`vmstat -m |  awk '/^ *dirrem/ { print $2 }'`
done
mkdir a
# Same wait, this time for the "mkdir" line, before adding any entries.
mkdirs=`vmstat -m |  awk '/^ *mkdir/ { print $2 }'`
while test $mkdirs -gt 0
do
  sync
  sleep 1
  mkdirs=`vmstat -m |  awk '/^ *mkdir/ { print $2 }'`
done
dovmstat
# Add directory entries one at a time, taking a snapshot after each.
touch a/000
dovmstat
touch a/001
dovmstat
touch a/002
dovmstat
touch a/003
dovmstat
touch a/004
dovmstat
touch a/005
dovmstat
touch a/006
dovmstat
touch a/007
dovmstat
# NOTE(review): a/007 is touched a second time here; unclear whether this
# repeat is intentional -- left as is.
touch a/007
dovmstat
touch a/008
dovmstat
touch a/009
dovmstat
touch a/00a
dovmstat
touch a/00b
dovmstat
touch a/00c
dovmstat
touch a/00d
dovmstat
touch a/00e
dovmstat
touch a/00f
dovmstat
# Remove the most recently added entry again, with no sync in between.
rm a/00f
dovmstat
ls -ld a
dovmstat
# Clean up and take a final snapshot.
rm -rf a
dovmstat
echo FINISHED



Re: freelist corruption

2001-05-27 Thread Tor . Egge

> Peter Jeremy wrote:
> > 
> > On 2001-May-27 20:36:54 -0700, Kris Kennaway <[EMAIL PROTECTED]> wrote:
> > >I've been getting rather a lot of these tonight..any ideas?
> > >
> > >May 27 18:52:06 xor /boot/kernel/kernel: Data modified on freelist: word 2 of 
>object 0xc1a60100 size 64 previous type pagedep (0xd6adc0de != 0xdeadc0de)
> > 
> > If this isn't an ECC system
> 
>   I got one of these on my ECC system:
> 
> May 25 01:16:20  Master /boot/kernel/kernel: Data modified on
> freelist: word 2 of object 0xc1a58dc0 size 52 previous type vfscache
> (0xd6adc0de != 0xdeadc0de)

I'm using the following experimental patch to avoid system crashes and
the freelist corruption message.  The softupdate code seems to free
pagedeps structures with the NEWBLOCK flag set (which indicates that a
newdirblk structure is currently pointing to the pagedep structure).
When the newdirblk structure is freed later on, it clears the NEWBLOCK
flag, changing 0xdeadc0de to 0xd6adc0de.  If the memory for the
pagedep structure has been reused for something else, the system might
crash.  free_newdirblk will typically be on the ddb stack backtrace.

- Tor Egge



Index: sys/ufs/ffs/ffs_softdep.c
===
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.97
diff -u -r1.97 ffs_softdep.c
--- sys/ufs/ffs/ffs_softdep.c   2001/05/19 19:24:26 1.97
+++ sys/ufs/ffs/ffs_softdep.c   2001/05/24 01:48:22
@@ -1932,6 +1932,11 @@
WORKLIST_INSERT(&inodedep->id_bufwait,
&dirrem->dm_list);
}
+   if ((pagedep->pd_state & NEWBLOCK) != 0) {
+   FREE_LOCK(&lk);
+   panic("deallocate_dependencies: "
+ "active pagedep");
+   }
WORKLIST_REMOVE(&pagedep->pd_list);
LIST_REMOVE(pagedep, pd_hash);
WORKITEM_FREE(pagedep, D_PAGEDEP);
@@ -3930,8 +3935,12 @@
 * is written back to disk.
 */
if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) {
-   LIST_REMOVE(pagedep, pd_hash);
-   WORKITEM_FREE(pagedep, D_PAGEDEP);
+   if ((pagedep->pd_state & NEWBLOCK) != 0) {
+   printf("handle_written_filepage: active pagedep\n");
+   } else {
+   LIST_REMOVE(pagedep, pd_hash);
+   WORKITEM_FREE(pagedep, D_PAGEDEP);
+   }
}
return (0);
 }



Re: next panic: blockable sleep lock

2001-05-27 Thread Tor . Egge

> freeing uidinfo: uid = 0, sbsize = 3197224
> freeing uidinfo: uid = 0, proccnt = 86
[...]
> trap(c8d20018,c01d0010,c8cb0010,4,c0b3351c) at trap+0x5d0
> calltrap() at calltrap+0x5
> --- trap 0xc, eip = 0xc01ba652, esp = 0xc8d27ed4, ebp = 0xc8d27ee0 ---
> _mtx_lock_sleep(c0b3351c,0,c035076c,364) at mtx_lock_sleep+0x342
> chgproccnt(c0b33500,,0,c1280900,c03b0d40,c8d26bbc,c1280900) at 
>chgproccnt+0x67

The ui_ref member in struct uidinfo is only 16 bits.  This means that
a fatal wraparound due to a missing call to uifree() can happen rather
quickly.

Index: sys/kern/kern_prot.c
===
RCS file: /home/ncvs/src/sys/kern/kern_prot.c,v
retrieving revision 1.91
diff -u -r1.91 kern_prot.c
--- sys/kern/kern_prot.c2001/05/25 16:59:06 1.91
+++ sys/kern/kern_prot.c2001/05/27 07:10:10
@@ -1303,6 +1303,8 @@
 */
if (cr->cr_uidinfo != NULL)
uifree(cr->cr_uidinfo);
+   if (cr->cr_ruidinfo != NULL)
+   uifree(cr->cr_ruidinfo);
/*
 * Free a prison, if any.
 */


- Tor Egge

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: trap in vm_fault

2001-03-20 Thread Tor . Egge

> Got this on sunday on my laptop (400 MHz PII running a week-old
> -CURRENT):

> #7  0xc0269fcd in trap_pfault (frame=0xc7be6ed8, usermode=0, eva=3332689920)
> at ../../i386/i386/trap.c:888
> #8  0xc0269404 in trap (frame={tf_fs = 24, tf_es = -1071054832, tf_ds = 16,
>   tf_edi = -962277376, tf_esi = -1062592768, tf_ebp = -943821008,
>   tf_isp = -943821052, tf_ebx = -1057541376, tf_edx = 368, tf_ecx = 512,
>   tf_eax = -1062618816, tf_trapno = 12, tf_err = 2, tf_eip = -1072504984,
>   tf_cs = 8, tf_eflags = 78406, tf_esp = -1057541376, tf_ss = -1062575520})
> at ../../i386/i386/trap.c:448
> #9  0xc012df68 in atapi_read (request=0xc0f73300, length=2352)
> at machine/cpufunc.h:227

Trying to mount a music cd might cause the atapi code to try to read
9408 bytes into an 8192-byte buffer: with 2352-byte blocks, the 8192-byte
superblock read is rounded up to 4 blocks (4 * 2352 = 9408 bytes).

sys/dev/ata/atapi-cd.c:
count = (bp->bio_bcount + (blocksize - 1)) / blocksize;


sys/ufs/ffs/ffs_vfsops.c:
if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0)
goto out;

sys/ufs/ffs/fs.h:
        #define SBSIZE8192

- Tor Egge

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: HEADS UP: installworld gotchas

2001-02-11 Thread Tor . Egge


> The new libc is incompatible with some old applications, but I'm not
> too sure why.  The lock was added at the end of FILE...

The size of FILE changed, thus the old application and the new library
no longer agree about the values for stdout and stderr:

#define stdin   (&__sF[0])
#define stdout  (&__sF[1])
#define stderr  (&__sF[2])

- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: Debugging SMP instability (was Re: HEADS-UP: await/asleep removal imminent)

2001-01-18 Thread Tor . Egge

> cool.
> What are the instructions for using this?
> should something have sio1 open?


I use conserver


conserver   conserver host   null-modem serial cables   test machine
label

test        port AA          -                          sio0   serial console

testnmi     port BB          -                          sio1   NMI


I start two conserver sessions, one using test (for the console
access) and one using port testnmi (for NMI).

When I need an NMI, I just press return or space in the session using
port BB.

This only works when the test machine runs an SMP kernel with DDB and
the virtual NMI pushbutton patch.

No programs on the test machine should open sio1, since that could
cause interrupts (which are now NMIs).

> can a paperclip be used to generate the interrupt by connecting pins 2 and 3?

I haven't tried that.

- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: Debugging SMP instability (was Re: HEADS-UP: await/asleep removal imminent)

2001-01-17 Thread Tor . Egge

> Again I'll offer to run any and all code or patches to -current you
> guys can come up with, but I simply dont have the time to sit down
> and analyze into details what you have been doing...

The enclosed patch implements a virtual NMI pushbutton by programming
the IOAPIC to deliver an NMI when sio1 generates an interrupt.

DDB should be defined in the kernel config file.

getty should not run on ttyd1 when this patch is applied.

A serial console on sio0 is recommended.

If you still cannot break into the kernel debugger when the machine
locks up then a rogue device is probably blocking the system
(or the debugger is trying to obtain a mutex held by somebody else).

- Tor Egge




Index: sys/i386/i386/mpapic.c
===
RCS file: /home/ncvs/src/sys/i386/i386/mpapic.c,v
retrieving revision 1.45
diff -u -r1.45 mpapic.c
--- sys/i386/i386/mpapic.c  2001/01/10 04:43:46 1.45
+++ sys/i386/i386/mpapic.c  2001/01/18 05:44:30
@@ -269,6 +269,41 @@
/* return GOOD status */
return 0;
 }
+
+
+void
+enable_sio_NMI(int irq) 
+{
+   u_char  select;  /* the select register is 8 bits */
+   u_int32_t flags; /* the window register is 32 bits */
+   u_int32_t target;/* the window register is 32 bits */
+   u_int32_t vector;/* the window register is 32 bits */
+   int apic;
+   int pin;
+   
+   if (irq < 0 || irq > 15) {
+   printf("Could not enable NMI for irq %d\n", irq);
+   return;
+   }
+   apic = int_to_apicintpin[irq].ioapic; 
+   pin = int_to_apicintpin[irq].int_pin;
+
+   target = CPU_TO_ID(0) << 24;
+   select = IOAPIC_REDTBL0 + (2 * pin);
+   vector = TPR_FAST_INTS + irq;
+   flags =  ((u_int32_t)
+ (IOART_INTMCLR |
+  IOART_TRGREDG |
+  IOART_INTAHI |
+  IOART_DESTPHY |
+  IOART_DELNMI));
+   
+   io_apic_write(apic, select, flags | vector);
+   io_apic_write(apic, select + 1, target);
+   printf("Enabled NMI for irq %d\n", irq);
+   printf("XXX IOAPIC #%d intpin %d ->irq %d vector 0x%x (Delivery mode NMI)\n",
+  apic, pin, irq, vector);
+}
 #undef DEFAULT_ISA_FLAGS
 #undef DEFAULT_FLAGS
 
Index: sys/i386/i386/trap.c
===
RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v
retrieving revision 1.164
diff -u -r1.164 trap.c
--- sys/i386/i386/trap.c2001/01/10 04:43:46 1.164
+++ sys/i386/i386/trap.c2001/01/18 05:44:30
@@ -248,7 +248,8 @@
 
atomic_add_int(&cnt.v_trap, 1);
 
-   if ((frame.tf_eflags & PSL_I) == 0) {
+   if ((frame.tf_eflags & PSL_I) == 0 &&
+   frame.tf_trapno != T_NMI) {
/*
 * Buggy application or kernel code has disabled
 * interrupts and then trapped.  Enabling interrupts
@@ -285,8 +286,38 @@
enable_intr();
}   
 
-   mtx_enter(&Giant, MTX_DEF);
+   if (frame.tf_trapno == T_NMI) {
+   /* If we can't get Giant then forward NMI to next CPU */
+   if (mtx_try_enter(&Giant, MTX_DEF) == 0) {
+   u_long  icr_lo;
+   u_long  icr_hi;
+   int target;
+
+   target = PCPU_GET(cpuid) + 1;
+   if (((1 << target) & PCPU_GET(other_cpus)) == 0)
+   target = 0;
+   
+   /* write the destination field for the target AP */
+   icr_hi = (lapic.icr_hi & ~APIC_ID_MASK) |
+   (cpu_num_to_apic_id[target] << 24);
+   lapic.icr_hi = icr_hi;
+   
+   /* write command */
+   icr_lo = (lapic.icr_lo & APIC_RESV2_MASK) |
+   APIC_DEST_DESTFLD | APIC_DELMODE_NMI | 0xff;
+   lapic.icr_lo = icr_lo;
+   
+   /* wait for pending status end */
+   while (lapic.icr_lo & APIC_DELSTAT_MASK)
+   /* spin */ ;
 
+   __asm __volatile("int $0xff");
+
+   return;
+   }
+   } else
+   mtx_enter(&Giant, MTX_DEF);
+
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 restart:
 #endif
@@ -388,6 +419,9 @@
 */
if (ddb_on_nmi) {
printf ("NMI ... going to debugger\n");
+   sioEATintr();
+  

Re: Strange dma_init error on current with awe64 and floppy

2000-11-07 Thread Tor . Egge


> I'm sure I must have overseen something trivial, but currently I can't
> figure out what it is.

The lower 16 MB memory has been used for

kernel text, data, bss
arrays allocated by vm_page_startup()
memory allocated via malloc() with M_ZERO

- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



RE: sys/i386/i386/machdep.c:cpu_idle() changes causes this Was:

2000-10-22 Thread Tor . Egge

> 
> On 20-Oct-00 Valentin Chopov wrote:
> > I found that if I remove  #ifndef SMP /#endif  in:
> 
> Errr, this doesn't really make sense, and if anything is probably
> hiding the problem.  Also, this change will potentially increase
> interrupt latency even further on SMP machines.

Interrupts are disabled if Giant is busy in vm_page_zero_idle, thus
the idle proc calls mi_switch with interrupts disabled and the process
being scheduled starts running with interrupts disabled.

I suggest removing the asm statement from vm_page_zero_idle as a first
stage in rewriting vm_page_zero_idle.

- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: newfs/fsck problem (bad superblocks)

2000-10-22 Thread Tor . Egge

> Reverting src/sbin/newfs/mkfs.c to revision 1.29 fixes
> the problem.
> 
> With just a quick review of the patch, I'm not sure I
> understand what forces the last dirty buffer to be
> written.
> 
> revert the patch? try to fix it? comments?

Try the enclosed patch.  It flushes the dirty buffer before
program exit and before reading blocks.

- Tor Egge



Index: sbin/newfs/mkfs.c
===
RCS file: /home/ncvs/src/sbin/newfs/mkfs.c,v
retrieving revision 1.30
diff -u -r1.30 mkfs.c
--- sbin/newfs/mkfs.c   2000/10/17 00:41:36 1.30
+++ sbin/newfs/mkfs.c   2000/10/22 08:17:05
@@ -153,6 +153,7 @@
 void rdfs __P((daddr_t, int, char *));
 void setblock __P((struct fs *, unsigned char *, int));
 void wtfs __P((daddr_t, int, char *));
+void wtfsflush __P((void));
 
 #ifndef STANDALONE
 void get_memleft __P((void));
@@ -719,6 +720,7 @@
for (cylno = 0; cylno < sblock.fs_ncg; cylno++)
wtfs(fsbtodb(&sblock, cgsblock(&sblock, cylno)),
sbsize, (char *)&sblock);
+   wtfsflush();
/*
 * Update information about this partion in pack
 * label, to that it may be updated on disk.
@@ -1309,6 +1311,7 @@
 {
int n;
 
+   wtfsflush();
if (mfs) {
memmove(bf, membase + bno * sectorsize, size);
return;
@@ -1330,6 +1333,27 @@
 static char wc[WCSIZE];/* bytes */
 
 /*
+ * Flush dirty write behind buffer.
+ */
+void
+wtfsflush()
+{
+   int n;
+   if (wc_end) {
+   if (lseek(fso, (off_t)wc_sect * sectorsize, SEEK_SET) < 0) {
+   printf("seek error: %ld\n", (long)wc_sect);
+   err(35, "wtfs - writecombine");
+   }
+   n = write(fso, wc, wc_end);
+   if (n != wc_end) {
+   printf("write error: %ld\n", (long)wc_sect);
+   err(36, "wtfs - writecombine");
+   }
+   wc_end = 0;
+   }
+}
+
+/*
  * write a block to the file system
  */
 void
@@ -1363,19 +1387,8 @@
if (wc_end < WCSIZE)
return;
done = 1;
-   }
-   if (wc_end) {
-   if (lseek(fso, (off_t)wc_sect * sectorsize, SEEK_SET) < 0) {
-   printf("seek error: %ld\n", (long)wc_sect);
-   err(35, "wtfs - writecombine");
-   }
-   n = write(fso, wc, wc_end);
-   if (n != wc_end) {
-   printf("write error: %ld\n", (long)wc_sect);
-   err(36, "wtfs - writecombine");
-   }
-   wc_end = 0;
}
+   wtfsflush();
if (done)
return;
if (lseek(fso, (off_t)bno * sectorsize, SEEK_SET) < 0) {



Re: Debugging -current SMPNG HANG on heavy disk-io

2000-09-19 Thread Tor . Egge

> (kgdb) ps
>   pidprocaddruid  pri ppid  pgrp   flag stat comm wchan
>37 c7874a00 c96650000  32 636  004086  3  tar  piperd c9663f20
>36 c7874bc0 c960a0000  32 636  004006  3  tar  FFS node 
>c02f4220

This looks like you've hit the limit for the FFS node memory type.

vmstat -m will indicate if this is correct.

If you see something like

  Memory statistics by type  Type  Kern
  Type  InUse MemUse HighUse  Limit Requests Limit Limit Size(s)
[]
   FFS node262144 65536K  65536K 65536K  20244600 6  256
[]
Memory Totals:  In UseFreeRequests
93897K608K 9482590

(i.e. MemUse == Limit), then you've hit the limit.  The process
allocating a FFS node normally holds a vnode lock, resulting in 
a cascade of vnode locks and a frozen system.

Increasing the kmem_map size (by setting a loader variable
(kern.vm.kmem.size) or defining VM_KMEM_SIZE and VM_KMEM_SIZE_MAX in
the kernel config file) should help.

- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: Dirty buffers on reboot..

2000-09-08 Thread Tor . Egge

> Ok, an update on the dirty buffers on reboot:
> 
> If you use the reboot command, you will get dirty buffers.  If you use
> 'shutdown -r now' instead, you won't get dirty buffers.  Thus, as a workaround
> for now, use the shutdown command to reboot your box until we can track this
> down.

I suggest using some method to allow interrupt threads to run during
the shutdown.  Perhaps the current process priority should be elevated
so it is guaranteed to be scheduled after the last interrupt thread
instead of some unrelated process.

Index: kern_shutdown.c
===
RCS file: /home/ncvs/src/sys/kern/kern_shutdown.c,v
retrieving revision 1.80
diff -u -r1.80 kern_shutdown.c
--- kern_shutdown.c 2000/09/07 01:32:51 1.80
+++ kern_shutdown.c 2000/09/09 01:42:20
@@ -220,6 +220,7 @@
if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
register struct buf *bp;
int iter, nbusy;
+   int subiter;
 
waittime = 0;
printf("\nsyncing disks... ");
@@ -247,7 +248,18 @@
break;
printf("%d ", nbusy);
sync(&proc0, NULL);
-   DELAY(5 * iter);
+   
+   if (curproc != NULL) {
+ for (subiter = 0; subiter < 50 * iter; subiter++) {
+   mtx_enter(&sched_lock, MTX_SPIN);
+   setrunqueue(curproc);
+   mi_switch(); /* Allow interrupt threads to run */
+   mtx_exit(&sched_lock, MTX_SPIN);
+   DELAY(1000);
+ }
+   } else
+ DELAY(5 * iter);
+     
}
printf("\n");
/*

- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: panic: pmap_enter: attempted pmap_enter on 4MB page

2000-09-02 Thread Tor . Egge


> pci50: physical bus=-1061225984
> 
> A little dubious, I think.  Below is the dmesg(8) output from
> kernel.old, my last good kernel built yesterday, booted with ``boot
> -v''.
> 

The enclosed patch might be relevant.  I got similar problems without
it.

Index: pcisupport.c
===
RCS file: /home/ncvs/src/sys/pci/pcisupport.c,v
retrieving revision 1.168
diff -u -r1.168 pcisupport.c
--- pcisupport.c2000/08/31 23:11:35 1.168
+++ pcisupport.c2000/09/03 02:16:21
@@ -770,7 +915,7 @@
ivar = malloc(sizeof ivar[0], M_DEVBUF /* XXX */, M_NOWAIT);
if (ivar == NULL)
panic("out of memory");
-   device_set_ivars(child, ivar);
+   device_set_ivars(dev, ivar);
ivar[0] = secondary;
return bus_generic_attach(dev);
} else


- Tor Egge


To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: Bus error on savecore

2000-09-02 Thread Tor . Egge

> Has anyone else noticed savecore core dumping on a bus error? I think it
> started on my yesterday's make world. Redid the make world today with the
> latest sources and it's still doing it. Leaves a nice savecore.core in /.

Try the enclosed patch.

- Tor Egge



Index: sbin/savecore/savecore.c
===
RCS file: /home/ncvs/src/sbin/savecore/savecore.c,v
retrieving revision 1.33
diff -u -r1.33 savecore.c
--- sbin/savecore/savecore.c2000/05/09 22:20:14 1.33
+++ sbin/savecore/savecore.c2000/09/01 01:56:01
@@ -108,7 +108,7 @@
 intdumpsize;   /* amount of memory dumped */
 
 char   *kernel;
-char   *dirname;   /* directory to save dumps in */
+static char*dirname;   /* directory to save dumps in */
 char   *ddname;/* name of dump device */
 dev_t  dumpdev;/* dump device */
 intdumpfd; /* read/write descriptor on char dev */



mpboot.s patch

2000-05-23 Thread Tor . Egge

> >> With a current kernel I get this when booting:
> >> 
> >> Programming 24 pins in IOAPIC #0
> >> AP #1  (PHY# 12) failed!
> >> panic y/n [y] panic: bye-bye
> >> mp_lock = 0001; cpuid = 0; lapic.id = 
> >> Uptime: 0s

[...]

> I think this has something to do 
> with the new binutils as a kernel built on the 14th and restored via tape works fine 
> but if i
> check out the sys tree from the 14th and build a kernel it panics at the APIC probe.

Try the enclosed patch.

- Tor Egge




Index: sys/i386/i386/mpboot.s
===
RCS file: /home/ncvs/src/sys/i386/i386/mpboot.s,v
retrieving revision 1.13
diff -u -r1.13 mpboot.s
--- sys/i386/i386/mpboot.s  2000/01/29 13:51:17 1.13
+++ sys/i386/i386/mpboot.s  2000/05/24 01:28:53
@@ -165,20 +165,15 @@
 BOOTMP1:
 
 NON_GPROF_ENTRY(bootMP)
+   .code16 
cli
CHECKPOINT(0x34, 1)
/* First guarantee a 'clean slate' */
-   data32
xorl%eax, %eax
-   data32
movl%eax, %ebx
-   data32
movl%eax, %ecx
-   data32
movl%eax, %edx
-   data32
movl%eax, %esi
-   data32
movl%eax, %edi
 
/* set up data segments */
@@ -188,17 +183,18 @@
mov %ax, %fs
mov %ax, %gs
mov %ax, %ss
-   mov $(boot_stk-_bootMP), %sp
+   mov $(boot_stk-_bootMP), %esp
 
/* Now load the global descriptor table */
addr32
data32
-   lgdtMP_GDTptr-_bootMP
+   /* XXX: sigh: lgdt  MP_GDTptr-_bootMP GAS BUG! */
+   .byte   0x0f, 0x01, 0x15/* XXX hand assemble! */
+   .long   MP_GDTptr-_bootMP   /* XXX hand assemble! */
 
/* Enable protected mode */
data32
movl%cr0, %eax
-   data32
orl $CR0_PE, %eax
data32
movl%eax, %cr0 
@@ -207,13 +203,11 @@
 * make intrasegment jump to flush the processor pipeline and
 * reload CS register
 */
-   data32
pushl   $0x18
-   data32
pushl   $(protmode-_bootMP)
-   data32
-   lret
+   lretl
 
+   .code32 
 protmode:
CHECKPOINT(0x35, 2)
 



Re: DDB && SMP?

1999-09-24 Thread Tor . Egge

> 
> I just ran across this:
> 
> Debugger("isp_attach")
> Stopped at  Debugger+0x37:  movl$0,in_Debugger
> db> cont
> whoa, other_cpus: 0x0002, stopped_cpus: 0x
> panic: stop_cpus() failed
> mp_lock = 0002; cpuid = 0; lapic.id = 
> Automatic reboot in 15 seconds - press a key on the console to abort
> 
> 
> Whuffo?

The kernel probably entered the debugger after the AP had been started
but before it accepts any interrupts.

- Tor Egge



Index: sys/i386/i386/db_interface.c
===
RCS file: /home/ncvs/src/sys/i386/i386/db_interface.c,v
retrieving revision 1.46
diff -u -r1.46 db_interface.c
--- db_interface.c  1999/08/28 00:43:42 1.46
+++ db_interface.c  1999/09/24 23:43:34
@@ -167,7 +167,7 @@
 #endif /* VERBOSE_CPUSTOP_ON_DDBBREAK */
 
/* Restart all the CPUs we previously stopped */
-   if (stopped_cpus != other_cpus) {
+   if (stopped_cpus != other_cpus && smp_started != 0) {
db_printf("whoa, other_cpus: 0x%08x, stopped_cpus: 0x%08x\n",
  other_cpus, stopped_cpus);
panic("stop_cpus() failed");
Index: sys/i386/include/smp.h
===
RCS file: /home/ncvs/src/sys/i386/include/smp.h,v
retrieving revision 1.47
diff -u -r1.47 smp.h
--- smp.h   1999/08/28 00:44:25 1.47
+++ smp.h   1999/09/24 23:39:47
@@ -177,6 +177,7 @@
 /* global data in init_smp.c */
 extern int invltlb_ok;
 extern int smp_active;
+extern int smp_started;
 extern volatile intsmp_idle_loops;
 
 #endif /* !LOCORE */



Re: request for review, patch to specfs to fix EOF condition alignment with buffer

1999-09-20 Thread Tor . Egge

> This problem was first found by Tor.  Tor's example creates
> an oddly-sized VN partition and then  dd's from it.  Without the
> patch the dd believes that it can read 2880 sectors.  With the
> patch it correctly reads the last (truncated) block.

Actually, the problem was discussed in -stable under the topic
"Interesting way to crash a 3.2-stable box" around 1999-08-28.

The discussion soon changed topic to 
"Interesting ways to print 3000 spaces...".

- Tor Egge



To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message



Re: ENABLE_SERIAL_BREAK_KEY...or something?

1999-06-10 Thread Tor . Egge

> Would be most excellent if this could be done. A couple of boxen I
> have here have serial consoles attached to other machines which
> do a very good simulation of a break when the controlling process
> leaves them. Dropping to DDB every time you reboot the other machine
> is, uh, less than desirable behaviour. :-)

I had the same problem.  Changing the sio code to require three breaks
within a 10-second interval before dropping into the debugger reduced
the problem for me.

- Tor Egge

Index: sys/i386/conf/options.i386
===
RCS file: /home/ncvs/src/sys/i386/conf/options.i386,v
retrieving revision 1.116
diff -u -r1.116 options.i386
--- options.i3861999/06/06 22:45:04 1.116
+++ options.i3861999/06/08 00:27:17
@@ -24,6 +24,7 @@
 AUTO_EOI_1 opt_auto_eoi.h
 AUTO_EOI_2 opt_auto_eoi.h
 BREAK_TO_DEBUGGER  opt_comconsole.h
+TRIPLE_BREAK_TO_DEBUGGER opt_comconsole.h
 CONSPEED   opt_comconsole.h
 I586_PMC_GUPROFopt_i586_guprof.h
 WLCACHEopt_wavelan.h
Index: sys/isa/sio.c
===
RCS file: /home/ncvs/src/sys/isa/sio.c,v
retrieving revision 1.246
diff -u -r1.246 sio.c
--- sio.c   1999/05/31 06:57:31 1.246
+++ sio.c   1999/06/08 00:27:38
@@ -427,6 +427,16 @@
{ -1,   -1 }
 };
 
+#define DDB_BREAK_MASK (IER_ERXRDY | IER_ETXRDY | IER_ERLS | IER_EMSC)
+
+#ifdef TRIPLE_BREAK_TO_DEBUGGER
+#ifndef TRIPLE_BREAK_TIMEOUT
+#define TRIPLE_BREAK_TIMEOUT 10
+#endif
+int triple_break_count;/* number of breaks detected */
+int triple_break_time; /* time_second sampled at first break */
+#endif
+
 #ifdef COM_ESP
 /* XXX configure this properly. */
 static Port_t  likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, };
@@ -998,6 +1008,9 @@
com->lt_out.c_ispeed = com->lt_out.c_ospeed =
com->lt_in.c_ispeed = com->lt_in.c_ospeed =
com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate;
+#if defined(DDB) && defined(BREAK_TO_DEBUGGER) && defined(DDB_BREAK_MASK)
+   outb(iobase + com_ier, DDB_BREAK_MASK);
+#endif
} else
com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED;
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
@@ -1404,7 +1417,12 @@
com->pps.ppsparam.mode = 0;
outb(iobase + com_cfcr, com->cfcr_image &= ~CFCR_SBREAK);
{
-   outb(iobase + com_ier, 0);
+#if defined(DDB) && defined(BREAK_TO_DEBUGGER) && defined(DDB_BREAK_MASK)
+   if (com->unit == comconsole)
+   outb(iobase + com_ier, DDB_BREAK_MASK);
+   else
+#endif
+   outb(iobase + com_ier, 0);
tp = com->tp;
if (tp->t_cflag & HUPCL
/*
@@ -1704,7 +1722,23 @@
if (line_status & LSR_BI) {
 #if defined(DDB) && defined(BREAK_TO_DEBUGGER)
if (com->unit == comconsole) {
+#ifdef TRIPLE_BREAK_TO_DEBUGGER
+   if (time_second > 
+   triple_break_time + 
+   TRIPLE_BREAK_TIMEOUT)
+   triple_break_count = 0;
+   triple_break_count++;
+   if (triple_break_count == 1)
+   triple_break_time = 
+   time_second;
+   else if (triple_break_count 
+== 3) {
+   triple_break_count = 0;
+   breakpoint();
+   }
+#else
breakpoint();
+#endif
goto cont;
}
 #endif


Re: 4-way SMP broken ?

1999-06-09 Thread Tor . Egge

> interesting.  then why the delay in bringing up the AP?  Note in the
> dmesg output below, that the AP only comes up during the SCSI delay.  I
> have also added other comments to the following output.

The APs need the giant kernel lock when initializing the 
local APIC and printing the "launched" message.

I added code for bringing up the APs earlier, but had to disable it,
since it caused some machines to hang (the APs were probably launched
too early, causing the BSP to attempt to send IPIs before the local
APIC was initialized).

A revised patch for bringing up the APs early is enclosed.

- Tor Egge

Index: mp_machdep.c
===
RCS file: /home/ncvs/src/sys/i386/i386/mp_machdep.c,v
retrieving revision 1.102
diff -u -r1.102 mp_machdep.c
--- mp_machdep.c1999/06/01 18:19:42 1.102
+++ mp_machdep.c1999/06/08 00:27:19
@@ -494,6 +494,10 @@
 
 
 #if defined(APIC_IO)
+
+/* Wait for all APs to be fully initialized */
+extern int wait_ap(unsigned int);
+
 /*
  * Final configuration of the BSP's local APIC:
  *  - disable 'pic mode'.
@@ -526,6 +530,9 @@
 
if (bootverbose)
apic_dump("bsp_apic_configure()");
+   wait_ap(100);
+   if (smp_started == 0)
+   printf("WARNING: Failed to start all APs\n");
 }
 #endif  /* APIC_IO */
 
@@ -1743,9 +1750,6 @@
 #endif /* USE_CLOCKLOCK */
 }
 
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
 
 /*
  * start each AP in our list


Re: successfull SMP / current on duel P-III box.Yahhhh. I've successfully brought -current up in SMP on a duel P-III box.

1999-04-17 Thread Tor . Egge
> I have one problem, though.  During the kernel boot:
> 
>   isa_dmainit(2, 1024) failed
> 
> And, of course, any access to something that needs isa 
> dma (e.g. floppy) panics.  It's a large-memory machine (1G).
> I was under the impression that this was supposed to be fixed
> in the vm/vm_page.c commit:

[...]

> Anyone have any ideas?

Yes.

Using a recent -current GENERIC kernel, I reproduced the problem with
a machine having 512 MB memory.

All physical memory below 16 MB is used.

vm_page_list_find uses TAILQ_LAST when prefer_zero is set, thus 
pv_table occupies the physical memory below 640 KB.

The remaining physical memory below 16 MB is allocated to
vm_page_array and vm_page_buckets in vm_page_startup, or was reserved
earlier (e.g. kernel text, data, bss).

IMO, vm_page_array and vm_page_buckets should not use physical memory
below 16 MB on large-memory machines. This can be achieved by
modifying the contents of phys_avail, causing the largest region to
be above 16 MB.

GENERIC kernel:
(kgdb) print phys_avail
$66 = {4096, 651264, 3624960, 536862720, 0, 0, 0, 0, 0, 0}

The kernel I normally use:
(kgdb) print phys_avail
$1 = {4096, 651264, 3301376, 16777216, 16777216, 536608768, 0, 0, 0, 0}

This works fine for machines with 512 MB and 1 GB memory.

For machines with more than 2 GB memory, the size of pv_table might become
a problem.

Alternating between TAILQ_INSERT_HEAD and TAILQ_INSERT_TAIL in
vm_page_startup might be a workaround for this second problem (causing
the memory below 16 MB not already allocated by vm_page_startup to be
in the middle of the page queues).

- Tor Egge


To Unsubscribe: send mail to majord...@freebsd.org
with "unsubscribe freebsd-current" in the body of the message



Re: SMP broken in -CURRENT?

1999-04-12 Thread Tor . Egge
> I haven't been able to get a working SMP kernel out of -CURRENT recently.
> I don't know exactly when it broke, because I usually rebuild on a weekly
> basis.  The kernel hangs after:
> APIC_IO: Testing 8254 interrupt delivery
> and doesn't ever come back (panic or otherwise).
> 
> The one thing that I noticed is that on the older kernels, CPU#1 is
> launched after the APIC_IO Testing and Routing.  On the newer kernels,
> CPU#1 is launched far earlier.
> 
> Anybody have any ideas?

You might want to try this patch, which disables the early start of CPU#1.

Index: mp_machdep.c
===
RCS file: /home/ncvs/src/sys/i386/i386/mp_machdep.c,v
retrieving revision 1.96
diff -u -r1.96 mp_machdep.c
--- mp_machdep.c1999/04/11 00:43:43 1.96
+++ mp_machdep.c1999/04/13 02:08:54
@@ -1930,9 +1930,11 @@
for (i = 0; i < mp_ncpus; i++) {
bcopy( (int *) PTD + KPTDI, (int *) IdlePTDS[i] + KPTDI, NKPDE 
* sizeof (int));
}
+#if 0
wait_ap(100);
if (smp_started == 0)
printf("WARNING: Failed to start all APs\n");
+#endif
 
/* number of APs actually started */
return mp_ncpus - 1;




To Unsubscribe: send mail to majord...@freebsd.org
with "unsubscribe freebsd-current" in the body of the message