[Xenomai-core] Long execution time inside __ipipe_unstall_iret_root()
I have a setup which apparently suffers from long interrupt-disabled sections during heavy load (make -j16 of a kernel). I have instrumented the __ipipe_unstall_iret_root() function with the tracer as follows: asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs) { ipipe_trace_special(0x20, 0); ipipe_declare_cpuid; /* Emulate IRET's handling of the interrupt flag. */ ipipe_trace_special(0x21, 0); local_irq_disable_hw(); ipipe_trace_special(0x22, 0); ipipe_load_cpuid(); /* Restore the software state as it used to be on kernel entry. CAUTION: NMIs must *not* return through this emulation. */ ipipe_trace_special(0x23, 0); if (!(regs.eflags & X86_EFLAGS_IF)) { ipipe_trace_special(0x24, 0); if (!__test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status)) trace_hardirqs_off(); regs.eflags |= X86_EFLAGS_IF; ipipe_trace_special(0x25, 0); } else { ipipe_trace_special(0x26, 0); if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status)) { trace_hardirqs_on(); __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); } ipipe_trace_special(0x27, 0); /* Only sync virtual IRQs here, so that we don't recurse indefinitely in case of an external interrupt flood. */ if ((ipipe_root_domain->cpudata[cpuid]. irq_pending_hi & IPIPE_IRQMASK_VIRT) != 0) { ipipe_trace_special(0x28, 0); __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); } ipipe_trace_special(0x29, 0); } #ifdef CONFIG_IPIPE_TRACE_IRQSOFF ipipe_trace_end(0x800D); #endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ } While under heavy load, the following trace log is produced: ... 
+ func-1160.124 xnarch_get_cpu_time+0x8 (rtdm_toseq_init+0x21) + func-1160.169 xnarch_tsc_to_ns+0x12 (xnarch_get_cpu_time+0xf) + func-1160.284 rtdm_event_timedwait+0xe (rt_e1000_read+0xf5 [rt_e1000]) | # func-1150.189 xnsynch_sleep_on+0xe (rtdm_event_timedwait+0x141) | # func-1150.184 xnpod_suspend_thread+0xe (xnsynch_sleep_on+0x3bf) | # func-1150.194 xntimer_migrate+0xe (xnpod_suspend_thread+0xa2) | # func-1150.144 xntimer_start_aperiodic+0xe (xnpod_suspend_thread+0xc5) | # func-1150.334 xnarch_ns_to_tsc+0x14 (xntimer_start_aperiodic+0x63) | # func-1140.199 xnpod_schedule+0xe (xnpod_suspend_thread+0x237) | # [18006] -?- 99 -1140.814 xnpod_schedule+0x176 (xnpod_suspend_thread+0x237) | # func-1130.874 __switch_to+0xe (xnpod_schedule+0x77c) | # [18019] -?- -1 -1120.799 xnpod_schedule+0x887 (xnintr_irq_handler+0x18e) | +func-1120.319 __ipipe_walk_pipeline+0xb (__ipipe_handle_irq+0x87) | #func-1110.204 __ipipe_stall_root+0x8 (resume_userspace+0x5) #func-1110.134 __ipipe_unstall_iret_root+0xa (restore_nocheck_notrace+0x0) #(0x20) 0x -1110.119 __ipipe_unstall_iret_root+0x19 (restore_nocheck_notrace+0x0) #(0x21) 0x -1110.124 __ipipe_unstall_iret_root+0x25 (restore_nocheck_notrace+0x0) | #(0x22) 0x -1110.119 __ipipe_unstall_iret_root+0x32 (restore_nocheck_notrace+0x0) | #(0x23) 0x -1110.119 __ipipe_unstall_iret_root+0x45 (restore_nocheck_notrace+0x0) | #(0x26) 0x -1100.134 __ipipe_unstall_iret_root+0x88 (restore_nocheck_notrace+0x0) | +(0x27) 0x -1100.109 __ipipe_unstall_iret_root+0x9e (restore_nocheck_notrace+0x0) | +(0x29) 0x -110 57.850 __ipipe_unstall_iret_root+0xb6 (restore_nocheck_notrace+0x0) | +func -520.289 __ipipe_handle_irq+0xe (ipipe_ipi3+0x26) | +func -520.169 __ipipe_ack_apic+0x8 (__ipipe_handle_irq+0xbf) | +func -520.154 __ipipe_dispatch_wired+0x14 (__ipipe_handle_irq+0x5f) | # func -520.309 xnintr_clock_handler+0xe (__ipipe_dispatch_wired+0xbe) | # func -510.494 xntimer_tick_aperiodic+0xe (xnintr_clock_handler+0x72) | # func -510.459 
xntimer_next_local_shot+0x16 (xntimer_tick_aperiodic+0x170) | # func -500.154 rthal_irq_host_pend+0x8 (xnintr_clock_handler+0x131) | # func -500.684 __ipipe_schedule_irq+0xb (rthal_irq_host_pend+0x29) | +func -500.294 __ipipe_walk_pipeline+0xb (__ipipe_handle_irq+0x87) | +func -490.309 __ipipe_sync_stage+0xe (__ipipe_walk_pipeline+0xcb) #func -490.299 smp_apic_timer_interrupt+0x11
Re: [Xenomai-core] Long execution time inside __ipipe_unstall_iret_root()
So, unless the IRQ below should have been raised much earlier, there is no issue here. The point is: I actually do have interrupts (not the IPI that is visible here) that are being withheld (on average 80 us late) during this period. On this particular setup, I have seen numbers exceeding 300 us. Strangely enough, the latency test does not show these hiccups. I'm suspecting cache issues, but I would think these numbers are a bit large for that. What you say is that between tracepoint 0x27 and 0x29, we may have entered userland. But where does this happen, given that point 0x28 is not executed ? I also do understand that, if a cli or sti would still lurk somewhere in this configuration, I'm going to have a real bad time... Although I doubt it, since the unexpected latencies always happen at this particular point. Jeroen. If there is a real delay, evil user land may have switched off IRQs (given root privileges), or there is some hardware related IRQ delivery latency. But this cannot be determined via the tracer as it only sees kernel land. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Long execution time inside __ipipe_unstall_iret_root()
Ouch, this shouldn't be allowed in user space! WBINVD is a privileged instruction. Do we leak privileges to user land??? Please check if your execution mode (privilege ring) is correct there. No, I rather meant a kernel-mode program that was controlled from the user space. Sorry for upsetting you. But indeed, wbinvd is devastating if you can execute it, causing typically around 300 us latencies, at worst even milliseconds (cache-size and state dependent)! If I recall correctly, some of the Linux AGP GART drivers use(d?) it. This raises another interesting question: to what extent is the x86 actually a viable and dependable realtime platform, with its SMI's and highly uncontrollable caching architecture ? How would VT-based solutions compare ? (BTW Intel should really have implemented a feature to use parts of the cache as SRAM.) Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Registering MSI interrupt with Xenomai fails
Could there be any reason why e.g. a pci_config_read_dword() call would fail in Xenomai context (because when still in the module probe code, I can perform the call correctly - in both cases I checked the dev->bus->ops->read/write pointers and they are identical) ? Today I moved the Xenomai ISR registration (rtdm_request_irq) from the RTDM open method (rtdm_dev_open) to the module probe code (dev_probe). Then the registration works, but not always. Upon frequent insmod- and rmmodding, I still can make the machine freeze. I'm now more or less lost and open to any ideas... Jeroen. BTW: In the process of removing all instrumentation, I inadvertently upgraded to SVN head (3149), after which no Xenomai process could be run anymore (Binding failed. Bad address.). I solved that temporarily by reverting to my original 3146. Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Registering MSI interrupt with Xenomai fails
arch_setup_msi_irq() creates an IRQ on-the-fly from the current descriptor which is being converted to an MSI interrupt using pci_msi_enable(). From that point, the I-pipe might have an obsolete view of the interrupt map. I suspect an I-pipe issue here. I think the I-pipe is alright. It only cares for the actual interrupt numbers and irq_desc[] should be current with these numbers upon the ipipe_virtualize_irq call, which occurs only after enabling MSI, right ? Currently it looks like every PCI config space access instruction in read_msi_msg() (used to perform set_msi_irq_affinity) freezes the machine. I have absolutely no clue yet why this happens. Jeroen. -- Philippe. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Registering MSI interrupt with Xenomai fails
The I-pipe virtualizes all IOAPIC and ISA interrupts upon startup. Then, any code calling pci_msi_enable() would end up allocating a new MSI interrupt vector. I see. So in fact, at least for the Linux domain, which indeed registers all interrupts upon initialization, every newly created MSI vector should be revirtualized. Currently it looks like every PCI config space access instruction in read_msi_msg() (used to perform set_msi_irq_affinity) freezes the machine. I have absolutely no clue yet why this happens. Wild trivial guess, is the irq parameter the expected one, since the rest depends on it? You mean the one passed into xnarch_set_irq_affinity ? Yes, it is consistently 219 and the mask is 0x3. In set_msi_irq_affinity(), the associated vector is calculated as 225. I'm not sure the latter one is correct, but at least it shouldn't freeze the machine upon a PCI config read. Could there be any reason why e.g. a pci_config_read_dword() call would fail in Xenomai context (because when still in the module probe code, I can perform the call correctly - in both cases I checked the dev->bus->ops->read/write pointers and they are identical) ? Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Registering MSI interrupt with Xenomai fails
Now looking into xnintr_attach(). In xnintr_attach(), the crash occurs in xnarch_set_irq_affinity(). If the call is removed, the driver works as expected (gaining another 3 usec of latency). Now investigating ipipe_set_irq_affinity. Jeroen. Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
[Xenomai-core] Registering MSI interrupt with Xenomai fails
We have a driver that operates on a PCIe card. The card has IRQ17. If we use it like that (IO-APIC-fasteoi), interrupt registration using rtdm_irq_request works correctly. (We also use rtdm_irq_enable afterwards, but it seems that the request already enables the interrupt.) However, if we redefine our interrupt as MSI using pci_enable_msi(), rtdm_irq_request freezes the machine. (After pci_enable_msi, the new interrupt vector is 218 and /proc/interrupts correctly reports PCI-MSI-edge.) We have another MSI-enabled card in the system (network card controlled by Linux) and this one works correctly. So I suspect that the Ipipe is clear and the bug must reside in Xenomai. I've been adding printk instrumentation throughout the Ipipe, Xenomai and RTDM, but the problem is that possibly the contents of the kernel log do not make it to the terminal upon the freeze (no oops, no panic). Is there any way of efficiently debugging this ? Linux 2.6.23 Adeos 1.10-11 Xenomai SVN Rev. 3146 Thanks, Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
[Xenomai-core] I-pipe fasteoi interrupt handling issue
On my Linux 2.6.23 with latest I-pipe patch (1.10-10), interrupts are dispatched twice if they are of the fasteoi type. I have the impression that the I-pipe does the eoi() acknowledgement (in kernel/irq/chip.c: __ipipe_ack_fasteoi_irq) without first masking off the IRQ. As the interrupt line hasn't been cleared at this time, the interrupt is immediately reissued. Afterwards (in __ipipe_end_fasteoi_irq), unmasking is performed correctly nevertheless. If I add 'desc->chip->mask(irq);' before the 'desc->chip->eoi(irq);', Linux won't boot anymore; apparently the timer IRQs no longer make it to the APIC. I'm now going to try a level-irq, but if in the meantime anyone can explain me if the above reasoning is totally off track or why the timer locks up after adding the mask... Thanks, Jeroen. A typical I-pipe trace shows: +func -290.079 try_to_wake_up+0x14 (wake_up_process+0x14) +func -290.084 task_rq_lock+0x14 (try_to_wake_up+0x24) +func -290.464 ipipe_check_context+0x14 (task_rq_lock+0x28) #func -280.094 _spin_lock+0x9 (task_rq_lock+0x63) #func -280.509 ipipe_check_context+0x14 (_spin_lock+0x15) #func -280.479 ipipe_check_context+0x14 (_spin_lock+0x3c) Here's where the IRQ happens | #func -270.169 __ipipe_handle_irq+0xe (common_interrupt+0x21) | #func -270.119 __ipipe_ack_irq+0x8 (__ipipe_handle_irq+0xc4) Here's where the APIC already receives its EOI | #func -270.104 __ipipe_ack_fasteoi_irq+0x8 (__ipipe_ack_irq+0x19) | #func -270.209 ack_ioapic_quirk_irq+0xa (__ipipe_ack_fasteoi_irq+0xe) | #func -260.114 __mask_IO_APIC_irq+0x8 (ack_ioapic_quirk_irq+0xaa) | #func -260.119 __modify_IO_APIC_irq+0xe (__mask_IO_APIC_irq+0x14) | #func -261.859 io_apic_base+0x8 (__modify_IO_APIC_irq+0x4d) | #func -240.174 io_apic_base+0x8 (__modify_IO_APIC_irq+0x59) | #func -240.164 __ipipe_dispatch_wired+0x14 (__ipipe_handle_irq+0x73) | #*func -240.289 xnintr_irq_handler+0xe (__ipipe_dispatch_wired+0xea) Here's where our driver gets a chance to process the interrupt and clear the IRQ line | 
#*func -242.109 rt_driver_intr+0xc [rt_driver] (xnintr_irq_handler+0x15e) | #*func -220.129 xnarch_get_cpu_time+0x8 (rt_driver_intr+0x4b [rt_driver]) | #*func -220.139 xnarch_tsc_to_ns+0x12 (xnarch_get_cpu_time+0xf) Our driver signals a pending task and leaves. | #*func -210.199 rtdm_event_signal+0xe (rt_driver_intr+0x2e [rt_driver]) | #*func -211.299 xnsynch_flush+0xe (rtdm_event_signal+0x12f) | #*func -200.159 xnpod_resume_thread+0xe (xnsynch_flush+0x3a4) | #*[ 2489] -?- 99 -200.294 xnpod_resume_thread+0x140 (xnsynch_flush+0x3a4) | #*func -191.549 xntimer_stop_aperiodic+0xe (xnpod_resume_thread+0xa0d) | #*func -180.644 xntimer_next_local_shot+0x16 (xntimer_stop_aperiodic+0x4f4) | #*func -170.419 xnpod_schedule+0x11 (rtdm_event_signal+0x2b4) | #*func -170.129 rthal_irq_end+0x8 (xnintr_irq_handler+0x262) | #*func -170.134 __ipipe_end_fasteoi_irq+0x8 (rthal_irq_end+0x24) | #*func -170.109 unmask_IO_APIC_irq+0x12 (__ipipe_end_fasteoi_irq+0xe) | #*func -160.134 __ipipe_spin_lock_irqsave+0x9 (unmask_IO_APIC_irq+0x1e) | #*func -160.099 __unmask_IO_APIC_irq+0x8 (unmask_IO_APIC_irq+0x27) | #*func -160.089 __modify_IO_APIC_irq+0xe (__unmask_IO_APIC_irq+0x14) | #*func -161.609 io_apic_base+0x8 (__modify_IO_APIC_irq+0x4d) | #*func -150.159 io_apic_base+0x8 (__modify_IO_APIC_irq+0x59) | #*func -140.124 __ipipe_unlock_irq+0xe (unmask_IO_APIC_irq+0x3d) | #*func -140.184 __ipipe_spin_unlock_irqrestore+0x9 (unmask_IO_APIC_irq+0x49) | #*func -140.209 xnpod_schedule+0x11 (xnintr_irq_handler+0x28f) | #*[ 220] gatekee -1 -141.139 xnpod_schedule+0xec (xnintr_irq_handler+0x28f) | #*func -130.599 __switch_to+0xe (xnpod_schedule+0x8b4) | #*[ 2489] -?- 99 -121.019 xnpod_schedule+0x952 (xnpod_suspend_thread+0x72c) | #*func -110.589 __ipipe_restore_pipeline_head+0x9 (rtdm_event_timedwait+0x181) | +*func -100.169 __ipipe_handle_irq+0xe (common_interrupt+0x21) | +*func -100.109 __ipipe_ack_irq+0x8 (__ipipe_handle_irq+0xc4) | +*func -100.109 __ipipe_ack_fasteoi_irq+0x8
[Xenomai-core] PRNG not seeded on AMD K7
When I try to open an ssh connection to or from a PC running Xenomai, I get a 'PRNG not seeded' error. However, it only occurs when an RT application is running (such as 'latency'). The computer on which this occurs is an AMD K7-650. I've tested on a Pentium III 500 as well (nearly same .config) and this error doesn't show up then. Any idea of where I should start debugging ? I assume that PRNG stands for 'Programmable Random Number Generator' or something like that. Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Handling PCI MSI interrupts
I tried this patch and it doesn't solve the issue I'm facing. With and without this patch, my symptoms are the same. I tested (and intended) the patch for MSI (w/o maskbits), not MSI-X. What e1000 chip are you using exactly? Easiest way to tell is by using '/sbin/lspci'. I may be able to help you out with MSI-X as well, but in that case, I have no hardware platform to test on. You can check whether or not MSI is actually being used by doing '/sbin/lspci -v' and look for the Capability: Message Signalled Interrupt. When the driver is running in MSI mode, it should read 'Enable+' instead of 'Enable-'. Finally, verify how interrupts are dispatched. Have a look at /proc/interrupts for this ('cat /proc/interrupts'). I'm running a Dell 2850, dual CPU machine. As it's a Dell, I assume there's two Intel Pentium CPU's inside. Are you running with SMP enabled ? When I build a kernel without Adeos then things are fine. When I build with Adeos and MSI enabled the following occurs: 1) If BIOS has USB disabled then the system will hang without even a num-lock response (i.e. tapping the num-lock key doesn't toggle the light). The hang occurs just about the time the E1000 driver would load and enable an MSI interrupt. 2) If BIOS has USB enabled then the system will run much longer but may hang during heavy interrupt load on the E1000 driver. Are you using the e1000 driver in NAPI mode ? It is recommended to do this, especially on the preemptible kernel, as it may significantly reduce the interrupt volume. In that case, I think it is doubtful if using MSI would give you any benefit at all over normal, shared IRQs. My assumption based on past experience is that no num-lock response means an infinite interrupt loop. The local (internal) CPU APIC hasn't been informed that the interrupt has been dealt with and it will therefore allow no other interrupts anymore to arrive in the CPU (including your keyboard's). In fact, your CPU is idle. 
[The original 8259 was designed to detect the IRET instruction bit pattern on the databus and use that as an acknowledge signal. Upon arrival of the second 8259 in the PC/AT, this could no longer be done. I don't know if the APIC could do it today (it seems possible, theoretically). ] When I build a kernel with Adeos but disable MSI then the system works fine for the most part. There is one scenario where the system will still hang doing disk and network accesses under a moderate load of I/O. Hm. That may indicate another issue. Both of these tests are just to get a stable kernel before I really start using Adeos. So Adeos is in its default configuration and I haven't loaded Xenomai modules when these hangs occur. I'm currently running the 2.6.14.4 kernel with the 2.6.14-1.0-12 patch of Adeos and then I included your msi.c patch from the previous e-mail. If you have any further hints or suggestions I'll try them. Meanwhile I'm trying different versions of various drivers (e1000 and scsi) as well as updating the patch level of the kernel itself. Try upgrading the kernel. The kernel usually comes with updated drivers as well. Currently I'm running 2.6.16-rc2, which I had to patch manually for Adeos (about 3 'hunks' from the 2.6.15-i386-1.2-00 patch didn't apply properly). By using 2.6.16-rc2, I got much better Intel (especially i865 graphics) chipset support than 2.6.15. Note, however, that I did the bug fixing in this thread on a plain 2.6.15, though (and the msi.c code is nearly identical). I would recommend upgrading to 2.6.15 with the latest Adeos patch and try to get a stable system before enabling MSI. Jeroen.
Re: [Xenomai-core] Handling PCI MSI interrupts
Could you post the patch you are successfully using to boot your box? TIA, --- linux-2.6.15/drivers/pci/msi.c 2006-01-03 04:21:10.0 +0100 +++ linux-2.6.15-ipipe/drivers/pci/msi.c 2006-02-17 16:48:21.0 +0100 @@ -185,10 +185,20 @@ spin_unlock_irqrestore(&msi_lock, flags); } +#if defined(CONFIG_IPIPE) +/* Attention: only MSI without maskbits is currently fixed for I-PIPE */ +static void ack_msi_irq_wo_maskbit(unsigned int vector) +{ + __ack_APIC_irq(); +} +#endif /* CONFIG_IPIPE */ + static void end_msi_irq_wo_maskbit(unsigned int vector) { move_native_irq(vector); +#if !defined(CONFIG_IPIPE) ack_APIC_irq(); +#endif /* !CONFIG_IPIPE */ } static void end_msi_irq_w_maskbit(unsigned int vector) @@ -244,7 +254,11 @@ .shutdown = shutdown_msi_irq, .enable = do_nothing, .disable = do_nothing, +#if defined(CONFIG_IPIPE) + .ack = ack_msi_irq_wo_maskbit, +#else /* CONFIG_IPIPE */ .ack = do_nothing, +#endif /* !CONFIG_IPIPE */ .end = end_msi_irq_wo_maskbit, .set_affinity = set_msi_irq_affinity }; Jeroen.
Re: [Xenomai-core] Handling PCI MSI interrupts
Ok; unless my brain is completely toast, the last patch I recently posted does thesame, but extends the support to the MSI and MSI-X with masking bit cases. Correct. Could you test in on your box with a vanilla 2.6.15 when time allows? If it works, then I will roll out a new Adeos/x86 patch including this fix. TIA, I'll do that. Give me half an hour. Jeroen.
Re: [Xenomai-core] Handling PCI MSI interrupts
In a search for the problem, I encountered some code which may be at the root of the problem. In file arch/i386/kernel/io_apic.c I see that a function mask_and_ack_level_ioapic_vector() is being defined, whereas the original 2.6.15 code did not ever issue any IO_APIC calls (both mask_and_ack_level_ioapic and end_edge_ioapic are void in include/linux/). Is it possible that this code was transferred with patches for earlier kernels (at least from 2.6.11) ? I'm going to check this now and hopefully fix it. [ As a matter of fact, the IO_APIC shouldn't play any role in the processing of MSI interrupts, which are addressed at (default) addr. 0xFEE00000 in the CPU. An exception to this are interrupts issued by PCI cards to the IO_APIC itself (default addr.: 0xFEC00020) to trigger IRQs 0-23, which is a feature Linux doesn't seem to use and was seemingly intended for card mftrs. to support MSI without changing the drivers. ] Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Handling PCI MSI interrupts
At second sight, the patches are ok. I've boiled the problem down to a lack of EOI. If I do __ack_APIC_irq() by hand after the desc->handler->end() has run, the system no longer freezes. I'm finding out why that is. Jeroen.
Re: [Xenomai-core] Handling PCI MSI interrupts
Ok. I've found it. The MSI interrupt type uses its end() handler to acknowledge the interrupt using ack_APIC_irq() (drivers/pci/msi.c). Xenomai uses the ack() handler to expedite the acknowledgement of an IRQ. In case of MSI, ack() is a NOP. The main problem is that Xenomai redefines ack_APIC_irq() calls (they become NOPs, as defined in apic.h). Maybe the ISRs used so far never issued ack_APIC_irq() themselves, and used always the IO-APIC (which contains the correct __ack_APIC_irq() call) ? I feel a bit awkward about changing msi.c . Any opinions about how to change Xenomai / Linux ? Jeroen.
Re: [Xenomai-core] Handling PCI MSI interrupts
I'm also investigating why MSI also doesn't work under RTDM. It's merely a coincidence that the above bug (MSI interrupts from Linux devices getting blocked) emerged and produced exactly the same behaviour (system hanging). It turns out not to be coincidental. rtdm_irq_request() (through passing iack=NULL to virtualize_irq()) uses the default Linux driver as an acknowledgement routine for that interrupt. So fixing regular Linux interrupts also fixed RTDM operation. I'll have to sleep over the best solution in msi.c . For now, I have implemented an __ack_APIC_irq() in a routine ack_msi_irq_wo_maskbit(). How do I make a patch for that ? As for the bitmasked varieties, I need to be careful here. First I'll have a look at the details of MSI with maskbits. Some of this stuff has actually been devised to allow deferral of IRQ acknowledgement. I wouldn't want to break that feature. Anyway, with this simple fix, I'm finally able to use my Dell GX270 without IRQ sharing for the first time :-) . Jeroen.
[Xenomai-core] Handling PCI MSI interrupts
I'm having problems using MSI interrupts in Xenomai. When using normal PCI interrupts, my PCI card + RTDM driver work flawlessly. However, I noticed the following while trying to enable MSI (by having Linux do all the administrative register-fill-ins using pci_enable_msi()): 1. The kernel is compiled for MSI. (PCI_CONFIG_MSI=yes) 2. My card correctly advertises the MSI capability (checked with 'lspci -v'). 3. I do get a new IRQ number in the pci_dev struct (217 instead of 193) after invoking pci_enable_msi(). 4. After pci_enable_smi(), 'lspci -v' correctly indicates that Message Signalled Interrupts are now enabled (Enable+). 5. I can register the new IRQ with rtdm_request_irq(). Return value is 0. 6. The IRQ number appears in /proc/xenomai/irq. 7. I can enable the new IRQ with rtdm_irq_enable(). Return value is 0. 8. I can disable the new IRQ with rtdm_irq_disable(). Return value is 0. 9. I can unregister the new IRQ with rtdm_irq_free(). Return value is 0. 10. I'm able to do pci_disable_pci() successfully. If I actually enable the card interrupt, it goes wrong after 7 after arrival of the first interrupt. The ISR is executed (checked that), which checks if the card was actually interrupting and, if so, returns with RTDM_IRQ_ENABLE. After that, the machine is absolutely dead (frozen). Any hints, suggestions or things I may check ? Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Handling PCI MSI interrupts
2. Keep my program alive for a few secs using task_sleep() or so to see if cleanup code is going awry. Alas, hangs directly after going to sleep(). Strangely, console output always arrives to the screen unhurt... Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Handling PCI MSI interrupts
I've got some bad news. I've rewritten (parts of) my driver to operate in native Linux (i.e. no RTDM, no Xenomai). When I run this in a Xenomai-augmented kernel (with Adeos, evidently), the machine hangs. However, when the same code is run in the same but unmodified kernel, the code works as expected. Therefore it seems that Adeos is to blame here. Now, I could try to write a small driver program illustrating the problem. Of course, it is impossible for you to test on my board. Maybe we could agree on an MSI capable piece of hardware, that I could write the code for ? The reason I would very much like to test this setup is that I look upon MSI as a very viable alternative to these ever-damned interrupt sharing mechanisms. It is becoming more and more difficult to find computers that allow you to reserve (a few) IRQ-lines these days (especially notebooks and OEM desktops). Additionally, MSI is capable of bypassing the IO-APIC entirely (although the Linux kernel needs that APIC support to enable MSI - I've still got to find out why that is). Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Handling PCI MSI interrupts
In the meantime, I've located the rthal_irq_disable() and used it instead of the RTDM return value (which is now 0). The machine still hangs. More importantly, I noticed that the second (after the first) interrupt gets lost (as is to be expected when the interrupt remains disabled). This causes the RTDM driver to timeout and the Xenomai program using the RTDM driver is actually able to gracefully shut down. It seems that, upon exiting, the machine hangs. I'll now try two new tests: 1. Turn off the cards's interrupts after the first IRQ has arrived. 2. Keep my program alive for a few secs using task_sleep() or so to see if cleanup code is going awry. Any other tests ? Jeroen.
Re: [Xenomai-core] Handling PCI MSI interrupts
I've got some bad news. I've rewritten (parts of) my driver to operate in native Linux (i.e. no RTDM, no Xenomai). When I run this in a Xenomai-augmented kernel (with Adeos, evidently), the machine hangs. However, when the same code is run in the same but unmodified kernel, the code works as expected. Therefore it seems that Adeos is to blame here. Now, I could try to write a small driver program illustrating the problem. Of course, it is impossible for you to test on my board. Maybe we could agree on an MSI capable piece of hardware, that I could write the code for ? The reason I would very much like to test this setup is that I look upon MSI as a very viable alternative to these ever-damned interrupt sharing mechanisms. It is becoming more and more difficult to find computers that allow you to reserve (a few) IRQ-lines these days (especially notebooks and OEM desktops). Additionally, MSI is capable of bypassing the IO-APIC entirely (although the Linux kernel needs that APIC support to enable MSI - I've still got to find out why that is). Jeroen.
Re: [Xenomai-core] [PATCH] Shared irqs v.6
I mean that the support of shared interrupts for ISA boards (edge-triggered stuff) is a kind of emulation to overcome the shortcomings of the initial design on the hardware level. The hardware was just not supposed to support shared interrupt channels. So, let's keep it a bit aside from another code :o) Unfortunately, this crappy hardware is still quite common in the embedded domain. If I've understood correctly, the only reason for having this support is to avoid - after discovering an interrupting UART - that the IRQ line remains high upon exit, which would cause the 8259 not to issue the CPU IRQ for that line anymore ? The proposed solution is therefore to traverse the entire list of UARTs connected to that IRQ line and make sure none of them was interrupting in two consecutive passes (by checking their status registers). That would mean the IRQ line must be deasserted and the 8259 will properly detect any newly arriving interrupts, since the 8259 has been acknowledged before handling the interrupt. 1. Wouldn't it be more efficient to make this a compile-time option, instead of burdening the nucleus with it ? 2. Would it be an option, in the embedded boards Jan is speaking of, to put the 8259 in level sensitive mode ? Some of the boards I know don't actually have this selection logic for the built-in interrupt sources such as the timer and the IDE I/F and it therefore only applies to the ISA bus. 3. Beware of UARTs that cause interrupts and have a problem that causes them to spuriously return 'all green' upon reading of the IIR register. I have dealt with an integrated ('Super I/O') card with that problem before. The only solution was to look at LSR and check THRE as well, even when no TX interrupt was seemingly present. Must be a logic race. Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
I've installed both patches and the problem seems to have disappeared. I'll try it on another machine tomorrow, too. Meanwhile: thanks very much for the assistance ! While testing more thoroughly, my triggers for zero mutex values after acquiring the lock are going off again. I was using the SVN xenomai development tree, but I've now switched to the (fixed) 2.1-rc2 in order to apply the patches. Is Jan's bugfix included in that one ? Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
I've installed both patches and the problem seems to have disappeared. I'll try it on another machine tomorrow, too. Meanwhile: thanks very much for the assistance ! While testing more thoroughly, my triggers for zero mutex values after acquiring the lock are going off again. I was using the SVN xenomai development tree, but I've now switched to the (fixed) 2.1-rc2 in order to apply the patches. Is Jan's bugfix included in that one ? Jeroen.
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
Revision 466 contains the mutex-info fix, but that is post -rc2. Why not switching to SVN head? Philippe asked to apply the patch against Xenomai 2.1-rc2. Can I safely patch it against the SVN tree ? After that, what will 'svn up' do to the patched tree ? Remember I'm quite new to Linux. Actually, I spent half an hour finding out how that patch stuff (especially the -p option) works. Jeroen.
Re: [Xenomai-core] [PATCH] Shared irqs v.6
I mean that the support of shared interrupts for ISA boards (edge-triggered stuff) is a kind of emulation to overcome the shortcommings of the initial design on the hardware level. The hardware was just not supposed to support shared interrupt channels. So, let's keep it a bit aside from another code :o)Unfortunately, this crappy hardware is still quite common in the embedded domain. If I've understood correctly, the only reason for having this support is to avoid - after discovering an interrupting UART - that the IRQ line remains high upon exit, which would cause the 8259 not to issue the CPU IRQ for that line anymore ? The proposed solution is therefore to traverse the entire list of UARTsconnected tothat IRQ line and make sure none of them was interrupting in two consecutive passes (by checking their status registers). That would mean the IRQ line must be deasserted and the 8259 will properly detect any newly arriving interrupts, since the 8259 has been acknowledged before handling the interrupt. 1. Wouldn't it be more efficient to make this a compile-time option, instead of burdening the nucleus with it ? 2. Would it be an option, in the embedded boards Jan is speaking of, to put the 8259 in level sensitive mode ? Some of the boards I know don't actually have this selection logic for the built-in interrupt sources such as the timer and the IDE I/F and it therefore only applies to the ISA bus. 3. Beware ofUARTs that cause interrupts and have a problem that causes them to spuriously return 'all green' upon reading of theIIR register. I have dealt with an integrated ('Super I/O') card with that problem before. The only solution was to look at LSR and check THRE as well, even when no TX interrupt was seemingly present. Must be a logic race. Jeroen.
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
And now, Ladies and Gentlemen, with the patches attached. I've installed both patches and the problem seems to have disappeared. I'll try it on another machine tomorrow, too. Meanwhile: thanks very much for the assistance ! Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
And now, Ladies and Gentlemen, with the patches attached. I've installed both patches and the problem seems to have disappeared. I'll try it on another machine tomorrow, too. Meanwhile: thanks very much for the assistance ! Jeroen.
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
Hello, I'm currently not at a level to participate in your discussion. Although I'm willing to supply you with stresstests, I would nevertheless like to learn more from task migrationas this debugging session proceeds. In order to do so, please confirm the following statements or indicate where I went wrong. I hope others may learn from this as well. xn_shadow_harden(): This is called whenever a Xenomai thread performs a Linux (root domain) system call (notified by Adeos ?). The migrating thread (nRT) is marked INTERRUPTIBLE and run by the Linux kernel wake_up_interruptible_sync() call. Is this thread actually run or does it merely put the thread in some Linux to-do list(I assumed the first case) ? And how does it terminate: is only the system call migrated or is the thread allowed to continue run (at a priority level equal to the Xenomai prioritylevel) until it hits something of the Xenomai API (or trivially: explicitly go to RT using the API) ? In that case, Iexpect the nRT thread to terminate with a schedule() call in the Xeno OS API code which deactivates the task so that it won't ever run in Linux context anymore. A top priority gatekeeper is in place as a software hook to catch Linux's attentionright after that schedule(), which might otherwise schedule something else (and leave only interrupts for Xenomai to come back to life again). I have the impression that I cannot see this gatekeeper, nor the (n)RT threads using the ps command ? Is it correct to state that thecurrent preemption issueis due to the gatekeeper beinginvoked too soon? Could someone knowing more about the migration technology explain what exactly goes wrong ? Thanks, Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [BUG] racy xnshadow_harden under CONFIG_PREEMPT
Hello, I'm currently not at a level to participate in your discussion. Although I'm willing to supply you with stresstests, I would nevertheless like to learn more from task migrationas this debugging session proceeds. In order to do so, please confirm the following statements or indicate where I went wrong. I hope others may learn from this as well. xn_shadow_harden(): This is called whenever a Xenomai thread performs a Linux (root domain) system call (notified by Adeos ?). The migrating thread (nRT) is marked INTERRUPTIBLE and run by the Linux kernel wake_up_interruptible_sync() call. Is this thread actually run or does it merely put the thread in some Linux to-do list(I assumed the first case) ? And how does it terminate: is only the system call migrated or is the thread allowed to continue run (at a priority level equal to the Xenomai prioritylevel) until it hits something of the Xenomai API (or trivially: explicitly go to RT using the API) ? In that case, Iexpect the nRT thread to terminate with a schedule() call in the Xeno OS API code which deactivates the task so that it won't ever run in Linux context anymore. A top priority gatekeeper is in place as a software hook to catch Linux's attentionright after that schedule(), which might otherwise schedule something else (and leave only interrupts for Xenomai to come back to life again). I have the impression that I cannot see this gatekeeper, nor the (n)RT threads using the ps command ? Is it correct to state that thecurrent preemption issueis due to the gatekeeper beinginvoked too soon? Could someone knowing more about the migration technology explain what exactly goes wrong ? Thanks, Jeroen.
[Xenomai-core] Re: [BUG] racy xnshadow_harden under CONFIG_PREEMPT
@Jeroen: Did you verify that your setup also works fine without CONFIG_PREEMPT? Verified. Your workaround works. No more dmesg logs.
[Xenomai-core] Scheduling while atomic
Hm. When I remove the output() from both tasks, all seems fine. Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] Scheduling while atomic
Hold on. Just crashed without the file access: please disregard last post. Jeroen. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
[Xenomai-core] Scheduling while atomic
Gilles, I cannot reproduce those messages after turning nucleus debugging on. Instead, I now either get relatively more failing mutexes or even hard lockups with the test program I sent to you. If the computer didn't crash, dmesg contains 3 Xenomai messages relating to a task being moved to secondary domain after exception #14. As for when the computer crashes: I have written the last kernel panic message on a paper. Please tell if you want also the addresses or (part of) the call stack. I'm still wondering if there's a programming error in the mutex test program. After I sent my previous message, and before I turned nucleus debugging on, I managed (by reducing the sleep times to max. 5.0e4) to fatally crash the computer, while spewing out countless 'scheduling while atomic' messages. Is the mutex error reproducible ? Tomorrow I'll try the patch. lostage_handler + e/33a rthal_apc_handler + 3b/46 lostage_handler + 190/33a rthal_apc_handler + 3b/46 __ipipe_sync_stage + 2a1/2bc mark_offset_tsc + c1/456 __ipipe_sync_stage + 2a9/2bc ipipe_unstall_pipeline_from + 189/194 (might be 181/194) xnpod_delete_thread + ba1/bc3 mcount + 23/2a taskexit_event + 4f/6c __ipipe_dispatch_event + 90/173 do_exit + 10f/604 sys_exit + 8/14 syscall_call + 7/b next_thread + 0/15 syscall_call + 7/b 0 Kernel panic - not syncing: Fatal Exception in interrupt Thanks for investigating, Jeroen.
[Xenomai-core] Scheduling while atomic
Hello, Apparently, the code I shared with Gilles never made it to this forum. Anyway, the issue I'm having here is really a problem and it might be useful if some of you could try it out or comment on it. I might be making a silly programming error here, but the result is invariably erroneous operation or kernel crashes. The program creates a file dump.txt and has two independent threads trying to access it and write a one or a zero there. Inside the writing routine, which is accessed by both threads, a check is made to see if the access is really locked. In my setup, I have tons of ALERTS popping up with this program, meaning that something is wrong with my use of mutex. Could anyone please check and see if a) it is correctly written and b) it fails as well on their machine. It would allow me to focus my actions on the Xenomai setup (which I keep frozen this instant, in order to keep a possible bug predictable) or on my own programming. A second example is also included, which tries to achieve the same goal with a semaphore (initialized to 1). That seems to work, but under heavy load (tmax = 1.0e7), the kernel crashes. Kernel: 2.6.15 Adeos: 1.1-03 gcc: 4.0.2 Ipipe tracing enabled TIA Jeroen. /* TEST_MUTEX.C */ #include stdlib.h#include stdio.h#include unistd.h#include fcntl.h#include signal.h#include math.h#include values.h #include sys/mman.h #include native/task.h#include native/mutex.h#include native/sem.h int fd, err;RT_MUTEX m;RT_SEM s;float tmax = 1.0e7; #define CHECK(arg) check(arg, __LINE__) int check(int r, int n){ if (r != 0) fprintf(stderr, L%d: %s.\n, n, strerror(-r)); return(r);} void output(char c) { static int cnt = 0; int n; char buf[2]; RT_MUTEX_INFO mutexinfo; buf[0] = c; if (cnt == 80) { buf[1] = '\n'; n = 2; cnt = 0; } else { n = 1; cnt++; } CHECK(rt_mutex_inquire(m, mutexinfo)); if (mutexinfo.lockcnt = 0) { RT_TASK_INFO taskinfo; CHECK(rt_task_inquire(NULL, taskinfo)); fprintf(stderr, ALERT: No lock! 
(lockcnt=%d) Offending task: %s\n, mutexinfo.lockcnt, taskinfo.name ); } if (write(fd, buf, n) != n) { fprintf(stderr, File write error.\n); CHECK(rt_sem_v(s)); } } void task0(void *arg){ CHECK(rt_task_set_mode(T_PRIMARY, 0, NULL)); while (1) { CHECK(rt_task_sleep((float)rand()*tmax/(float)RAND_MAX)); CHECK(rt_mutex_lock(m, TM_INFINITE)); output('0'); CHECK(rt_mutex_unlock(m)); }} void task1(void *arg){ CHECK(rt_task_set_mode(T_PRIMARY, 0, NULL)); while (1) { CHECK(rt_task_sleep((float)rand()*tmax/(float)RAND_MAX)); CHECK(rt_mutex_lock(m, TM_INFINITE)); output('1'); CHECK(rt_mutex_unlock(m)); }} void sighandler(int arg){ CHECK(rt_sem_v(s));} int main(int argc, char *argv[]){ RT_TASK t, t0, t1; if ((fd = open(dump.txt, O_CREAT | O_TRUNC | O_WRONLY)) 0) fprintf(stderr, File open error.\n); else { if (argc == 2) { tmax = atof(argv[1]); if (tmax == 0.0) tmax = 1.0e7; } if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) printf(mlockall() error.\n); CHECK(rt_task_shadow(t, main, 1, T_FPU)); CHECK(rt_timer_start(TM_ONESHOT)); CHECK(rt_mutex_create(m, mutex)); CHECK(rt_sem_create(s, sem, 0, S_PRIO)); signal(SIGINT, sighandler); CHECK(rt_task_create(t0, task0, 0, 30, T_FPU)); CHECK(rt_task_start(t0, task0, NULL)); CHECK(rt_task_create(t1, task1, 0, 29, T_FPU)); CHECK(rt_task_start(t1, task1, NULL)); printf(Running for %.2f seconds.\n, (float)MAXLONG/1.0e9); CHECK(rt_sem_p(s, MAXLONG)); signal(SIGINT, SIG_IGN); CHECK(rt_task_delete(t1)); CHECK(rt_task_delete(t1)); CHECK(rt_task_delete(t0)); CHECK(rt_sem_delete(s)); CHECK(rt_mutex_delete(m)); rt_timer_stop(); close(fd); } return 0;} /*/ /* TEST_SEM.C */ #include stdlib.h#include stdio.h#include unistd.h#include fcntl.h#include signal.h#include math.h#include values.h #include sys/mman.h #include native/task.h#include native/sem.h int fd, err;RT_SEM s, m;float tmax = 1.0e9; #define CHECK(arg) check(arg, __LINE__) int check(int r, int n){ if (r != 0) fprintf(stderr, L%d: %s.\n, n, strerror(-r)); return(r);} void output(char c) { 
static int cnt = 0; int n; char buf[2]; RT_SEM_INFO seminfo; buf[0] = c; if (cnt == 80) { buf[1] = '\n'; n = 2; cnt = 0; } else { n = 1; cnt++; } CHECK(rt_sem_inquire(m, seminfo)); if (seminfo.count != 0) { RT_TASK_INFO taskinfo; CHECK(rt_task_inquire(NULL, taskinfo)); fprintf(stderr, ALERT: No lock! (count=%ld) Offending task: %s\n, seminfo.count, taskinfo.name); } if (write(fd, buf, n) != n) { fprintf(stderr, File write error.\n); CHECK(rt_sem_v(s)); } } void task0(void *arg){ CHECK(rt_task_set_mode(T_PRIMARY, 0, NULL)); while (1) { CHECK(rt_task_sleep((float)rand()*tmax/(float)RAND_MAX)); CHECK(rt_sem_p(m, TM_INFINITE)); output('0'); CHECK(rt_sem_v(m)); }} void task1(void *arg){ CHECK(rt_task_set_mode(T_PRIMARY, 0, NULL)); while (1) { CHECK(rt_task_sleep((float)rand()*tmax/(float)RAND_MAX)); CHECK(rt_sem_p(m, TM_INFINITE)); output('1'); CHECK(rt_sem_v(m)); }} void sighandler(int arg){
Re: [Xenomai-core] Scheduling while atomic
Hm. When I remove the output() from both tasks, all seems fine. Jeroen.