Re: In-tree version of new FireWire drivers available

2007-01-27 Thread Stefan Richter
On 26 Jan, Andrew Morton wrote:
> arm:
> 
> drivers/firewire/fw-device.c: In function `fw_device_init':
> drivers/firewire/fw-device.c:495: warning: implicit declaration of function 
> `cmpxchg' 
>   
> 
> We can't use cmpxchg in generic code.


From: Stefan Richter <[EMAIL PROTECTED]>
Subject: firewire: use atomic type for fw_device.state

Signed-off-by: Stefan Richter <[EMAIL PROTECTED]>
---
 drivers/firewire/fw-card.c   |2 +-
 drivers/firewire/fw-device.c |6 +++---
 drivers/firewire/fw-device.h |3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

Index: linux-2.6.20-rc5/drivers/firewire/fw-device.h
===
--- linux-2.6.20-rc5.orig/drivers/firewire/fw-device.h
+++ linux-2.6.20-rc5/drivers/firewire/fw-device.h
@@ -24,6 +24,7 @@
 
 #include <linux/fs.h>
 #include <linux/cdev.h>
+#include <asm/atomic.h>
 
 enum fw_device_state {
FW_DEVICE_INITIALIZING,
@@ -32,7 +33,7 @@ enum fw_device_state {
 };
 
 struct fw_device {
-   int state;
+   atomic_t state;
struct fw_node *node;
int node_id;
int generation;
Index: linux-2.6.20-rc5/drivers/firewire/fw-device.c
===
--- linux-2.6.20-rc5.orig/drivers/firewire/fw-device.c
+++ linux-2.6.20-rc5/drivers/firewire/fw-device.c
@@ -494,7 +494,7 @@ static void fw_device_init(struct work_s
 * necessary.  We have to use the atomic cmpxchg here to avoid
 * racing with the FW_NODE_DESTROYED case in
 * fw_node_event(). */
-   if (cmpxchg(&device->state,
+   if (atomic_cmpxchg(&device->state,
FW_DEVICE_INITIALIZING,
FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
fw_device_shutdown(&device->work.work);
@@ -551,7 +551,7 @@ void fw_node_event(struct fw_card *card,
 * device_for_each_child() in FW_NODE_UPDATED is
 * doesn't freak out. */
device_initialize(&device->device);
-   device->state = FW_DEVICE_INITIALIZING;
+   atomic_set(&device->state, FW_DEVICE_INITIALIZING);
device->card = fw_card_get(card);
device->node = fw_node_get(node);
device->node_id = node->node_id;
@@ -596,7 +596,7 @@ void fw_node_event(struct fw_card *card,
 * the device in shutdown state to have that code fail
 * to create the device. */
device = node->data;
-   if (xchg(&device->state,
+   if (atomic_xchg(&device->state,
 FW_DEVICE_SHUTDOWN) == FW_DEVICE_RUNNING) {
INIT_DELAYED_WORK(&device->work, fw_device_shutdown);
schedule_delayed_work(&device->work, 0);
Index: linux-2.6.20-rc5/drivers/firewire/fw-card.c
===
--- linux-2.6.20-rc5.orig/drivers/firewire/fw-card.c
+++ linux-2.6.20-rc5/drivers/firewire/fw-card.c
@@ -302,7 +302,7 @@ fw_card_bm_work(struct work_struct *work
/* Either link_on is false, or we failed to read the
 * config rom.  In either case, pick another root. */
new_root_id = card->local_node->node_id;
-   } else if (root->state != FW_DEVICE_RUNNING) {
+   } else if (atomic_read(&root->state) != FW_DEVICE_RUNNING) {
/* If we haven't probed this device yet, bail out now
 * and let's try again once that's done. */
spin_unlock_irqrestore(&card->lock, flags);


-- 
Stefan Richter
-=-=-=== ---= ==-==
http://arcgraph.de/sr/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-27 Thread Stefan Richter
On 26 Jan, Andrew Morton wrote:
> arm:
> 
> drivers/firewire/fw-device.c: In function `fw_device_init':
> drivers/firewire/fw-device.c:495: warning: implicit declaration of function 
> `cmpxchg' 
>   
> 
> We can't use cmpxchg in generic code.


From: Stefan Richter <[EMAIL PROTECTED]>
Subject: firewire: use atomic type for fw_device.state

Signed-off-by: Stefan Richter <[EMAIL PROTECTED]>
---
 drivers/firewire/fw-card.c   |2 +-
 drivers/firewire/fw-device.c |6 +++---
 drivers/firewire/fw-device.h |3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

Index: linux-2.6.20-rc5/drivers/firewire/fw-device.h
===
--- linux-2.6.20-rc5.orig/drivers/firewire/fw-device.h
+++ linux-2.6.20-rc5/drivers/firewire/fw-device.h
@@ -24,6 +24,7 @@
 
 #include <linux/fs.h>
 #include <linux/cdev.h>
+#include <asm/atomic.h>
 
 enum fw_device_state {
FW_DEVICE_INITIALIZING,
@@ -32,7 +33,7 @@ enum fw_device_state {
 };
 
 struct fw_device {
-   int state;
+   atomic_t state;
struct fw_node *node;
int node_id;
int generation;
Index: linux-2.6.20-rc5/drivers/firewire/fw-device.c
===
--- linux-2.6.20-rc5.orig/drivers/firewire/fw-device.c
+++ linux-2.6.20-rc5/drivers/firewire/fw-device.c
@@ -494,7 +494,7 @@ static void fw_device_init(struct work_s
 * necessary.  We have to use the atomic cmpxchg here to avoid
 * racing with the FW_NODE_DESTROYED case in
 * fw_node_event(). */
-   if (cmpxchg(&device->state,
+   if (atomic_cmpxchg(&device->state,
FW_DEVICE_INITIALIZING,
FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
fw_device_shutdown(&device->work.work);
@@ -551,7 +551,7 @@ void fw_node_event(struct fw_card *card,
 * device_for_each_child() in FW_NODE_UPDATED is
 * doesn't freak out. */
device_initialize(&device->device);
-   device->state = FW_DEVICE_INITIALIZING;
+   atomic_set(&device->state, FW_DEVICE_INITIALIZING);
device->card = fw_card_get(card);
device->node = fw_node_get(node);
device->node_id = node->node_id;
@@ -596,7 +596,7 @@ void fw_node_event(struct fw_card *card,
 * the device in shutdown state to have that code fail
 * to create the device. */
device = node->data;
-   if (xchg(&device->state,
+   if (atomic_xchg(&device->state,
 FW_DEVICE_SHUTDOWN) == FW_DEVICE_RUNNING) {
INIT_DELAYED_WORK(&device->work, fw_device_shutdown);
schedule_delayed_work(&device->work, 0);
Index: linux-2.6.20-rc5/drivers/firewire/fw-card.c
===
--- linux-2.6.20-rc5.orig/drivers/firewire/fw-card.c
+++ linux-2.6.20-rc5/drivers/firewire/fw-card.c
@@ -302,7 +302,7 @@ fw_card_bm_work(struct work_struct *work
/* Either link_on is false, or we failed to read the
 * config rom.  In either case, pick another root. */
new_root_id = card->local_node->node_id;
-   } else if (root->state != FW_DEVICE_RUNNING) {
+   } else if (atomic_read(&root->state) != FW_DEVICE_RUNNING) {
/* If we haven't probed this device yet, bail out now
 * and let's try again once that's done. */
spin_unlock_irqrestore(&card->lock, flags);


-- 
Stefan Richter
-=-=-=== ---= ==-==
http://arcgraph.de/sr/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-26 Thread Andrew Morton
On Tue, 23 Jan 2007 22:48:15 -0500
"Kristian Høgsberg" <[EMAIL PROTECTED]> wrote:

> I've moved the new FireWire stack to an in-tree git repository and
> moved over the missing patches from my out-of-tree version.

arm:

drivers/firewire/fw-device.c: In function `fw_device_init':
drivers/firewire/fw-device.c:495: warning: implicit declaration of function 
`cmpxchg'   


We can't use cmpxchg in generic code.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-26 Thread Pieter Palmers

Robert Crocombe wrote:

On 1/25/07, Pieter Palmers <[EMAIL PROTECTED]> wrote:

I'd like to make one note here:
We should have a way to use smaller DMA buffers than one page size. If I
remember correctly, the page size on my system is 4096 bytes, being 1024
quadlets. If we assume a 4 channel audio stream, this corresponds to 256
audio samples. This means that the controller generates an interrupt
every 256 samples, making that we can achieve a latency of 512 samples
at best. This is unacceptable in a pro-audio environment.

The current stack exhibits this problem, and I solve it by recalculating
the max packet size, based upon the stream composition (i.e. expected
packet size) and the requested audio buffer size, such that the
interrupts are generated at a high enough frequency.

I'm not a kernel hacker, but when looking through the code I had the
impression that smaller DMA buffers were possible (aren't smaller
buffers used in packet-per-buffer mode?).


I am using isochronous receive in RAW1394_DMA_PACKET_PER_BUFFER mode
because I am closing a simulation loop around the data that is
received/transmitted.  Just for giggles I cranked up a test
isochronous stream from a bus analyzer at 1kB per packet at 8kHz at
the S400 rate (i.e., one packet on each cycle start: 8MBps ), set the
machine up to listen, and was able to maintain 8kHz interrupts at ~12%
CPU utilization on a 2.8GHz Opteron.

   1744719 interrupts int 218.112 seconds is 7999.193 ints/sec

I wasn't doing anything with the data for this test, but I have had
the aforementioned sim running steady at a somewhat lower rate.  This
test ran under 2.6.20-rc5-rt10, but the more "productiony" system is
on 2.6.16-rt29.

So hopefully you can get markedly lower latencies.  Myself, I'm
tickled pink by the performance that can be achieved.


I don't really understand what you are trying to say here. The overhead
of running in RAW1394_DMA_PACKET_PER_BUFFER mode is only acceptable for
very small buffer sizes. Usually one packet consists of 8 to 32 frames
(depending on the framerate of the stream), a frame being one sample of
all audio channels.

Currently I prefer about 4 interrupts per period, as we need some slack
to cope with the variable amount of no-data packets. So the
RAW1394_DMA_PACKET_PER_BUFFER mode is needed only for buffer sizes of 32
frames (assuming 8 frames per packet). Higher buffer sizes should use
another mode, because otherwise we're burning CPU cycles for no good
reason (12% cpu load is a little too high for me). The most frequently
used buffer sizes are around 128 frames, so that would mean 16
interrupts per period (4 times too much).

The way I currently solve this is by using the BUFFERFILL mode, but I 
inform the kernel that I expect packets that are larger than what I will 
effectively receive. If you specify a max_packet_size of 4096/4 bytes, 
every 4 packets the DMA buffer will be full and an interrupt will be 
generated. Internally it's called buff_stride if I'm not mistaking.


But again, what exactly is your point in this message?

Pieter



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-26 Thread Robert Crocombe

On 1/25/07, Pieter Palmers <[EMAIL PROTECTED]> wrote:

I'd like to make one note here:
We should have a way to use smaller DMA buffers than one page size. If I
remember correctly, the page size on my system is 4096 bytes, being 1024
quadlets. If we assume a 4 channel audio stream, this corresponds to 256
audio samples. This means that the controller generates an interrupt
every 256 samples, making that we can achieve a latency of 512 samples
at best. This is unacceptable in a pro-audio environment.

The current stack exhibits this problem, and I solve it by recalculating
the max packet size, based upon the stream composition (i.e. expected
packet size) and the requested audio buffer size, such that the
interrupts are generated at a high enough frequency.

I'm not a kernel hacker, but when looking through the code I had the
impression that smaller DMA buffers were possible (aren't smaller
buffers used in packet-per-buffer mode?).


I am using isochronous receive in RAW1394_DMA_PACKET_PER_BUFFER mode
because I am closing a simulation loop around the data that is
received/transmitted.  Just for giggles I cranked up a test
isochronous stream from a bus analyzer at 1kB per packet at 8kHz at
the S400 rate (i.e., one packet on each cycle start: 8MBps ), set the
machine up to listen, and was able to maintain 8kHz interrupts at ~12%
CPU utilization on a 2.8GHz Opteron.

  1744719 interrupts int 218.112 seconds is 7999.193 ints/sec

I wasn't doing anything with the data for this test, but I have had
the aforementioned sim running steady at a somewhat lower rate.  This
test ran under 2.6.20-rc5-rt10, but the more "productiony" system is
on 2.6.16-rt29.

So hopefully you can get markedly lower latencies.  Myself, I'm
tickled pink by the performance that can be achieved.

--
Robert Crocombe
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-26 Thread Robert Crocombe

On 1/25/07, Pieter Palmers <[EMAIL PROTECTED]> wrote:

I'd like to make one note here:
We should have a way to use smaller DMA buffers than one page size. If I
remember correctly, the page size on my system is 4096 bytes, being 1024
quadlets. If we assume a 4 channel audio stream, this corresponds to 256
audio samples. This means that the controller generates an interrupt
every 256 samples, making that we can achieve a latency of 512 samples
at best. This is unacceptable in a pro-audio environment.

The current stack exhibits this problem, and I solve it by recalculating
the max packet size, based upon the stream composition (i.e. expected
packet size) and the requested audio buffer size, such that the
interrupts are generated at a high enough frequency.

I'm not a kernel hacker, but when looking through the code I had the
impression that smaller DMA buffers were possible (aren't smaller
buffers used in packet-per-buffer mode?).


I am using isochronous receive in RAW1394_DMA_PACKET_PER_BUFFER mode
because I am closing a simulation loop around the data that is
received/transmitted.  Just for giggles I cranked up a test
isochronous stream from a bus analyzer at 1kB per packet at 8kHz at
the S400 rate (i.e., one packet on each cycle start: 8MBps ), set the
machine up to listen, and was able to maintain 8kHz interrupts at ~12%
CPU utilization on a 2.8GHz Opteron.

  1744719 interrupts int 218.112 seconds is 7999.193 ints/sec

I wasn't doing anything with the data for this test, but I have had
the aforementioned sim running steady at a somewhat lower rate.  This
test ran under 2.6.20-rc5-rt10, but the more "productiony" system is
on 2.6.16-rt29.

So hopefully you can get markedly lower latencies.  Myself, I'm
tickled pink by the performance that can be achieved.

--
Robert Crocombe
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-26 Thread Pieter Palmers

Robert Crocombe wrote:

On 1/25/07, Pieter Palmers <[EMAIL PROTECTED]> wrote:

I'd like to make one note here:
We should have a way to use smaller DMA buffers than one page size. If I
remember correctly, the page size on my system is 4096 bytes, being 1024
quadlets. If we assume a 4 channel audio stream, this corresponds to 256
audio samples. This means that the controller generates an interrupt
every 256 samples, making that we can achieve a latency of 512 samples
at best. This is unacceptable in a pro-audio environment.

The current stack exhibits this problem, and I solve it by recalculating
the max packet size, based upon the stream composition (i.e. expected
packet size) and the requested audio buffer size, such that the
interrupts are generated at a high enough frequency.

I'm not a kernel hacker, but when looking through the code I had the
impression that smaller DMA buffers were possible (aren't smaller
buffers used in packet-per-buffer mode?).


I am using isochronous receive in RAW1394_DMA_PACKET_PER_BUFFER mode
because I am closing a simulation loop around the data that is
received/transmitted.  Just for giggles I cranked up a test
isochronous stream from a bus analyzer at 1kB per packet at 8kHz at
the S400 rate (i.e., one packet on each cycle start: 8MBps ), set the
machine up to listen, and was able to maintain 8kHz interrupts at ~12%
CPU utilization on a 2.8GHz Opteron.

   1744719 interrupts int 218.112 seconds is 7999.193 ints/sec

I wasn't doing anything with the data for this test, but I have had
the aforementioned sim running steady at a somewhat lower rate.  This
test ran under 2.6.20-rc5-rt10, but the more "productiony" system is
on 2.6.16-rt29.

So hopefully you can get markedly lower latencies.  Myself, I'm
tickled pink by the performance that can be achieved.


I don't really understand what you are trying to say here. The overhead
of running in RAW1394_DMA_PACKET_PER_BUFFER mode is only acceptable for
very small buffer sizes. Usually one packet consists of 8 to 32 frames
(depending on the framerate of the stream), a frame being one sample of
all audio channels.

Currently I prefer about 4 interrupts per period, as we need some slack
to cope with the variable amount of no-data packets. So the
RAW1394_DMA_PACKET_PER_BUFFER mode is needed only for buffer sizes of 32
frames (assuming 8 frames per packet). Higher buffer sizes should use
another mode, because otherwise we're burning CPU cycles for no good
reason (12% cpu load is a little too high for me). The most frequently
used buffer sizes are around 128 frames, so that would mean 16
interrupts per period (4 times too much).

The way I currently solve this is by using the BUFFERFILL mode, but I 
inform the kernel that I expect packets that are larger than what I will 
effectively receive. If you specify a max_packet_size of 4096/4 bytes, 
every 4 packets the DMA buffer will be full and an interrupt will be 
generated. Internally it's called buff_stride if I'm not mistaking.


But again, what exactly is your point in this message?

Pieter



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-26 Thread Andrew Morton
On Tue, 23 Jan 2007 22:48:15 -0500
"Kristian Høgsberg" <[EMAIL PROTECTED]> wrote:

> I've moved the new FireWire stack to an in-tree git repository and
> moved over the missing patches from my out-of-tree version.

arm:

drivers/firewire/fw-device.c: In function `fw_device_init':
drivers/firewire/fw-device.c:495: warning: implicit declaration of function 
`cmpxchg'   


We can't use cmpxchg in generic code.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-25 Thread Pieter Palmers

Kristian Høgsberg wrote:

On 1/24/07, Pieter Palmers <[EMAIL PROTECTED]> wrote:

Kristian Høgsberg wrote:

Changes since the merge into the linux1394 tree include:

 - gap count optimization
 - full bus management
 - loopback for async requests to the local node
 - a bug fix for a problem exposed by VIA 6306 controllers
 - a typo fix from the bitfield -> mask+shift conversion.

Kristian,

What is your ETA on a the completion of the isochronous interface?


I'm hoping I can wrap this up within the next 1-2 weeks.  So far I've
been thinking about how to use the dualbuffer receive mode, and it
turns out that it's a little tricky.  It's nothing that can't be
worked around, but I haven't yet made up my mind on the design.

Just to recap, the dual buffer receive mode, as described in section
10.2.3 of the OHCI spec allows us to set up DMA so that a fixed,
quadlet aligned amount of header data can be appended into one buffer
and the rest is appended into another buffer.  This allows us to strip
out the ieee1394 iso header as well as the iec61883 header for those
protocols.  That way DMA can assemble a complete DV frame without CPU
intervention, strip off audio headers or just strip the iso header
like video1394 does, which is sufficient for IIDC cameras.  So this
has the potential of actually replacing video1394 while at the same
time generalizing the iso header stripping feature to be useful for
iec61883 based protocols.

The problem is that the dual buffer descriptor stops appending when
*either* the header buffer or the payload buffer fills up.  When the
payload buffer fills up, and this is what we'll typically hit, the
last packet will continue into the buffer setup in the next descriptor,
and the contents will probably straddle the two buffers.  Each buffer
will be a page in memory and since we map those into user space linear
memory, that's not a problem.

I'd like to make one note here:
We should have a way to use smaller DMA buffers than one page size. If I 
remember correctly, the page size on my system is 4096 bytes, being 1024 
quadlets. If we assume a 4 channel audio stream, this corresponds to 256 
audio samples. This means that the controller generates an interrupt 
every 256 samples, making that we can achieve a latency of 512 samples 
at best. This is unacceptable in a pro-audio environment.


The current stack exhibits this problem, and I solve it by recalculating 
the max packet size, based upon the stream composition (i.e. expected 
packet size) and the requested audio buffer size, such that the 
interrupts are generated at a high enough frequency.


I'm not a kernel hacker, but when looking through the code I had the 
impression that smaller DMA buffers were possible (aren't smaller 
buffers used in packet-per-buffer mode?).




However, the other case is when the header buffer fills up.  In this
case, the DMA engine moves on to the next descriptor in the list and
starts from new in the payload buffer from that descriptor.  This
leaves a gap in the payload buffer associated with the old descriptor.
 Since this gap is within a page, we can't just map it away in the
linear user space mapping of the buffers, user space will see this gap
and have to compensate, by copying, for example.

We obviously want to avoid gaps in the payload buffer, so setting up
these descriptors, we need to make sure that the header buffer is big
enough to hold headers for all the packets it takes to fill up the
payload buffer.  Now the packetization process isn't deterministic -
in simple cases where the remote device is sampling using a clock
based off of the bus clock domain, then, for example, a 48kHz audio
signal can send 6 samples every cycle or maybe 3 packets with 8
samples and one empty packet consistently.  But if the AD converter is
driven using a separate clock, there is going to be clock skew, and
suddenly there might be an extra empty packet.  And the thing is, even
without the clock skew problem, you don't know how the remote device
is going to throttle the packets.  All this to say that for a given
payload size, there is no way to reliably know how many packets the
remote device will use to transmit that payload.
A gap doesn't necessarily have to be a big problem as long as we know 
its position and size. I don't think it's a lot of overhead to skip a 
gap once in a while. Not having any would be better of course.


The ability to skip a gap will have to be implemented in (some) clients 
anyway, because it is not certain that a no-data packet won't contain 
payload. The current class driver for audio devices from Apple sends 
payload along with it's no-data packets. I don't really know if this is 
according to spec, but I assume so (haven't got them at hand).


In our application (FreeBoB), we know in advance what the size of a 
packet is going to be, as we only use blocking transmission. We also 
know how many (non no-data) packets we want to receive before being 
notified (some fraction of the audio 

Re: In-tree version of new FireWire drivers available

2007-01-25 Thread Pieter Palmers

Kristian Høgsberg wrote:

On 1/24/07, Pieter Palmers <[EMAIL PROTECTED]> wrote:

Kristian Høgsberg wrote:

Changes since the merge into the linux1394 tree include:

 - gap count optimization
 - full bus management
 - loopback for async requests to the local node
 - a bug fix for a problem exposed by VIA 6306 controllers
 - a typo fix from the bitfield -> mask+shift conversion.

Kristian,

What is your ETA on a the completion of the isochronous interface?


I'm hoping I can wrap this up within the next 1-2 weeks.  So far I've
been thinking about how to use the dualbuffer receive mode, and it
turns out that it's a little tricky.  It's nothing that can't be
worked around, but I haven't yet made up my mind on the design.

Just to recap, the dual buffer receive mode, as described in section
10.2.3 of the OHCI spec allows us to set up DMA so that a fixed,
quadlet aligned amount of header data can be appended into one buffer
and the rest is appended into another buffer.  This allows us to strip
out the ieee1394 iso header as well as the iec61883 header for those
protocols.  That way DMA can assemble a complete DV frame without CPU
intervention, strip off audio headers or just strip the iso header
like video1394 does, which is sufficient for IIDC cameras.  So this
has the potential of actually replacing video1394 while at the same
time generalizing the iso header stripping feature to be useful for
iec61883 based protocols.

The problem is that the dual buffer descriptor stops appending when
*either* the header buffer or the payload buffer fills up.  When the
payload buffer fills up, and this is what we'll typically hit, the
last packet will continue into the buffer setup in the next descriptor,
and the contents will probably straddle the two buffers.  Each buffer
will be a page in memory and since we map those into user space linear
memory, that's not a problem.

I'd like to make one note here:
We should have a way to use smaller DMA buffers than one page size. If I 
remember correctly, the page size on my system is 4096 bytes, being 1024 
quadlets. If we assume a 4 channel audio stream, this corresponds to 256 
audio samples. This means that the controller generates an interrupt 
every 256 samples, making that we can achieve a latency of 512 samples 
at best. This is unacceptable in a pro-audio environment.


The current stack exhibits this problem, and I solve it by recalculating 
the max packet size, based upon the stream composition (i.e. expected 
packet size) and the requested audio buffer size, such that the 
interrupts are generated at a high enough frequency.


I'm not a kernel hacker, but when looking through the code I had the 
impression that smaller DMA buffers were possible (aren't smaller 
buffers used in packet-per-buffer mode?).




However, the other case is when the header buffer fills up.  In this
case, the DMA engine moves on to the next descriptor in the list and
starts from new in the payload buffer from that descriptor.  This
leaves a gap in the payload buffer associated with the old descriptor.
 Since this gap is within a page, we can't just map it away in the
linear user space mapping of the buffers, user space will see this gap
and have to compensate, by copying, for example.

We obviously want to avoid gaps in the payload buffer, so setting up
these descriptors, we need to make sure that the header buffer is big
enough to hold headers for all the packets it takes to fill up the
payload buffer.  Now the packetization process isn't deterministic -
in simple cases where the remote device is sampling using a clock
based off of the bus clock domain, then, for example, a 48kHz audio
signal can send 6 samples every cycle or maybe 3 packets with 8
samples and one empty packet consistently.  But if the AD converter is
driven using a separate clock, there is going to be clock skew, and
suddenly there might be an extra empty packet.  And the thing is, even
without the clock skew problem, you don't know how the remote device
is going to throttle the packets.  All this to say that for a given
payload size, there is no way to reliably know how many packets the
remote device will use to transmit that payload.
A gap doesn't necessarily have to be a big problem as long as we know 
its position and size. I don't think it's a lot of overhead to skip a 
gap once in a while. Not having any would be better of course.


The ability to skip a gap will have to be implemented in (some) clients 
anyway, because it is not certain that a no-data packet won't contain 
payload. The current class driver for audio devices from Apple sends 
payload along with it's no-data packets. I don't really know if this is 
according to spec, but I assume so (haven't got them at hand).


In our application (FreeBoB), we know in advance what the size of a 
packet is going to be, as we only use blocking transmission. We also 
know how many (non no-data) packets we want to receive before being 
notified (some fraction of the audio 

Re: In-tree version of new FireWire drivers available

2007-01-24 Thread David Moore
On Wed, 2007-01-24 at 19:30 -0500, Kristian Høgsberg wrote:

> The problem isn't packets that are too big, the problem is we can get
> unexpected / too many empty packets, which will then cause the DMA
> engine to move on to the next descriptor before the payload buffer has
> been completely filled. 

I was under the impression that for an empty packet, the descriptor
wouldn't fire at all.  Is that not correct?

-David

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread David Moore

Kristian Høgsberg wrote:


 - Always allocate a page for headers and a page for the payload.


This method would not really be acceptable as a replacement for 
video1394, since video buffers can often be many tens of megabytes. 
Doubling the space requirement would be a big deal and may cause the 
allocation to fail.


How about this solution:

Give userspace the option of specifying the exact size of the received 
ISO packets.  If userspace provides this, you can allocate the dual 
buffer descriptors appropriately.


If userspace does not know the exact size of the received ISO packets or 
does not specify it, then use dual buffer descriptors for only the first 
packet, and use regular buffer fill for the remaining packets.


I suggest this strategy because I can't think of an application that 
needs all the packet headers, yet doesn't know the exact size of the 
packet.  Is there such an application?


-David
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread Stefan Richter
Kristian Høgsberg wrote:
> Hosting this on freedesktop.org should be fine,
> though kernel.org may be more appropriate :)

At the moment, a host outside of kernel.org is actually better for
gitweb lurkers. (But I trust that kernel.org will get up to speed again
soon.)

> Stefan, I'm still not sure what the work flow should be
> here, do you want to just pull these changes or should I send the 13
> patches to linux1394-devel?

IMO: Posting patches to linux1394-devel is a must for anything that is
meant to go to -mm or ultimately to mainline. In some cases, patches
should also be Cc'd to linux-kernel or specialized lists like linux-scsi
or linuxppc-dev etc. depending on the issue. I think linux1394-devel can
bear the additional traffic from this new development.

The question is whether you will post what you wrote, or if I will post
what I pulled from you...
-- 
Stefan Richter
-=-=-=== ---= ==---
http://arcgraph.de/sr/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread Pieter Palmers

Kristian Høgsberg wrote:

Changes since the merge into the linux1394 tree include:

 - gap count optimization
 - full bus management
 - loopback for async requests to the local node
 - a bug fix for a problem exposed by VIA 6306 controllers
 - a typo fix from the bitfield -> mask+shift conversion.


Kristian,

What is your ETA on the completion of the isochronous interface?

Pieter
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread Pieter Palmers

Kristian Høgsberg wrote:

Changes since the merge into the linux1394 tree include:

 - gap count optimization
 - full bus management
 - loopback for async requests to the local node
 - a bug fix for a problem exposed by VIA 6306 controllers
 - a typo fix from the bitfield -> mask+shift conversion.


Kristian,

What is your ETA on the completion of the isochronous interface?

Pieter
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread Stefan Richter
Kristian Høgsberg wrote:
> Hosting this on freedesktop.org should be fine,
> though kernel.org may be more appropriate :)

At the moment, a host outside of kernel.org is actually better for
gitweb lurkers. (But I trust that kernel.org will get up to speed again
soon.)

> Stefan, I'm still not sure what the work flow should be
> here, do you want to just pull these changes or should I send the 13
> patches to linux1394-devel?

IMO: Posting patches to linux1394-devel is a must for anything that is
meant to go to -mm or ultimately to mainline. In some cases, patches
should also be Cc'd to linux-kernel or specialized lists like linux-scsi
or linuxppc-dev etc. depending on the issue. I think linux1394-devel can
bear the additional traffic from this new development.

The question is whether you will post what you wrote, or if I will post
what I pulled from you...
-- 
Stefan Richter
-=-=-=== ---= ==---
http://arcgraph.de/sr/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread David Moore

Kristian Høgsberg wrote:


>  - Always allocate a page for headers and a page for the payload.


This method would not really be acceptable as a replacement for 
video1394, since video buffers can often be many tens of megabytes. 
Doubling the space requirement would be a big deal and may cause the 
allocation to fail.


How about this solution:

Give userspace the option of specifying the exact size of the received 
ISO packets.  If userspace provides this, you can allocate the dual 
buffer descriptors appropriately.


If userspace does not know the exact size of the received ISO packets or 
does not specify it, then use dual buffer descriptors for only the first 
packet, and use regular buffer fill for the remaining packets.


I suggest this strategy because I can't think of an application that 
needs all the packet headers, yet doesn't know the exact size of the 
packet.  Is there such an application?


-David
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: In-tree version of new FireWire drivers available

2007-01-24 Thread David Moore
On Wed, 2007-01-24 at 19:30 -0500, Kristian Høgsberg wrote:

> The problem isn't packets that are too big, the problem is we can get
> unexpected / too many empty packets, which will then cause the DMA
> engine to move on to the next descriptor before the payload buffer has
> been completely filled.

I was under the impression that for an empty packet, the descriptor
wouldn't fire at all.  Is that not correct?

-David

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


In-tree version of new FireWire drivers available

2007-01-23 Thread Kristian Høgsberg

Hi,

I've moved the new FireWire stack to an in-tree git repository and
moved over the missing patches from my out-of-tree version.  The tree
is available over here:

 git://people.freedesktop.org/~krh/linux-2.6

with gitweb available here:

 http://gitweb.freedesktop.org/?p=users/krh/linux-2.6.git;a=summary

There's only one branch there which is branched off of Stefan Richter's
master from the linux1394 repo, and has the most recent work from my
out-of-tree repo.  Hosting this on freedesktop.org should be fine,
though kernel.org may be more appropriate :)

Changes since the merge into the linux1394 tree include:

- gap count optimization
- full bus management
- loopback for async requests to the local node
- a bug fix for a problem exposed by VIA 6306 controllers
- a typo fix from the bitfield -> mask+shift conversion.

Plus I've merged Stefan's recent fixes and resolved the few conflicts
from that.  Stefan, I'm still not sure what the work flow should be
here, do you want to just pull these changes or should I send the 13
patches to linux1394-devel?

cheers,
Kristian
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


In-tree version of new FireWire drivers available

2007-01-23 Thread Kristian Høgsberg

Hi,

I've moved the new FireWire stack to an in-tree git repository and
moved over the missing patches from my out-of-tree version.  The tree
is available over here:

 git://people.freedesktop.org/~krh/linux-2.6

with gitweb available here:

 http://gitweb.freedesktop.org/?p=users/krh/linux-2.6.git;a=summary

There's only one branch there which is branched off of Stefan Richter's
master from the linux1394 repo, and has the most recent work from my
out-of-tree repo.  Hosting this on freedesktop.org should be fine,
though kernel.org may be more appropriate :)

Changes since the merge into the linux1394 tree include:

- gap count optimization
- full bus management
- loopback for async requests to the local node
- a bug fix for a problem exposed by VIA 6306 controllers
- a typo fix from the bitfield -> mask+shift conversion.

Plus I've merged Stefan's recent fixes and resolved the few conflicts
from that.  Stefan, I'm still not sure what the work flow should be
here, do you want to just pull these changes or should I send the 13
patches to linux1394-devel?

cheers,
Kristian
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/