--- storage-dist/transport.c	Tue Dec 18 14:22:07 2001
+++ storage/transport.c	Fri Jan  4 09:09:56 2002
@@ -54,6 +54,8 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 
+#define DO_BULK_QUEUE			/* no schedule between s/g bufs */
+
 /***********************************************************************
  * Helper routines
  ***********************************************************************/
@@ -482,15 +484,248 @@
 }
 
 /*
- * Transfer one SCSI scatter-gather buffer via bulk transfer
- *
- * Note that this function is necessary because we want the ability to
- * use scatter-gather memory.  Good performance is achieved by a combination
- * of scatter-gather and clustering (which makes each chunk bigger).
- *
- * Note that the lower layer will always retry when a NAK occurs, up to the
- * timeout limit.  Thus we don't have to worry about it for individual
- * packets.
+ * Map USB error status to US_BULK status codes
+ */
+static int map_status (int result)
+{
+	/* nonzero result: translate the errno into a failed/aborted code */
+	if (result) {
+		/* -ETIMEDOUT is logged as a NAKing device: plain failure */
+		if (result == -ETIMEDOUT) {
+			US_DEBUGP("map_status: device NAKed\n");
+			return US_BULK_TRANSFER_FAILED;
+		}
+
+		/* -ENOENT -- we canceled this transfer (disconnect) */
+		if (result == -ENOENT) {
+			US_DEBUGP("map_status: transfer aborted\n");
+			return US_BULK_TRANSFER_ABORTED;
+		}
+
+		/* the catch-all case: any other errno is a plain failure */
+		US_DEBUGP("map_status: unknown error\n");
+		return US_BULK_TRANSFER_FAILED;
+	}
+
+	/* zero result: never reported as GOOD from here, only as a short
+	 * transfer (some data moved, just not all of it) */
+	return US_BULK_TRANSFER_SHORT;
+}
+
+#ifdef	DO_BULK_QUEUE
+
+struct sg_result {	/* shared outcome of one queued s/g request */
+	int			status;		/* 0, or the first urb error sg_completion saw */
+	struct completion	complete;	/* completed by sg_completion on the last urb */
+};
+
+static void sg_completion (struct urb *urb)	/* completion callback of each s/g urb */
+{
+	struct urb		*base = (struct urb *)urb->context;	/* housekeeping urb */
+	unsigned		index = urb - base;	/* slot of this urb; data urbs start at 1 */
+	struct sg_result	*result = (struct sg_result *) base->context;
+
+	/* first error for this request?  (only the first one is recorded) */
+	if (result->status == 0 && urb->status != 0) {
+		unsigned long	flags;
+		int		i;
+
+		/* prevent processing of the URBs that follow this one */
+		local_irq_save (flags);
+		for (i = base->number_of_packets; i > index; --i) {
+			spin_lock (&base [i].lock);
+			/* if not yet queued ... make submitting it fail */
+			if (base [i].status == 0)
+				base [i].dev = 0;
+			/* else it's already queued ... HC will unlink it */
+			spin_unlock (&base [i].lock);
+		}
+		local_irq_restore (flags);
+
+		/* save the error */
+		result->status = urb->status;
+	}
+
+	/* when last URB is given to us, free them and report completion. */
+	if (index == base->number_of_packets) {
+		kfree (base);	/* frees the whole array, this urb included */
+		complete (&result->complete);
+	}
+}
+
+/*
+ * Queues an array of I/O requests as a series of bulk URBs, and returns
+ * status when they've all completed (or on fatal submit error).
+ * Returns mapped status code (not negative errno/zero).
+ */
+static int queueing_bulk_sg (
+	struct usb_device	*dev,		/* device every urb is submitted to */
+	int			pipe,		/* bulk pipe used for every urb */
+	unsigned		total,		/* byte budget for the whole request */
+	struct scatterlist	*sg_data,	/* s/g entries, one urb built per entry */
+	unsigned		sg_len		/* number of entries in sg_data */
+)
+{
+	struct urb		*urbs;
+	unsigned		i;
+	struct sg_result	result;
+
+	i = (sg_len + 1) * sizeof *urbs;	/* one extra urb for housekeeping */
+	urbs = kmalloc (i, GFP_NOIO);
+	if (!urbs)
+		return map_status (-ENOMEM);
+	memset (urbs, 0, i);
+
+	result.status = 0;
+	init_completion (&result.complete);
+	/* use urbs [0] for housekeeping ... */
+	urbs [0].context = &result;
+	urbs = &urbs [1];
+
+	/* ... init the other urbs, first for fault handling ... */
+	for (i = 0; i < sg_len; i++) {
+		/* currently no way to use highmem for USB I/O
+		 * not necessary in 2.4, will be required in 2.5 ...
+		 */
+		if (sg_data [i].address == 0) {
+			US_DEBUGP("ERROR: no highem support!\n");
+			kfree (&urbs [-1]);
+			return map_status (-EINVAL);
+		}
+		FILL_BULK_URB (&urbs [i], dev, pipe,
+			sg_data [i].address, sg_data [i].length,
+			sg_completion, &urbs [-1]);
+		if (urbs [i].transfer_buffer_length > total) {
+			US_DEBUGP("WARN: s/g err, data[%d].length = %d (%d)\n",
+				i, urbs [i].transfer_buffer_length, total);
+			urbs [i].transfer_buffer_length = total;
+			sg_len = total ? i + 1 : i;	/* keep a non-empty clipped urb */
+		}
+		total -= urbs [i].transfer_buffer_length;
+		/* status and most other fields already initted as zero */
+		urbs [i].transfer_flags = USB_QUEUE_BULK;
+	}
+	urbs [-1].number_of_packets = sg_len;
+
+	if (total != 0)
+		US_DEBUGP("ERROR: s/g leftover = %d\n", total);
+
+	/* ... then queue them in sequence.  completions will be firing
+	 * (especially at high speed) as we queue longer lists.
+	 */
+	for (i = 0; i < sg_len; i++) {
+		int	status;
+		int	retries = 0;
+		int	j;
+
+retry:
+		status = usb_submit_urb (&urbs [i]);
+		switch (status) {
+		/* success? */
+		case 0:
+			continue;
+
+		/* HCD ran out of TDs or DMA mappings? */
+		case -ENOMEM:
+			if (i != 0 && retries++ < 3) {
+				/* if we submitted one already, completion
+				 * frees resources so we may progress.
+				 * wait a few frames then retry.
+				 */
+				US_DEBUGP("s/g submit retry\n");
+				set_current_state (TASK_UNINTERRUPTIBLE);
+				schedule_timeout ((HZ * 1)/100);
+				goto retry;
+			}
+			/* FALLTHROUGH */
+
+		/* something we can't recover from?  failure from any urb
+		 * (possibly on another cpu!) blocks more submissions.
+		 * report the root cause and clean up.
+		 */
+		default:
+			US_DEBUGP("s/g submit error %d (%d)\n", status,
+				result.status);
+			if (result.status != 0)
+				status = result.status;
+
+			/* synchronous unlink; these should all be NOPs */
+			for (j = 0; j < i; j++)
+				usb_unlink_urb (&urbs [j]);
+			kfree (&urbs [-1]);
+
+			/* this is an error, minimally a short transfer */
+			return map_status (status);
+		}
+	}
+
+	/* ... wait for the last completion to fire (it frees the urbs);
+	 * if nothing was queued none will, so free them here instead */
+	if (sg_len != 0)
+		wait_for_completion (&result.complete);
+	else
+		kfree (&urbs [-1]);
+	return result.status ? map_status (result.status) : US_BULK_TRANSFER_GOOD;
+}
+
+#endif	/* DO_BULK_QUEUE */
+
+/*
+ * Transfer one SCSI scatter-gather buffer via USB bulk transfers.
+ * Returns mapped status code (not USB status code).
+ */
+static inline int bulk_sg (
+	struct us_data		*us,			/* supplies device, endpoints and srb */
+	unsigned		transfer_amount,	/* most bytes to move in total */
+	struct scatterlist	*sg,			/* s/g entries to transfer, in order */
+	unsigned		sglen			/* number of entries in sg */
+) {
+#ifdef DO_BULK_QUEUE
+	int	pipe;
+
+	if (us->srb->sc_data_direction == SCSI_DATA_READ)
+		pipe = usb_rcvbulkpipe(us->pusb_dev, us->ep_in);
+	else
+		pipe = usb_sndbulkpipe(us->pusb_dev, us->ep_out);
+
+	/* Maximizes bandwidth: no reschedule-between-transfer needed.
+	 * That matters most when pusb_dev->speed == USB_SPEED_HIGH.
+	 */
+	return queueing_bulk_sg (us->pusb_dev, pipe,
+			transfer_amount, sg, sglen);
+#else
+	int		result = US_BULK_TRANSFER_GOOD;
+	unsigned int	total_transferred = 0;
+	unsigned int	i;
+
+	for (i = 0; i < sglen; i++) {
+
+		/* transfer the lesser of the next buffer or the
+		 * remaining data, rescheduling between buffers.
+		 * a clipped buffer uses up the budget as well, so later
+		 * buffers can't push us past transfer_amount.
+		 */
+		unsigned int	chunk = transfer_amount - total_transferred;
+
+		if (chunk > sg[i].length)
+			chunk = sg[i].length;
+		result = usb_stor_transfer_partial(us,
+				sg[i].address, chunk);
+		total_transferred += chunk;
+
+		/* if we get an error, end the loop here */
+		if (result)
+			break;
+	}
+	return result;
+#endif
+}
+
+/*
+ * Blocking non-queued bulk transfer; required for cases when the SCSI
+ * layer doesn't pass scatterlists.
+ * (And also used, for now, if we don't trust the HC's bulk queuing...)
+ * Should be file-private...
  */
 int usb_stor_transfer_partial(struct us_data *us, char *buf, int length)
 {
@@ -521,45 +756,16 @@
 		US_DEBUGP("usb_stor_transfer_partial(): transfer complete\n");
 		return US_BULK_TRANSFER_GOOD;
 	}
-
-	/* uh oh... we have an error code, so something went wrong. */
-	if (result) {
-		/* NAK - that means we've retried a few times already */
-		if (result == -ETIMEDOUT) {
-			US_DEBUGP("usb_stor_transfer_partial(): device NAKed\n");
-			return US_BULK_TRANSFER_FAILED;
-		}
-
-		/* -ENOENT -- we canceled this transfer */
-		if (result == -ENOENT) {
-			US_DEBUGP("usb_stor_transfer_partial(): transfer aborted\n");
-			return US_BULK_TRANSFER_ABORTED;
-		}
-
-		/* the catch-all case */
-		US_DEBUGP("usb_stor_transfer_partial(): unknown error\n");
-		return US_BULK_TRANSFER_FAILED;
-	}
-
-	/* no error code, so we must have transferred some data, 
-	 * just not all of it */
-	return US_BULK_TRANSFER_SHORT;
+	return map_status (result);
 }
 
 /*
  * Transfer an entire SCSI command's worth of data payload over the bulk
- * pipe.
- *
- * Note that this uses usb_stor_transfer_partial to achieve it's goals -- this
- * function simply determines if we're going to use scatter-gather or not,
- * and acts appropriately.  For now, it also re-interprets the error codes.
+ * pipe, using scatter/gather if appropriate. 
  */
 void usb_stor_transfer(Scsi_Cmnd *srb, struct us_data* us)
 {
-	int i;
 	int result = -1;
-	struct scatterlist *sg;
-	unsigned int total_transferred = 0;
 	unsigned int transfer_amount;
 
 	/* calculate how much we want to transfer */
@@ -571,31 +777,10 @@
 		transfer_amount = srb->request_bufflen;
 
 	/* are we scatter-gathering? */
-	if (srb->use_sg) {
-
-		/* loop over all the scatter gather structures and 
-		 * make the appropriate requests for each, until done
-		 */
-		sg = (struct scatterlist *) srb->request_buffer;
-		for (i = 0; i < srb->use_sg; i++) {
-
-			/* transfer the lesser of the next buffer or the
-			 * remaining data */
-			if (transfer_amount - total_transferred >= 
-					sg[i].length) {
-				result = usb_stor_transfer_partial(us,
-						sg[i].address, sg[i].length);
-				total_transferred += sg[i].length;
-			} else
-				result = usb_stor_transfer_partial(us,
-						sg[i].address,
-						transfer_amount - total_transferred);
-
-			/* if we get an error, end the loop here */
-			if (result)
-				break;
-		}
-	}
+	if (srb->use_sg)
+		result = bulk_sg (us, transfer_amount,
+			(struct scatterlist *)srb->request_buffer,
+			srb->use_sg);
 	else
 		/* no scatter-gather, just make the request */
 		result = usb_stor_transfer_partial(us, srb->request_buffer, 
