Want more buffer cache? Please give this a try.

This diff splits the buffer cache into two regions: dma'able memory,
and the memory above the dma'able region. Buffers are always allocated
in the dma'able region, and as they age they are moved above the
dma'able region if such memory exists. I/O operations on buffers in
high memory flip the buffer back into dma'able memory first.

With this diff you can have huge tracts of buffer cache on amd64, but
it also needs testing on all archs.
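In other words (a minimal standalone sketch of the scheme, with made-up
names - region_t, cache_buf, move_pages and friends are illustrations
only; the diff itself does this with buf_realloc_pages() and
uvm_constraint_range):

#include <stdbool.h>

typedef enum { DMA_REGION, HIGH_REGION } region_t;

struct cache_buf {
	region_t region;	/* where this buffer's pages live now */
	bool	 busy;		/* locked for I/O, may not be moved */
};

/* stand-in for buf_realloc_pages(): migrate the pages to a region */
static void
move_pages(struct cache_buf *b, region_t r)
{
	b->region = r;
}

/* aging: a cold, idle buffer is pushed above the dma'able region */
static void
age_buffer(struct cache_buf *b)
{
	if (!b->busy && b->region == DMA_REGION)
		move_pages(b, HIGH_REGION);
}

/* I/O: a buffer sitting in high memory is flipped back down first */
static void
start_io(struct cache_buf *b)
{
	if (b->region == HIGH_REGION)
		move_pages(b, DMA_REGION); /* devices need dma'able pages */
	/* ... now safe to hand the buffer to the driver ... */
}

int
main(void)
{
	struct cache_buf b = { DMA_REGION, false };

	age_buffer(&b);		/* cache pressure moves it high */
	start_io(&b);		/* I/O flips it back down */
	return (b.region == DMA_REGION ? 0 : 1);
}

In the diff the flip happens in VOP_STRATEGY() via buf_daq_add(), so
any cache buffer handed to a driver is dma reachable by the time
spec_strategy() sees it.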
Index: kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.206
diff -u -p -r1.206 kern_sysctl.c
--- kern_sysctl.c 5 Jul 2011 04:48:02 -0000 1.206
+++ kern_sysctl.c 7 Jul 2011 21:09:33 -0000
@@ -112,6 +112,7 @@ extern struct disklist_head disklist;
extern fixpt_t ccpu;
extern long numvnodes;
extern u_int mcllivelocks;
+extern psize_t b_dmapages_total, b_highpages_total, b_dmamaxpages;
extern void nmbclust_update(void);
@@ -566,8 +567,8 @@ kern_sysctl(int *name, u_int namelen, vo
return (sysctl_int(oldp, oldlenp, newp, newlen,
&rthreads_enabled));
case KERN_CACHEPCT: {
- u_int64_t dmapages;
- int opct, pgs;
+ psize_t pgs;
+ int opct;
opct = bufcachepercent;
error = sysctl_int(oldp, oldlenp, newp, newlen,
&bufcachepercent);
@@ -577,11 +578,13 @@ kern_sysctl(int *name, u_int namelen, vo
bufcachepercent = opct;
return (EINVAL);
}
- dmapages = uvm_pagecount(&dma_constraint);
if (bufcachepercent != opct) {
- pgs = bufcachepercent * dmapages / 100;
+ pgs = (b_highpages_total + b_dmapages_total)
+ * bufcachepercent / 100;
+ b_dmamaxpages = b_dmapages_total * bufcachepercent
+ / 100;
bufadjust(pgs); /* adjust bufpages */
- bufhighpages = bufpages; /* set high water mark */
+ bufhighpages = bufpages;
}
return(0);
}
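
To get a feel for the sizing the sysctl now does, here is the
arithmetic with made-up numbers (2GB of dma'able memory, 6GB above it,
4K pages, bufcachepercent at 20 - assumptions for illustration only;
the formulas are the ones from the hunk above):

#include <stdio.h>

int
main(void)
{
	unsigned long long dmapages  = (2ULL << 30) / 4096;  /* 524288 */
	unsigned long long highpages = (6ULL << 30) / 4096;  /* 1572864 */
	unsigned long long pct = 20;

	/* the cache as a whole may span both regions... */
	unsigned long long bufpages = (highpages + dmapages) * pct / 100;
	/* ...but only this much of it may sit in dma'able memory */
	unsigned long long dmamaxpages = dmapages * pct / 100;

	printf("bufpages %llu (~%llu MB), b_dmamaxpages %llu (~%llu MB)\n",
	    bufpages, bufpages * 4096 / (1ULL << 20),
	    dmamaxpages, dmamaxpages * 4096 / (1ULL << 20));
	return 0;
}

That prints "bufpages 419430 (~1638 MB), b_dmamaxpages 104857 (~409
MB)" - i.e. the cache can grow to roughly 1.6GB while pinning at most
about 410MB of dma'able memory, which is the point of the exercise.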
Index: spec_vnops.c
===================================================================
RCS file: /cvs/src/sys/kern/spec_vnops.c,v
retrieving revision 1.67
diff -u -p -r1.67 spec_vnops.c
--- spec_vnops.c 5 Jul 2011 05:37:07 -0000 1.67
+++ spec_vnops.c 6 Jul 2011 22:44:00 -0000
@@ -457,7 +457,9 @@ spec_strategy(void *v)
struct vop_strategy_args *ap = v;
struct buf *bp = ap->a_bp;
int maj = major(bp->b_dev);
-
+
+ if (!ISSET(bp->b_flags, B_DAQ) && ISSET(bp->b_flags, B_BC))
+ panic("bogus buf passed to spec_strategy");
if (LIST_FIRST(&bp->b_dep) != NULL)
buf_start(bp);
Index: vfs_bio.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.133
diff -u -p -r1.133 vfs_bio.c
--- vfs_bio.c 6 Jul 2011 20:50:05 -0000 1.133
+++ vfs_bio.c 7 Jul 2011 21:34:52 -0000
@@ -68,9 +68,13 @@
#define BQ_DIRTY 0 /* LRU queue with dirty buffers */
#define BQ_CLEAN 1 /* LRU queue with clean buffers */
-TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
-int needbuffer;
+struct uvm_constraint_range high_constraint;
struct bio_ops bioops;
+TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
+TAILQ_HEAD(bqda, buf) bufqueue_da;
+psize_t b_dmapages_total, b_highpages_total, b_dmamaxpages;
+int needbuffer;
+int needda;
/*
* Buffer pool for I/O buffers.
@@ -87,12 +91,13 @@ void buf_put(struct buf *);
struct buf *bio_doread(struct vnode *, daddr64_t, int, int);
struct buf *buf_get(struct vnode *, daddr64_t, size_t);
+struct buf *buf_remove_from_freelist(struct buf *);
void bread_cluster_callback(struct buf *);
/*
* We keep a few counters to monitor the utilization of the buffer cache
*
- * numbufpages - number of pages totally allocated.
+ * numbufpages - number of pages totally allocated.
* numdirtypages - number of pages on BQ_DIRTY queue.
* lodirtypages - low water mark for buffer cleaning daemon.
* hidirtypages - high water mark for buffer cleaning daemon.
@@ -110,14 +115,112 @@ long hicleanpages;
long maxcleanpages;
long backoffpages; /* backoff counter for page allocations */
long buflowpages; /* bufpages low water mark */
-long bufhighpages; /* bufpages high water mark */
-long bufbackpages; /* number of pages we back off when asked to shrink */
+long bufhighpages; /* bufpages high water mark */
+long bufbackpages; /* number of pages we back off when asked to shrink */
+
+/* XXX - should be defined here but we have md issues */
+extern int bufcachepercent;
vsize_t bufkvm;
struct proc *cleanerproc;
int bd_req; /* Sleep point for cleaner daemon. */
+/* nuke a buf off its freelist - returns next buf. skips busy buffers */
+struct buf *
+buf_remove_from_freelist(struct buf *bp)
+{
+ struct buf *nbp;
+ nbp = TAILQ_NEXT(bp, b_freelist);
+ /* skip busy buffers */
+ if (!ISSET(bp->b_flags, B_BUSY)) {
+ bremfree(bp);
+ if (bp->b_vp) {
+ RB_REMOVE(buf_rb_bufs,
+ &bp->b_vp->v_bufs_tree, bp);
+ brelvp(bp);
+ }
+ buf_put(bp);
+ }
+ return(nbp);
+}
+/*
+ * Add buf to the head of the dma reachable queue
+ * and ensure that it is dma reachable.
+ */
+void
+buf_daq_add(struct buf *buf)
+{
+ struct buf *b;
+ int s;
+
+start:
+ KASSERT(ISSET(buf->b_flags, B_BC));
+ KASSERT(ISSET(buf->b_flags, B_BUSY));
+ KASSERT(buf->b_pobj != NULL);
+ s = splbio();
+ /*
+ * if we are adding to the queue, ensure we free down below the
+ * max
+ */
+ while (b_highpages_total &&
+ (!ISSET(buf->b_flags, B_DAQ)) && (!ISSET(buf->b_flags, B_DMA)) &&
+ (bcstats.dmapages > (b_dmamaxpages - atop(buf->b_bufsize)))) {
+ b = TAILQ_FIRST(&bufqueue_da);
+ /* find first non-busy buffer */
+ while (b && ISSET(b->b_flags, B_BUSY))
+ b = TAILQ_NEXT(b, b_qda);
+ if (b == NULL) {
+ /* no non-busy buffers. */
+ needda++;
+ tsleep(&needda, PRIBIO, "needda", 0);
+ needda--;
+ splx(s);
+ goto start;
+ } else {
+ if (b_highpages_total) {
+ buf_acquire_unmapped(b);
+ /* move buffer to above dma reachable memory */
+ TAILQ_REMOVE(&bufqueue_da, b, b_qda);
+ buf_realloc_pages(b, &high_constraint);
+ if (ISSET(b->b_flags, B_DMA))
+ panic("B_DMA after high flip %p", b);
+ CLR(b->b_flags, B_DAQ);
+ buf_release(b);
+ splx(s);
+ goto start;
+ } else {
+ /* no high pages to flip to. */
+ needda++;
+ tsleep(&needda, PRIBIO, "needda", 0);
+ needda--;
+ splx(s);
+ goto start;
+ }
+ }
+ }
+ /* don't copy it if it's already in dma reachable memory */
+ if (ISSET(buf->b_flags, B_DMA)) {
+ /* buf already there, just move it to the end */
+ if (ISSET(buf->b_flags, B_DAQ))
+ TAILQ_REMOVE(&bufqueue_da, buf, b_qda);
+ TAILQ_INSERT_TAIL(&bufqueue_da, buf, b_qda);
+ SET(buf->b_flags, B_DAQ);
+ } else {
+ if (ISSET(buf->b_flags, B_DAQ))
+ panic("non-dma buffer on dma queue %p\n", buf);
+ /* move buf to dma reachable memory */
+ buf_realloc_pages(buf, &dma_constraint);
+ if (!ISSET(buf->b_flags, B_DMA))
+ panic("non-dma buffer after dma move %p\n", buf);
+ TAILQ_INSERT_TAIL(&bufqueue_da, buf, b_qda);
+ SET(buf->b_flags, B_DAQ);
+ }
+ splx(s);
+}
+
void
bremfree(struct buf *bp)
{
@@ -139,11 +242,10 @@ bremfree(struct buf *bp)
if (dp == &bufqueues[BQUEUES])
panic("bremfree: lost tail");
}
- if (!ISSET(bp->b_flags, B_DELWRI)) {
+ if (!ISSET(bp->b_flags, B_DELWRI))
bcstats.numcleanpages -= atop(bp->b_bufsize);
- } else {
+ else
bcstats.numdirtypages -= atop(bp->b_bufsize);
- }
TAILQ_REMOVE(dp, bp, b_freelist);
bcstats.freebufs--;
}
@@ -175,7 +277,10 @@ buf_put(struct buf *bp)
if (backoffpages < 0)
backoffpages = 0;
}
-
+ if (ISSET(bp->b_flags, B_DAQ)) {
+ TAILQ_REMOVE(&bufqueue_da, bp, b_qda);
+ CLR(bp->b_flags, B_DAQ);
+ }
if (buf_dealloc_mem(bp) != 0)
return;
pool_put(&bufpool, bp);
@@ -187,10 +292,22 @@ buf_put(struct buf *bp)
void
bufinit(void)
{
- u_int64_t dmapages;
struct bqueues *dp;
- dmapages = uvm_pagecount(&dma_constraint);
+ bufhighpages = buflowpages = bufpages = bufcachepercent = bufkvm = 0;
+ /*
+ * XXX note this really is "high" - i.e. *above* dma_constraint
+ */
+ high_constraint.ucr_low = dma_constraint.ucr_high;
+ high_constraint.ucr_high = no_constraint.ucr_high;
+
+ /* do we have memory above dma_constraint, or not? */
+ if (high_constraint.ucr_low != high_constraint.ucr_high) {
+ high_constraint.ucr_low++;
+ b_highpages_total = uvm_pagecount(&high_constraint);
+ } else
+ b_highpages_total = 0;
+ b_dmapages_total = uvm_pagecount(&dma_constraint);
/*
* If MD code doesn't say otherwise, use 10% of kvm for mappings and
@@ -199,25 +316,31 @@ bufinit(void)
if (bufcachepercent == 0)
bufcachepercent = 10;
if (bufpages == 0)
- bufpages = dmapages * bufcachepercent / 100;
+ bufpages = (b_highpages_total + b_dmapages_total)
+ * bufcachepercent / 100;
bufhighpages = bufpages;
+ b_dmamaxpages = b_dmapages_total * bufcachepercent / 100;
+
+ printf("buffer cache from %d dma pages and %d high pages\n",
+ b_dmapages_total, b_highpages_total);
/*
* set the base backoff level for the buffer cache to bufpages.
* we will not allow uvm to steal back more than this number of
* pages
*/
- buflowpages = dmapages * 10 / 100;
+ buflowpages = b_dmapages_total * 10 / 100;
/*
- * set bufbackpages to 100 pages, or 10 percent of the low water mark
- * if we don't have that many pages.
+ * set bufbackpages to 1MB worth of pages, or 10 percent of
+ * the low water mark if we don't have that many pages.
*/
bufbackpages = buflowpages * 10 / 100;
- if (bufbackpages > 100)
- bufbackpages = 100;
+
+ if (bufbackpages > (1048576 / PAGE_SIZE))
+ bufbackpages = (1048576 / PAGE_SIZE);
if (bufkvm == 0)
bufkvm = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 10;
@@ -238,15 +361,16 @@ bufinit(void)
pool_setipl(&bufpool, IPL_BIO);
for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
TAILQ_INIT(dp);
+ TAILQ_INIT(&bufqueue_da);
/*
* hmm - bufkvm is an argument because it's static, while
* bufpages is global because it can change while running.
- */
+ */
buf_mem_init(bufkvm);
- hidirtypages = (bufpages / 4) * 3;
- lodirtypages = bufpages / 2;
+ hidirtypages = (b_dmamaxpages / 4) * 3;
+ lodirtypages = b_dmamaxpages / 2;
/*
* When we hit 95% of pages being clean, we bring them down to
@@ -259,6 +383,39 @@ bufinit(void)
}
/*
+ * Flip some dma reachable cache pages high
+ */
+void
+bufhigh(int delta)
+{
+ psize_t newdmapages;
+ struct buf *b;
+ int s;
+
+ if (!b_highpages_total)
+ return;
+ s = splbio();
+ newdmapages = bcstats.dmapages - delta;
+ while ((bcstats.dmapages > newdmapages) &&
+ (b = TAILQ_FIRST(&bufqueue_da))) {
+ while (ISSET(b->b_flags, B_BUSY))
+ b = TAILQ_NEXT(b, b_qda);
+ if (b != NULL) {
+ buf_acquire_unmapped(b);
+ /* move buffer to above dma reachable memory */
+ buf_realloc_pages(b, &high_constraint);
+ if (ISSET(b->b_flags, B_DMA))
+ panic("DMA flagged buffer after high flip %p",
b);
+ TAILQ_REMOVE(&bufqueue_da, b, b_qda);
+ CLR(b->b_flags, B_DAQ);
+ buf_release(b);
+ }
+ }
+ wakeup(&needda);
+ splx(s);
+}
+
+/*
* Change cachepct
*/
void
@@ -272,10 +429,19 @@ bufadjust(int newbufpages)
int s;
s = splbio();
+ /* XXX for hibernate - throw away everything we can. */
+ if (newbufpages == 0) {
+ bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
+ while (bp)
+ bp = buf_remove_from_freelist(bp);
+ splx(s);
+ return;
+ }
+
bufpages = newbufpages;
- hidirtypages = (bufpages / 4) * 3;
- lodirtypages = bufpages / 2;
+ hidirtypages = (b_dmamaxpages / 4) * 3;
+ lodirtypages = b_dmamaxpages / 2;
/*
* When we hit 95% of pages being clean, we bring them down to
@@ -291,16 +457,9 @@ bufadjust(int newbufpages)
* free them up to get back down. this may possibly consume
* all our clean pages...
*/
- while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) &&
- (bcstats.numbufpages > bufpages)) {
- bremfree(bp);
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
- brelvp(bp);
- }
- buf_put(bp);
- }
+ bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
+ while (bp && (bcstats.numbufpages > bufpages))
+ bp = buf_remove_from_freelist(bp);
/*
* Wake up cleaner if we're getting low on pages. We might
@@ -336,23 +495,39 @@ bufbackoff(struct uvm_constraint_range *
* On success, it frees N pages from the buffer cache, and sets
* a flag so that the next N allocations from buf_get will recycle
* a buffer rather than allocate a new one. It then returns 0 to the
- * caller.
+ * caller.
*
* on failure, it could free no pages from the buffer cache, does
- * nothing and returns -1 to the caller.
+ * nothing and returns -1 to the caller.
+ */
+
+ psize_t d, s;
+
+ /*
+ * back off by at least bufbackpages, or bufbackpages + what
+ * the pagedaemon needs if it happens to know when it calls us
+ */
- long d;
+ s = (size > 0) ? bufbackpages + size : bufbackpages;
- if (bufpages <= buflowpages)
+ if (bufpages <= buflowpages)
return(-1);
- if (bufpages - bufbackpages >= buflowpages)
- d = bufbackpages;
+ if (bufpages - s >= buflowpages)
+ d = s;
else
d = bufpages - buflowpages;
- backoffpages = bufbackpages;
- bufadjust(bufpages - d);
- backoffpages = bufbackpages;
+
+ if (b_highpages_total
+ && (range->ucr_high <= dma_constraint.ucr_high)) {
+ if (bcstats.dmapages - s > b_dmamaxpages)
+ s += (bcstats.dmapages - b_dmamaxpages);
+ bufhigh(s);
+ } else {
+ backoffpages = bufbackpages;
+ bufadjust(bufpages - d);
+ backoffpages = bufbackpages;
+ }
return(0);
}
@@ -534,12 +709,18 @@ bread_cluster(struct vnode *vp, daddr64_
for (i = 1; i < howmany; i++) {
bcstats.pendingreads++;
bcstats.numreads++;
- SET(xbpp[i]->b_flags, B_READ | B_ASYNC);
+ /*
+ * We set B_DMA here because bp above should already be
+ * dma reachable, and we are playing buffer slice-n-dice
+ * games with the memory allocated in bp.
+ */
+ SET(xbpp[i]->b_flags, B_DMA | B_READ | B_ASYNC);
xbpp[i]->b_blkno = sblkno + (i * inc);
xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size;
xbpp[i]->b_data = NULL;
xbpp[i]->b_pobj = bp->b_pobj;
xbpp[i]->b_poffs = bp->b_poffs + (i * size);
+ buf_daq_add(xbpp[i]);
}
KASSERT(bp->b_lblkno == blkno + 1);
@@ -618,7 +799,7 @@ bwrite(struct buf *bp)
reassignbuf(bp);
} else
curproc->p_stats->p_ru.ru_oublock++;
-
+
/* Initiate disk write. Make sure the appropriate party is charged. */
bp->b_vp->v_numoutput++;
@@ -793,6 +974,8 @@ brelse(struct buf *bp)
CLR(bp->b_flags, B_WANTED);
wakeup(bp);
}
+ if (ISSET(bp->b_flags, B_DMA) && needda)
+ wakeup(&needda);
if (bp->b_vp != NULL)
RB_REMOVE(buf_rb_bufs,
&bp->b_vp->v_bufs_tree, bp);
@@ -833,19 +1016,6 @@ brelse(struct buf *bp)
bcstats.freebufs++;
CLR(bp->b_flags, (B_AGE | B_ASYNC | B_NOCACHE | B_DEFERRED));
buf_release(bp);
-
- /* Wake up any processes waiting for any buffer to become free. */
- if (needbuffer) {
- needbuffer--;
- wakeup(&needbuffer);
- }
-
- /* Wake up any processes waiting for _this_ buffer to become free. */
- if (ISSET(bp->b_flags, B_WANTED)) {
- CLR(bp->b_flags, B_WANTED);
- wakeup(bp);
- }
-
splx(s);
}
@@ -981,16 +1151,9 @@ buf_get(struct vnode *vp, daddr64_t blkn
* free down to the low water mark.
*/
if (bcstats.numcleanpages > hicleanpages) {
- while (bcstats.numcleanpages > locleanpages) {
- bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
- bremfree(bp);
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
- brelvp(bp);
- }
- buf_put(bp);
- }
+ bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
+ while (bp && (bcstats.numcleanpages > locleanpages))
+ bp = buf_remove_from_freelist(bp);
}
npages = atop(round_page(size));
@@ -1002,15 +1165,9 @@ buf_get(struct vnode *vp, daddr64_t blkn
|| backoffpages) {
int freemax = 5;
int i = freemax;
- while ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) && i--)
{
- bremfree(bp);
- if (bp->b_vp) {
- RB_REMOVE(buf_rb_bufs,
- &bp->b_vp->v_bufs_tree, bp);
- brelvp(bp);
- }
- buf_put(bp);
- }
+ bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN]);
+ while (bp && i--)
+ bp = buf_remove_from_freelist(bp);
if (freemax == i &&
(bcstats.numbufpages + npages > bufpages)) {
needbuffer++;
@@ -1027,6 +1184,8 @@ buf_get(struct vnode *vp, daddr64_t blkn
splx(s);
return (NULL);
}
+ /* Mark buffer as the cache's */
+ SET(bp->b_flags, B_BC);
bp->b_freelist.tqe_next = NOLIST;
bp->b_synctime = time_uptime + 300;
@@ -1041,7 +1200,7 @@ buf_get(struct vnode *vp, daddr64_t blkn
* We insert the buffer into the hash with B_BUSY set
* while we allocate pages for it. This way any getblk
* that happens while we allocate pages will wait for
- * this buffer instead of starting its own guf_get.
+ * this buffer instead of starting its own buf_get.
*
* But first, we check if someone beat us to it.
*/
@@ -1067,10 +1226,9 @@ buf_get(struct vnode *vp, daddr64_t blkn
if (size) {
buf_alloc_pages(bp, round_page(size));
buf_map(bp);
+ buf_daq_add(bp);
}
-
splx(s);
-
return (bp);
}
@@ -1082,23 +1240,46 @@ buf_daemon(struct proc *p)
{
struct timeval starttime, timediff;
struct buf *bp;
- int s;
+ int s, nb, error;
cleanerproc = curproc;
s = splbio();
for (;;) {
+ struct buf *nbp;
if (bcstats.numdirtypages < hidirtypages)
tsleep(&bd_req, PRIBIO - 7, "cleaner", 0);
getmicrouptime(&starttime);
-
+start:
+ nb = 0;
while ((bp = TAILQ_FIRST(&bufqueues[BQ_DIRTY]))) {
struct timeval tv;
+ nbp = TAILQ_NEXT(bp, b_freelist);
if (bcstats.numdirtypages < lodirtypages)
break;
+ /*
+ * If this buffer is busy and it is the only one
+ * left, wait for it and restart; otherwise skip
+ * busy buffers and process the rest of them.
+ */
+ if (ISSET(bp->b_flags, B_BUSY)) {
+ if ((nb == 0) && (nbp == NULL)) {
+ SET(bp->b_flags, B_WANTED);
+ error = tsleep(bp, PRIBIO + 1, "getblk", 0);
+ splx(s);
+ if (error)
+ return;
+ s = splbio();
+ goto start;
+ }
+ continue;
+ }
+ nb++;
bremfree(bp);
buf_acquire(bp);
splx(s);
@@ -1132,7 +1313,6 @@ buf_daemon(struct proc *p)
s = splbio();
if (timediff.tv_sec)
break;
-
}
}
}
Index: vfs_biomem.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_biomem.c,v
retrieving revision 1.17
diff -u -p -r1.17 vfs_biomem.c
--- vfs_biomem.c 7 Apr 2011 19:07:42 -0000 1.17
+++ vfs_biomem.c 7 Jul 2011 21:17:09 -0000
@@ -33,6 +33,8 @@ TAILQ_HEAD(,buf) buf_valist;
int buf_nkvmsleep;
extern struct bcachestats bcstats;
+extern int needbuffer;
+extern int needda;
/*
* Pages are allocated from a uvm object (we only use it for page storage,
@@ -99,6 +101,11 @@ buf_acquire_unmapped(struct buf *bp)
s = splbio();
SET(bp->b_flags, B_BUSY|B_NOTMAPPED);
+ /* XXX - the buffer may already be mapped */
+ if (bp->b_data != NULL) {
+ TAILQ_REMOVE(&buf_valist, bp, b_valist);
+ bcstats.busymapped++;
+ }
splx(s);
}
@@ -170,6 +177,24 @@ buf_release(struct buf *bp)
}
}
CLR(bp->b_flags, B_BUSY|B_NOTMAPPED);
+ if (ISSET(bp->b_flags, B_DMA) && needda) {
+ wakeup(&needda);
+ }
+ /* Wake up any processes waiting for any buffer to become free. */
+ if (needbuffer) {
+ needbuffer--;
+ wakeup(&needbuffer);
+ }
+
+ /*
+ * Wake up any processes waiting for _this_ buffer to become
+ * free.
+ */
+
+ if (ISSET(bp->b_flags, B_WANTED)) {
+ CLR(bp->b_flags, B_WANTED);
+ wakeup(bp);
+ }
splx(s);
}
@@ -286,6 +311,8 @@ buf_alloc_pages(struct buf *bp, vsize_t
uvm_pagealloc_multi(buf_object, offs, size, UVM_PLA_WAITOK);
bcstats.numbufpages += atop(size);
+ bcstats.dmapages += atop(size);
+ SET(bp->b_flags, B_DMA);
bp->b_pobj = buf_object;
bp->b_poffs = offs;
bp->b_bufsize = size;
@@ -302,6 +329,7 @@ buf_free_pages(struct buf *bp)
KASSERT(bp->b_data == NULL);
KASSERT(uobj != NULL);
+ KASSERT(!ISSET(bp->b_flags, B_DAQ));
s = splbio();
@@ -316,11 +344,57 @@ buf_free_pages(struct buf *bp)
pg->wire_count = 0;
uvm_pagefree(pg);
bcstats.numbufpages--;
+ if (ISSET(bp->b_flags, B_DMA))
+ bcstats.dmapages--;
}
+ CLR(bp->b_flags, B_DMA);
splx(s);
}
-/*
- * XXX - it might make sense to make a buf_realloc_pages to avoid
- * bouncing through the free list all the time.
- */
+/* Reallocate a buf into a particular location specified by "where" */
+void
+buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where)
+{
+ vaddr_t va;
+ int dma;
+ int s, i;
+
+ s = splbio();
+ KASSERT(ISSET(bp->b_flags, B_BUSY));
+ dma = ISSET(bp->b_flags, B_DMA);
+
+ /* if the original buf is mapped, unmap it */
+ if (bp->b_data != NULL) {
+ va = (vaddr_t)bp->b_data;
+ pmap_kremove(va, bp->b_bufsize);
+ pmap_update(pmap_kernel());
+ }
+ uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs, bp->b_bufsize,
+ UVM_PLA_WAITOK, where);
+ /*
+ * drop the dma page accounting now; we put it back below
+ * once we know where the pages actually landed
+ */
+ if (dma)
+ bcstats.dmapages -= atop(bp->b_bufsize);
+
+ dma = 1;
+ /* see where the pages landed; re-map them if the buf was mapped */
+ for (i = 0; i < atop(bp->b_bufsize); i++) {
+ struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
+ bp->b_poffs + ptoa(i));
+ KASSERT(pg != NULL);
+ if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
+ dma = 0;
+ if (bp->b_data != NULL) {
+ pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
+ VM_PROT_READ|VM_PROT_WRITE);
+ pmap_update(pmap_kernel());
+ }
+ }
+ if (dma) {
+ SET(bp->b_flags, B_DMA);
+ bcstats.dmapages += atop(bp->b_bufsize);
+ } else
+ CLR(bp->b_flags, B_DMA);
+ splx(s);
+}
Index: vfs_vops.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_vops.c,v
retrieving revision 1.4
diff -u -p -r1.4 vfs_vops.c
--- vfs_vops.c 2 Jul 2011 15:52:25 -0000 1.4
+++ vfs_vops.c 6 Jul 2011 22:39:28 -0000
@@ -614,6 +614,17 @@ VOP_STRATEGY(struct buf *bp)
if (bp->b_vp->v_op->vop_strategy == NULL)
return (EOPNOTSUPP);
+ /*
+ * Flip buffer to dma reachable memory if
+ * necessary.
+ *
+ * XXX if you're making your own buffers and not
+ * having the buffer cache manage them then it's your
+ * problem to ensure they can be dma'ed to and from.
+ */
+ if (ISSET(bp->b_flags, B_BC))
+ buf_daq_add(bp);
+
return ((bp->b_vp->v_op->vop_strategy)(&a));
}