Re: [Qemu-devel] [PATCH V3 2/3] qemu: Generic asynchronous threading framework to offload tasks

2010-06-05 Thread Corentin Chary
On Fri, Jun 4, 2010 at 3:16 PM, Anthony Liguori anth...@codemonkey.ws wrote:
 On 06/03/2010 03:56 AM, Gautham R Shenoy wrote:

 From: Aneesh Kumar K.Vaneesh.ku...@linux.vnet.ibm.com

 This patch creates a generic asynchronous-task-offloading infrastructure.
 It's
 extracted out of the threading framework that is being used by paio.

 The reason for extracting out this generic infrastructure of the
 posix-aio-compat.c is so that other subsystems, such as virtio-9p could
 make use
 of it for offloading tasks that could block.

 [...@in.ibm.com: work_item_pool, async_work_init, async_work_release,
 async_cancel_work]

 Signed-off-by: Aneesh Kumar K.Vaneesh.ku...@linux.vnet.ibm.com
 Signed-off-by: Gautham R Shenoye...@in.ibm.com
 ---
  Makefile.objs |    3 +
  async-work.c  |  136
 +
  async-work.h  |   85 
  3 files changed, 223 insertions(+), 1 deletions(-)
  create mode 100644 async-work.c
  create mode 100644 async-work.h

 diff --git a/Makefile.objs b/Makefile.objs
 index ecdd53e..fd5ea4d 100644
 --- a/Makefile.objs
 +++ b/Makefile.objs
 @@ -9,6 +9,8 @@ qobject-obj-y += qerror.o

  block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
  block-obj-y += nbd.o block.o aio.o aes.o osdep.o qemu-config.o
 +block-obj-y += qemu-thread.o
 +block-obj-y += async-work.o
  block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o

 @@ -108,7 +110,6 @@ common-obj-y += iov.o
  common-obj-$(CONFIG_VNC_TLS) += vnc-tls.o vnc-auth-vencrypt.o
  common-obj-$(CONFIG_VNC_SASL) += vnc-auth-sasl.o
  common-obj-$(CONFIG_COCOA) += cocoa.o
 -common-obj-$(CONFIG_IOTHREAD) += qemu-thread.o
  common-obj-y += notify.o event_notifier.o
  common-obj-y += qemu-timer.o

 diff --git a/async-work.c b/async-work.c
 new file mode 100644
 index 000..0675732
 --- /dev/null
 +++ b/async-work.c
 @@ -0,0 +1,136 @@
 +/*
 + * Async work support
 + *
 + * Copyright IBM, Corp. 2010
 + *
 + * Authors:
 + *  Aneesh Kumar K.Vaneesh.ku...@linux.vnet.ibm.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2.  See
 + * the COPYING file in the top-level directory.


 Please preserve the original copyright of the copied code.

 + */
 +#includestdio.h
 +#includeerrno.h
 +#includestring.h
 +#includestdlib.h
 +#includesignal.h


 qemu-common.h should have all of these.  Generally, you should avoid
 including system headers because qemu headers take care of portability.

 +#include async-work.h
 +#include osdep.h
 +
 +static void async_abort(int err, const char *what)
 +{
 +    fprintf(stderr, %s failed: %s\n, what, strerror(err));
 +    abort();
 +}
 +
 +static void *async_worker_thread(void *data)
 +{
 +    struct async_queue *queue = data;
 +
 +    while (1) {
 +        struct work_item *work;
 +        int ret = 0;
 +        qemu_mutex_lock((queue-lock));
 +
 +        while (QTAILQ_EMPTY((queue-request_list))
 +               (ret != ETIMEDOUT)) {
 +            ret = qemu_cond_timedwait((queue-cond),
 +                                       (queue-lock), 10*10);
 +        }
 +
 +        if (QTAILQ_EMPTY((queue-request_list)))
 +            goto check_exit;
 +
 +        work = QTAILQ_FIRST((queue-request_list));
 +        QTAILQ_REMOVE((queue-request_list), work, node);
 +        queue-idle_threads--;
 +        qemu_mutex_unlock((queue-lock));
 +
 +        /* execute the work function */
 +        work-func(work);
 +        async_work_release(queue, work);
 +
 +        qemu_mutex_lock((queue-lock));
 +        queue-idle_threads++;
 +
 +check_exit:
 +        if ((queue-idle_threads  0)
 +            (queue-cur_threads  queue-min_threads)) {
 +            /* we retain minimum number of threads */
 +            break;
 +        }
 +        qemu_mutex_unlock((queue-lock));
 +    }
 +
 +    queue-idle_threads--;
 +    queue-cur_threads--;
 +    qemu_mutex_unlock((queue-lock));
 +
 +    return NULL;
 +}
 +
 +static void spawn_async_thread(struct async_queue *queue)
 +{
 +    QemuThreadAttr attr;
 +    QemuThread thread;
 +    sigset_t set, oldset;
 +
 +    queue-cur_threads++;
 +    queue-idle_threads++;
 +
 +    qemu_thread_attr_init(attr);
 +
 +    /* create a detached thread so that we don't need to wait on it */
 +    qemu_thread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED);
 +
 +    /* block all signals */
 +    if (sigfillset(set)) {
 +        async_abort(errno, sigfillset);
 +    }
 +
 +    if (sigprocmask(SIG_SETMASK,set,oldset)) {
 +        async_abort(errno, sigprocmask);
 +    }
 +
 +    qemu_thread_create_attr(thread,attr, async_worker_thread, queue);
 +
 +    if (sigprocmask(SIG_SETMASK,oldset, NULL)) {
 +        async_abort(errno, sigprocmask restore);
 +    }
 +}
 +
 +void qemu_async_submit(struct async_queue *queue, struct work_item *work)
 +{
 +    qemu_mutex_lock((queue-lock));
 +    if (queue-idle_threads == 0  queue-cur_threads
  queue-max_threads) {
 +        

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-06-05 Thread Blue Swirl
On Sat, Jun 5, 2010 at 12:04 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Thu, Jun 3, 2010 at 7:06 AM, Gleb Natapov g...@redhat.com wrote:
 On Thu, Jun 03, 2010 at 10:03:00AM +0300, Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:59:23AM +0200, Jan Kiszka wrote:
 Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:23:46AM +0200, Jan Kiszka wrote:
 Blue Swirl wrote:
 But how about if we introduced instead a message based IRQ? Then the
 message could specify the originator device, maybe ACK/coalesce/NACK
 callbacks and a bigger payload than just 1 bit of level. I think that
 could achieve the same coalescing effect as what the bidirectional
 IRQ. The payload could be useful for other purposes, for example
 Sparc64 IRQ messages contain three 64 bit words.
 If there are more users than just IRQ de-coalescing, this indeed sounds
 superior. We could pass objects like this one around:

 struct qemu_irq_msg {
  void (*delivery_cb)(int result);
  void *payload;
 };

 They would be valid within the scope of the IRQ handlers. Whoever
 terminates or actually delivers the IRQ would invoke the callback. And
 platforms like sparc64 could evaluate the additional payload pointer in
 their irqchips or wherever they need it. IRQ routers on platforms that
 make use of these messages would have to replicate them when forwarding
 an event.

 OK?

 Let me see if I understand you correctly. qemu_set_irq() will get
 additional parameter qemu_irq_msg and if irq was not coalesced
 delivery_cb is called, so there is a guaranty that if delivery_cb is
 called it is done before qemu_set_irq() returns. Correct?
 If the side that triggers an IRQ passes a message object with a non-NULL
 callback, it is supposed to be called unconditionally, passing the
 result of the delivery (delivered, masked, coalesced). And yes, the
 callback will be invoked in the context of the irq handler, so before
 qemu_set_irq (or rather some new qemu_set_irq_msg) returns.

 Looks fine to me.

 Except that delivery_cb should probably get pointer to qemu_irq_msg as a
 parameter.

 I'd like to also support EOI handling. When the guest clears the
 interrupt condition, the EOI callback would be called. This could occur
 much later than the IRQ delivery time. I'm not sure if we need the
 result code in that case.

 If any intermediate device (IOAPIC?) needs to be informed about either
 delivery or EOI also, it could create a proxy message with its
 callbacks in place. But we need then a separate opaque field (in
 addition to payload) to store the original message.

 struct IRQMsg {
  DeviceState *src;
  void (*delivery_cb)(IRQMsg *msg, int result);
  void (*eoi_cb)(IRQMsg *msg, int result);
  void *src_opaque;
  void *payload;
 };

 Extending the lifetime of IRQMsg objects beyond the delivery call stack
 means qemu_malloc/free for every delivery. I think it takes a _very_
 appealing reason to justify this. But so far I do not see any use case
 for eoi_cb at all.

I think it's safer to use the allocation model anyway because this will be
generic code. For example, an intermediate device may want to queue
the IRQs. Alternatively, the callbacks could use DeviceState and some
opaque which can be used as the callback context:
  void (*delivery_cb)(DeviceState *src, void *src_opaque, int result);

EOI can be added later if needed; QEMU seems to work fine now without
it. But based on the IOAPIC data sheet, I'd suppose it should be necessary
to pass EOI from LAPIC to IOAPIC. It could be used by coalescing as
another opportunity to inject IRQs, though I guess the guest will ack
the IRQ at the same time for both RTC and APIC.



Re: [Qemu-devel] [PATCH-V5 0/10] virtio-9p:Introducing security model for VirtFS

2010-06-05 Thread Blue Swirl
On Sat, Jun 5, 2010 at 1:08 AM, Venkateswararao Jujjuri (JV)
jv...@linux.vnet.ibm.com wrote:
 This patch series introduces the security model for VirtFS.

 Brief description of this patch series:

 It introduces two type of security models for VirtFS.
 They are: mapped and passthrough.

 The following is common to both security models.

 * Client's VFS determines/enforces the access control.
  Largely, the server should never return EACCES.

 * Client sends gid/mode-bit information as part of creation only.

 Changes from V4
 ---
 o Most of the cosmetic changes proposed by Aneesh.
 o Divided into more number of patches and added more explanation to each 
 patch.

 Changes from V3
 ---
 o Return NULL instead of exit(1) on failure in virtio_9p_init()
 o Capitalized sm_passthrough, sm_mapped
 o Added handling for EINTR for read/write.
 o Corrected default permissions for mkdir in mapped mode.
 o Added additional error handling.

 Changes from V2
 ---
 o Removed warnings resulting from chmod/chown.
 o Added code to fail normally if security_model option is not specified.

 Changes from V1
 ---
 o Added support for chmod and chown.
 o Used chmod/chown to set credentials instead of setuid/setgid.
 o Fixed a bug where uid used instated of uid.


 Security model: mapped
 --

 VirtFS server(QEMU) intercepts and maps all the file object create requests.
 Files on the fileserver will be created with QEMU's user credentials and the
 client-user's credentials are stored in extended attributes.
 During getattr() server extracts the client-user's credentials from extended
 attributes and sends to the client.

 Given that only the user space extended attributes are available to regular
 files, special files are created as regular files on the fileserver and the
 appropriate mode bits are stored in xattrs and will be extracted during
 getattr.

 If the extended attributes are missing, server sends back the filesystem
 stat() unaltered. This provision will make the files created on the
 fileserver usable to client.

 Points to be considered

 * Filesystem will be VirtFS'ized. Meaning, other filesystems may not
  understand the credentials of the files created under this model.

 * Regular utilities like 'df' may not report required results in this model.
  Need for special reporting utilities which can understand this security 
 model.

Just a thought: this part could have other uses too; for example,
vext3 would be more useful than vvfat. Also, the VFAT attributes
hidden/system/archived could be implemented with extended attributes.

Another point to consider is that this will be Linux specific. If my
earlier patch to make v9fs available to all Posix systems makes sense,
this part should be conditional.

 Security model : passthrough
 

 In this security model, VirtFS server passes down all requests to the
 underlying filesystem. File system objects on the fileserver will be created
 with client-user's credentials. This is done by setting setuid()/setgid()
 during creation or ch* after file creation. At the end of create protocol
 request, files on the fileserver will be owned by client-user's uid/gid.

 Points to be considered

  * Fileserver should always run as 'root'.
  * Root squashing may be needed. Will be for future work.
  * Potential for user credential clash between guest's user space IDs and
    host's user space IDs.

 It also adds security model attribute to -fsdev device and to -virtfs 
 shortcut.

I'd suppose it may be useful to also allow passthrough for non-root
users so that, for example, a user's home directory can be shared.



[Qemu-devel] Re: [PATCH v2 2/2] vnc: threaded VNC server

2010-06-05 Thread Corentin Chary
On Fri, Jun 4, 2010 at 3:44 PM, Alexander Graf ag...@suse.de wrote:

 On 04.06.2010, at 15:20, Corentin Chary wrote:

 Implement a threaded VNC server using the producer-consumer model.
 The main thread will push encoding jobs (a list of rectangles to update)
 in a queue, and the VNC worker thread will consume that queue and send
 framebuffer updates to the output buffer.

 The threaded VNC server can be enabled with ./configure --enable-vnc-thread.

 If you don't want it, just use ./configure --disable-vnc-thread and a 
 synchronous
 queue of jobs will be used (which has exactly the same behavior as the old 
 queue).
 If you disable the VNC thread, all thread related code will not be built and 
 there will
 be no overhead.

 Signed-off-by: Corentin Chary corenti...@iksaif.net
 ---
 Makefile.objs      |    7 +-
 configure          |   13 ++
 ui/vnc-jobs-sync.c |   65 ++
 ui/vnc-jobs.c      |  351 
 
 ui/vnc.c           |  169 ++
 ui/vnc.h           |   75 +++
 6 files changed, 657 insertions(+), 23 deletions(-)
 create mode 100644 ui/vnc-jobs-sync.c
 create mode 100644 ui/vnc-jobs.c

 diff --git a/Makefile.objs b/Makefile.objs
 index 22622a9..0c6334b 100644
 --- a/Makefile.objs
 +++ b/Makefile.objs
 @@ -109,10 +109,15 @@ ui-obj-y += vnc-enc-tight.o
 ui-obj-$(CONFIG_VNC_TLS) += vnc-tls.o vnc-auth-vencrypt.o
 ui-obj-$(CONFIG_VNC_SASL) += vnc-auth-sasl.o
 ui-obj-$(CONFIG_COCOA) += cocoa.o
 +ifdef CONFIG_VNC_THREAD
 +ui-obj-y += vnc-jobs.o
 +else
 +ui-obj-y += vnc-jobs-sync.o
 +endif
 common-obj-y += $(addprefix ui/, $(ui-obj-y))

 common-obj-y += iov.o acl.o
 -common-obj-$(CONFIG_IOTHREAD) += qemu-thread.o
 +common-obj-$(CONFIG_THREAD) += qemu-thread.o
 common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o

 diff --git a/configure b/configure
 index 679f2fc..6f2e3a7 100755
 --- a/configure
 +++ b/configure
 @@ -264,6 +264,7 @@ vde=
 vnc_tls=
 vnc_sasl=
 vnc_jpeg=
 +vnc_thread=
 xen=
 linux_aio=
 vhost_net=
 @@ -552,6 +553,10 @@ for opt do
   ;;
   --enable-vnc-jpeg) vnc_jpeg=yes
   ;;
 +  --disable-vnc-thread) vnc_thread=no
 +  ;;
 +  --enable-vnc-thread) vnc_thread=yes
 +  ;;
   --disable-slirp) slirp=no
   ;;
   --disable-uuid) uuid=no
 @@ -786,6 +791,8 @@ echo   --disable-vnc-sasl       disable SASL encryption 
 for VNC server
 echo   --enable-vnc-sasl        enable SASL encryption for VNC server
 echo   --disable-vnc-jpeg       disable JPEG lossy compression for VNC 
 server
 echo   --enable-vnc-jpeg        enable JPEG lossy compression for VNC 
 server
 +echo   --disable-vnc-thread     disable threaded VNC server
 +echo   --enable-vnc-thread      enable threaded VNC server
 echo   --disable-curses         disable curses output
 echo   --enable-curses          enable curses output
 echo   --disable-curl           disable curl connectivity
 @@ -2048,6 +2055,7 @@ echo Mixer emulation   $mixemu
 echo VNC TLS support   $vnc_tls
 echo VNC SASL support  $vnc_sasl
 echo VNC JPEG support  $vnc_jpeg
 +echo VNC thread        $vnc_thread
 if test -n $sparc_cpu; then
     echo Target Sparc Arch $sparc_cpu
 fi
 @@ -2191,6 +2199,10 @@ if test $vnc_jpeg = yes ; then
   echo CONFIG_VNC_JPEG=y  $config_host_mak
   echo VNC_JPEG_CFLAGS=$vnc_jpeg_cflags  $config_host_mak
 fi
 +if test $vnc_thread = yes ; then

 So it's disabled by default? Sounds like a pretty cool and useful feature to 
 me that should be enabled by default.

Because it does not work on Windows (qemu-thread.c only uses
pthread) and because I don't want to break everything :)

 +  echo CONFIG_VNC_THREAD=y  $config_host_mak
 +  echo CONFIG_THREAD=y  $config_host_mak
 +fi
 if test $fnmatch = yes ; then
   echo CONFIG_FNMATCH=y  $config_host_mak
 fi
 @@ -2267,6 +2279,7 @@ if test $xen = yes ; then
 fi
 if test $io_thread = yes ; then
   echo CONFIG_IOTHREAD=y  $config_host_mak
 +  echo CONFIG_THREAD=y  $config_host_mak
 fi
 if test $linux_aio = yes ; then
   echo CONFIG_LINUX_AIO=y  $config_host_mak
 diff --git a/ui/vnc-jobs-sync.c b/ui/vnc-jobs-sync.c
 new file mode 100644
 index 000..9f138f5
 --- /dev/null
 +++ b/ui/vnc-jobs-sync.c
 @@ -0,0 +1,65 @@
 +/*
 + * QEMU VNC display driver
 + *
 + * Copyright (C) 2006 Anthony Liguori anth...@codemonkey.ws
 + * Copyright (C) 2006 Fabrice Bellard
 + * Copyright (C) 2009 Red Hat, Inc
 + * Copyright (C) 2010 Corentin Chary corentin.ch...@gmail.com
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a 
 copy
 + * of this software and associated documentation files (the Software), to 
 deal
 + * in the Software without restriction, including without limitation the 
 rights
 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 + * copies of the Software, and to permit persons to whom the Software is
 + * furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included 
 in
 + * all 

Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-06-05 Thread Jan Kiszka
Blue Swirl wrote:
 On Sat, Jun 5, 2010 at 12:04 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Thu, Jun 3, 2010 at 7:06 AM, Gleb Natapov g...@redhat.com wrote:
 On Thu, Jun 03, 2010 at 10:03:00AM +0300, Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:59:23AM +0200, Jan Kiszka wrote:
 Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:23:46AM +0200, Jan Kiszka wrote:
 Blue Swirl wrote:
 But how about if we introduced instead a message based IRQ? Then the
 message could specify the originator device, maybe ACK/coalesce/NACK
 callbacks and a bigger payload than just 1 bit of level. I think that
 could achieve the same coalescing effect as what the bidirectional
 IRQ. The payload could be useful for other purposes, for example
 Sparc64 IRQ messages contain three 64 bit words.
 If there are more users than just IRQ de-coalescing, this indeed sounds
 superior. We could pass objects like this one around:

 struct qemu_irq_msg {
  void (*delivery_cb)(int result);
  void *payload;
 };

 They would be valid within the scope of the IRQ handlers. Whoever
 terminates or actually delivers the IRQ would invoke the callback. And
 platforms like sparc64 could evaluate the additional payload pointer in
 their irqchips or wherever they need it. IRQ routers on platforms that
 make use of these messages would have to replicate them when forwarding
 an event.

 OK?

 Let me see if I understand you correctly. qemu_set_irq() will get
 additional parameter qemu_irq_msg and if irq was not coalesced
 delivery_cb is called, so there is a guaranty that if delivery_cb is
 called it is done before qemu_set_irq() returns. Correct?
 If the side that triggers an IRQ passes a message object with a non-NULL
 callback, it is supposed to be called unconditionally, passing the
 result of the delivery (delivered, masked, coalesced). And yes, the
 callback will be invoked in the context of the irq handler, so before
 qemu_set_irq (or rather some new qemu_set_irq_msg) returns.

 Looks fine to me.

 Except that delivery_cb should probably get pointer to qemu_irq_msg as a
 parameter.
 I'd like to also support EOI handling. When the guest clears the
 interrupt condition, the EOI callback would be called. This could occur
 much later than the IRQ delivery time. I'm not sure if we need the
 result code in that case.

 If any intermediate device (IOAPIC?) needs to be informed about either
 delivery or EOI also, it could create a proxy message with its
 callbacks in place. But we need then a separate opaque field (in
 addition to payload) to store the original message.

 struct IRQMsg {
  DeviceState *src;
  void (*delivery_cb)(IRQMsg *msg, int result);
  void (*eoi_cb)(IRQMsg *msg, int result);
  void *src_opaque;
  void *payload;
 };
 Extending the lifetime of IRQMsg objects beyond the delivery call stack
 means qemu_malloc/free for every delivery. I think it takes a _very_
 appealing reason to justify this. But so far I do not see any use case
 for eoi_cb at all.
 
 I think it's safer to use allocation model anyway because this will be
 generic code. For example, an intermediate device may want to queue
 the IRQs. Alternatively, the callbacks could use DeviceState and some
 opaque which can be used as the callback context:
   void (*delivery_cb)(DeviceState *src, void *src_opaque, int result);
 
 EOI can be added later if needed, QEMU seems to work fine now without
 it. But based on IOAPIC data sheet, I'd suppose it should be need to
 pass EOI from LAPIC to IOAPIC. It could be used by coalescing as
 another opportunity to inject IRQs though I guess the guest will ack
 the IRQ at the same time for both RTC and APIC.

Let's wait for a real use case for an extended IRQMsg lifetime. For now
we are fine with stack-allocated messages which are way simpler to
handle. I'm already drafting a first prototype based on this model.
Switching to dynamic allocation may still happen later on once the
urgent need shows up.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-06-05 Thread Blue Swirl
On Sat, Jun 5, 2010 at 8:27 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Sat, Jun 5, 2010 at 12:04 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Thu, Jun 3, 2010 at 7:06 AM, Gleb Natapov g...@redhat.com wrote:
 On Thu, Jun 03, 2010 at 10:03:00AM +0300, Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:59:23AM +0200, Jan Kiszka wrote:
 Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:23:46AM +0200, Jan Kiszka wrote:
 Blue Swirl wrote:
 But how about if we introduced instead a message based IRQ? Then the
 message could specify the originator device, maybe ACK/coalesce/NACK
 callbacks and a bigger payload than just 1 bit of level. I think that
 could achieve the same coalescing effect as what the bidirectional
 IRQ. The payload could be useful for other purposes, for example
 Sparc64 IRQ messages contain three 64 bit words.
 If there are more users than just IRQ de-coalescing, this indeed 
 sounds
 superior. We could pass objects like this one around:

 struct qemu_irq_msg {
  void (*delivery_cb)(int result);
  void *payload;
 };

 They would be valid within the scope of the IRQ handlers. Whoever
 terminates or actually delivers the IRQ would invoke the callback. And
 platforms like sparc64 could evaluate the additional payload pointer 
 in
 their irqchips or wherever they need it. IRQ routers on platforms that
 make use of these messages would have to replicate them when 
 forwarding
 an event.

 OK?

 Let me see if I understand you correctly. qemu_set_irq() will get
 additional parameter qemu_irq_msg and if irq was not coalesced
 delivery_cb is called, so there is a guaranty that if delivery_cb is
 called it is done before qemu_set_irq() returns. Correct?
 If the side that triggers an IRQ passes a message object with a non-NULL
 callback, it is supposed to be called unconditionally, passing the
 result of the delivery (delivered, masked, coalesced). And yes, the
 callback will be invoked in the context of the irq handler, so before
 qemu_set_irq (or rather some new qemu_set_irq_msg) returns.

 Looks fine to me.

 Except that delivery_cb should probably get pointer to qemu_irq_msg as a
 parameter.
 I'd like to also support EOI handling. When the guest clears the
 interrupt condition, the EOI callback would be called. This could occur
 much later than the IRQ delivery time. I'm not sure if we need the
 result code in that case.

 If any intermediate device (IOAPIC?) needs to be informed about either
 delivery or EOI also, it could create a proxy message with its
 callbacks in place. But we need then a separate opaque field (in
 addition to payload) to store the original message.

 struct IRQMsg {
  DeviceState *src;
  void (*delivery_cb)(IRQMsg *msg, int result);
  void (*eoi_cb)(IRQMsg *msg, int result);
  void *src_opaque;
  void *payload;
 };
 Extending the lifetime of IRQMsg objects beyond the delivery call stack
 means qemu_malloc/free for every delivery. I think it takes a _very_
 appealing reason to justify this. But so far I do not see any use case
 for eoi_cb at all.

 I think it's safer to use allocation model anyway because this will be
 generic code. For example, an intermediate device may want to queue
 the IRQs. Alternatively, the callbacks could use DeviceState and some
 opaque which can be used as the callback context:
   void (*delivery_cb)(DeviceState *src, void *src_opaque, int result);

 EOI can be added later if needed, QEMU seems to work fine now without
 it. But based on IOAPIC data sheet, I'd suppose it should be need to
 pass EOI from LAPIC to IOAPIC. It could be used by coalescing as
 another opportunity to inject IRQs though I guess the guest will ack
 the IRQ at the same time for both RTC and APIC.

 Let's wait for a real use case for an extended IRQMsg lifetime. For now
 we are fine with stack-allocated messages which are way simpler to
 handle. I'm already drafting a first prototype based on this model.
 Switching to dynamic allocation may still happen later on once the
 urgent need shows up.

Passing around stack-allocated objects is asking for trouble. I'd much
rather use the DeviceState/opaque version then, so at least the
destination should not need to use IRQMsg for anything.



Re: [Qemu-devel] [PATCH v6 5/6] Inter-VM shared memory PCI device

2010-06-05 Thread Blue Swirl
On Fri, Jun 4, 2010 at 9:45 PM, Cam Macdonell c...@cs.ualberta.ca wrote:
 Support an inter-vm shared memory device that maps a shared-memory object as a
 PCI device in the guest.  This patch also supports interrupts between guest by
 communicating over a unix domain socket.  This patch applies to the qemu-kvm
 repository.

    -device ivshmem,size=size in format accepted by -m[,shm=shm name]

 Interrupts are supported between multiple VMs by using a shared memory server
 by using a chardev socket.

    -device ivshmem,size=size in format accepted by -m[,shm=shm name]
           [,chardev=id][,msi=on][,irqfd=on][,vectors=n][,role=peer|master]
    -chardev socket,path=path,id=id

 (shared memory server is qemu.git/contrib/ivshmem-server)

 Sample programs and init scripts are in a git repo here:

    www.gitorious.org/nahanni
 ---
  Makefile.target |    3 +
  hw/ivshmem.c    |  852 
 +++
  qemu-char.c     |    6 +
  qemu-char.h     |    3 +
  qemu-doc.texi   |   43 +++
  5 files changed, 907 insertions(+), 0 deletions(-)
  create mode 100644 hw/ivshmem.c

 diff --git a/Makefile.target b/Makefile.target
 index c4ba592..4888308 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -202,6 +202,9 @@ obj-$(CONFIG_USB_OHCI) += usb-ohci.o
  obj-y += rtl8139.o
  obj-y += e1000.o

 +# Inter-VM PCI shared memory
 +obj-y += ivshmem.o
 +

Can this be compiled once, simply by moving this to Makefile.objs
instead of Makefile.target? Also, because the code seems to be KVM
specific, it can't be compiled unconditionally; it should depend on at
least CONFIG_KVM and maybe CONFIG_EVENTFD.

Why is this KVM specific, BTW? Posix SHM is available on many
platforms. What would happen if kvm_set_foobar functions were not
called when KVM is not being used? Is host eventfd support essential?

  # Hardware support
  obj-i386-y += vga.o
  obj-i386-y += mc146818rtc.o i8259.o pc.o
 diff --git a/hw/ivshmem.c b/hw/ivshmem.c
 new file mode 100644
 index 000..9057612
 --- /dev/null
 +++ b/hw/ivshmem.c
 @@ -0,0 +1,852 @@
 +/*
 + * Inter-VM Shared Memory PCI device.
 + *
 + * Author:
 + *      Cam Macdonell c...@cs.ualberta.ca
 + *
 + * Based On: cirrus_vga.c
 + *          Copyright (c) 2004 Fabrice Bellard
 + *          Copyright (c) 2004 Makoto Suzuki (suzu)
 + *
 + *      and rtl8139.c
 + *          Copyright (c) 2006 Igor Kovalenko
 + *
 + * This code is licensed under the GNU GPL v2.
 + */
 +#include sys/mman.h
 +#include sys/types.h
 +#include sys/socket.h
 +#include sys/io.h
 +#include sys/ioctl.h
 +#include hw.h
 +#include console.h
 +#include pc.h
 +#include pci.h
 +#include sysemu.h
 +
 +#include msix.h
 +#include qemu-kvm.h
 +#include libkvm.h
 +
 +#include sys/eventfd.h
 +#include sys/mman.h
 +#include sys/socket.h
 +#include sys/ioctl.h
 +
 +#define IVSHMEM_IRQFD   0
 +#define IVSHMEM_MSI     1
 +
 +//#define DEBUG_IVSHMEM
 +#ifdef DEBUG_IVSHMEM
 +#define IVSHMEM_DPRINTF(fmt, args...)        \
 +    do {printf(IVSHMEM:  fmt, ##args); } while (0)

Please use __VA_ARGS__.

 +#else
 +#define IVSHMEM_DPRINTF(fmt, args...)
 +#endif
 +
 +typedef struct Peer {
 +    int nb_eventfds;
 +    int *eventfds;
 +} Peer;
 +
 +typedef struct EventfdEntry {
 +    PCIDevice *pdev;
 +    int vector;
 +} EventfdEntry;
 +
 +typedef struct IVShmemState {
 +    PCIDevice dev;
 +    uint32_t intrmask;
 +    uint32_t intrstatus;
 +    uint32_t doorbell;
 +
 +    CharDriverState ** eventfd_chr;

I'd remove the space between '**' and 'eventfd_chr', it's used inconsistently.

 +    CharDriverState * server_chr;
 +    int ivshmem_mmio_io_addr;
 +
 +    pcibus_t mmio_addr;
 +    pcibus_t shm_pci_addr;
 +    uint64_t ivshmem_offset;
 +    uint64_t ivshmem_size; /* size of shared memory region */
 +    int shm_fd; /* shared memory file descriptor */
 +
 +    Peer *peers;
 +    int nb_peers; /* how many guests we have space for */
 +    int max_peer; /* maximum numbered peer */
 +
 +    int vm_id;
 +    uint32_t vectors;
 +    uint32_t features;
 +    EventfdEntry *eventfd_table;
 +
 +    char * shmobj;
 +    char * sizearg;
 +    char * role;
 +} IVShmemState;
 +
 +/* registers for the Inter-VM shared memory device */
 +enum ivshmem_registers {
 +    IntrMask = 0,
 +    IntrStatus = 4,
 +    IVPosition = 8,
 +    Doorbell = 12,
 +};

IIRC these should be uppercase.

 +
 +static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) {
 +    return (ivs-features  (1  feature));
 +}

Since this is the first version, do we need any features at this
point? Can't we expect that all features are available now? Why does
the user need to specify the features?

To avoid a negative shift, I'd make 'feature' unsigned.

 +
 +static inline bool is_power_of_two(uint64_t x) {
 +    return (x  (x - 1)) == 0;
 +}
 +
 +static void ivshmem_map(PCIDevice *pci_dev, int region_num,
 +                    pcibus_t addr, pcibus_t size, int type)
 +{
 +    IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev);
 +
 +    

[Qemu-devel] [PATCH v4 1/7] MIPS: Initial support of bonito north bridge used by fulong mini pc

2010-06-05 Thread Huacai Chen
Signed-off-by: Huacai Chen zltjiang...@gmail.com
---
 Makefile.target  |1 +
 default-configs/mips64el-softmmu.mak |1 +
 hw/bonito.c  |  808 ++
 hw/mips.h|3 +
 4 files changed, 813 insertions(+), 0 deletions(-)
 create mode 100644 hw/bonito.c

diff --git a/Makefile.target b/Makefile.target
index d06c679..ac36e2c 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -221,6 +221,7 @@ obj-mips-y += vga.o i8259.o
 obj-mips-y += g364fb.o jazz_led.o
 obj-mips-y += gt64xxx.o mc146818rtc.o
 obj-mips-y += piix4.o cirrus_vga.o
+obj-mips-$(CONFIG_FULONG) += bonito.o
 
 obj-microblaze-y = petalogix_s3adsp1800_mmu.o
 
diff --git a/default-configs/mips64el-softmmu.mak 
b/default-configs/mips64el-softmmu.mak
index b372c1d..d35d923 100644
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -29,3 +29,4 @@ CONFIG_DP8393X=y
 CONFIG_DS1225Y=y
 CONFIG_MIPSNET=y
 CONFIG_PFLASH_CFI01=y
+CONFIG_FULONG=y
diff --git a/hw/bonito.c b/hw/bonito.c
new file mode 100644
index 000..9dda7ab
--- /dev/null
+++ b/hw/bonito.c
@@ -0,0 +1,808 @@
+/*
+ * bonito north bridge support
+ *
+ * Copyright (c) 2008 yajin (ya...@vm-kernel.org)
+ * Copyright (c) 2010 Huacai Chen (zltjiang...@gmail.com)
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+/*
+ * fulong 2e mini pc has a bonito north bridge.
+ */
+
+/* what is the meaning of devfn in qemu and IDSEL in bonito northbridge?
+ *
+ * devfn   pci_slot<<3  + funno
+ * one pci bus can have 32 devices and each device can have 8 functions.
+ *
+ * In bonito north bridge, pci slot = IDSEL bit - 12.
+ * For example, PCI_IDSEL_VIA686B = 17,
+ * pci slot = 17-12=5
+ *
+ * so
+ * VT686B_FUN0's devfn = (5<<3)+0
+ * VT686B_FUN1's devfn = (5<<3)+1
+ *
+ * qemu also uses pci address for north bridge to access pci config register.
+ * bus_no   [23:16]
+ * dev_no   [15:11]
+ * fun_no   [10:8]
+ * reg_no   [7:2]
+ *
+ * so function bonito_sbridge_pciaddr for the translation from
+ * north bridge address to pci address.
+ */
+
+#include <assert.h>
+
+#include "hw.h"
+#include "pci.h"
+#include "pc.h"
+#include "mips.h"
+#include "pci_host.h"
+
+//#define DEBUG_BONITO
+
+#ifdef DEBUG_BONITO
+#define DPRINTF(fmt, ...) fprintf(stderr, "%s: " fmt, __FUNCTION__, ##__VA_ARGS__)
+#else
+#define DPRINTF(fmt, ...)
+#endif
+
+/* from linux source code. include/asm-mips/mips-boards/bonito64.h*/
+#define BONITO_BOOT_BASE0x1fc0
+#define BONITO_BOOT_SIZE0x0010
+#define BONITO_BOOT_TOP (BONITO_BOOT_BASE+BONITO_BOOT_SIZE-1)
+#define BONITO_FLASH_BASE   0x1c00
+#define BONITO_FLASH_SIZE   0x0300
+#define BONITO_FLASH_TOP(BONITO_FLASH_BASE+BONITO_FLASH_SIZE-1)
+#define BONITO_SOCKET_BASE  0x1f80
+#define BONITO_SOCKET_SIZE  0x0040
+#define BONITO_SOCKET_TOP   (BONITO_SOCKET_BASE+BONITO_SOCKET_SIZE-1)
+#define BONITO_REG_BASE 0x1fe0
+#define BONITO_REG_SIZE 0x0004
+#define BONITO_REG_TOP  (BONITO_REG_BASE+BONITO_REG_SIZE-1)
+#define BONITO_DEV_BASE 0x1ff0
+#define BONITO_DEV_SIZE 0x0010
+#define BONITO_DEV_TOP  (BONITO_DEV_BASE+BONITO_DEV_SIZE-1)
+#define BONITO_PCILO_BASE   0x1000
+#define BONITO_PCILO_BASE_VA0xb000
+#define BONITO_PCILO_SIZE   0x0c00
+#define BONITO_PCILO_TOP(BONITO_PCILO_BASE+BONITO_PCILO_SIZE-1)
+#define BONITO_PCILO0_BASE  0x1000
+#define BONITO_PCILO1_BASE  0x1400
+#define BONITO_PCILO2_BASE  0x1800
+#define BONITO_PCIHI_BASE   0x2000
+#define BONITO_PCIHI_SIZE   0x2000
+#define BONITO_PCIHI_TOP(BONITO_PCIHI_BASE+BONITO_PCIHI_SIZE-1)
+#define BONITO_PCIIO_BASE   0x1fd0
+#define BONITO_PCIIO_BASE_VA0xbfd0
+#define BONITO_PCIIO_SIZE   0x0001
+#define BONITO_PCIIO_TOP(BONITO_PCIIO_BASE+BONITO_PCIIO_SIZE-1)
+#define BONITO_PCICFG_BASE  0x1fe8
+#define BONITO_PCICFG_SIZE  0x0008
+#define BONITO_PCICFG_TOP   (BONITO_PCICFG_BASE+BONITO_PCICFG_SIZE-1)
+
+
+#define BONITO_PCICONFIGBASE0x00
+#define BONITO_REGBASE  0x100
+
+#define BONITO_PCICONFIG_BASE   (BONITO_PCICONFIGBASE+BONITO_REG_BASE)
+#define BONITO_PCICONFIG_SIZE   (0x100)
+
+#define BONITO_INTERNAL_REG_BASE  (BONITO_REGBASE+BONITO_REG_BASE)
+#define BONITO_INTERNAL_REG_SIZE  (0x70)
+
+#define BONITO_SPCICONFIG_BASE  (BONITO_PCICFG_BASE)
+#define BONITO_SPCICONFIG_SIZE  (BONITO_PCICFG_SIZE)
+
+
+
+/* 1. Bonito h/w Configuration */
+/* Power on register */
+
+#define BONITO_BONPONCFG        (0x00 >> 2)      /* 0x100 */
+#define BONITO_BONGENCFG_OFFSET 0x4
+#define BONITO_BONGENCFG        (BONITO_BONGENCFG_OFFSET>>2)   /*0x104 */
+
+/* 2. IO & IDE configuration */
+#define BONITO_IODEVCFG         (0x08 >> 2)      /* 0x108 */
+
+/* 3. IO & IDE configuration */
+#define BONITO_SDCFG            (0x0c >> 2)      /* 0x10c */
+
+/* 4. PCI address map 

[Qemu-devel] [PATCH v4 3/7] MIPS: Initial support of VIA IDE controller used by fulong mini pc

2010-06-05 Thread Huacai Chen
Signed-off-by: Huacai Chen zltjiang...@gmail.com
---
 Makefile.objs|1 +
 default-configs/mips64el-softmmu.mak |1 +
 hw/ide.h |1 +
 hw/ide/via.c |  191 ++
 4 files changed, 194 insertions(+), 0 deletions(-)
 create mode 100644 hw/ide/via.c

diff --git a/Makefile.objs b/Makefile.objs
index 9796dcb..ad9a825 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -200,6 +200,7 @@ hw-obj-$(CONFIG_IDE_ISA) += ide/isa.o
 hw-obj-$(CONFIG_IDE_PIIX) += ide/piix.o
 hw-obj-$(CONFIG_IDE_CMD646) += ide/cmd646.o
 hw-obj-$(CONFIG_IDE_MACIO) += ide/macio.o
+hw-obj-$(CONFIG_IDE_VIA) += ide/via.o
 
 # SCSI layer
 hw-obj-y += lsi53c895a.o
diff --git a/default-configs/mips64el-softmmu.mak 
b/default-configs/mips64el-softmmu.mak
index d35d923..85b7838 100644
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -21,6 +21,7 @@ CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
 CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
+CONFIG_IDE_VIA=y
 CONFIG_NE2000_ISA=y
 CONFIG_SOUND=y
 CONFIG_VIRTIO_PCI=y
diff --git a/hw/ide.h b/hw/ide.h
index 0e7d540..bb635b6 100644
--- a/hw/ide.h
+++ b/hw/ide.h
@@ -12,6 +12,7 @@ void pci_cmd646_ide_init(PCIBus *bus, DriveInfo **hd_table,
  int secondary_ide_enabled);
 void pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 void pci_piix4_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
+void vt82c686b_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 
 /* ide-macio.c */
 int pmac_ide_init (DriveInfo **hd_table, qemu_irq irq,
diff --git a/hw/ide/via.c b/hw/ide/via.c
new file mode 100644
index 000..f99dfcc
--- /dev/null
+++ b/hw/ide/via.c
@@ -0,0 +1,191 @@
+/*
+ * QEMU IDE Emulation: PCI VIA82C686B support.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2006 Openedhand Ltd.
+ * Copyright (c) 2010 Huacai Chen zltjiang...@gmail.com
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <hw/hw.h>
+#include <hw/pc.h>
+#include <hw/pci.h>
+#include <hw/isa.h>
+#include "block.h"
+#include "block_int.h"
+#include "sysemu.h"
+#include "dma.h"
+
+#include <hw/ide/pci.h>
+
+static uint32_t bmdma_readb(void *opaque, uint32_t addr)
+{
+BMDMAState *bm = opaque;
+uint32_t val;
+
+switch (addr & 3) {
+case 0:
+val = bm->cmd;
+break;
+case 2:
+val = bm->status;
+break;
+default:
+val = 0xff;
+break;
+}
+#ifdef DEBUG_IDE
+printf("bmdma: readb 0x%02x : 0x%02x\n", addr, val);
+#endif
+return val;
+}
+
+static void bmdma_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+BMDMAState *bm = opaque;
+#ifdef DEBUG_IDE
+printf("bmdma: writeb 0x%02x : 0x%02x\n", addr, val);
+#endif
+switch (addr & 3) {
+case 2:
+bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
+break;
+default:;
+}
+}
+
+static void bmdma_map(PCIDevice *pci_dev, int region_num,
+pcibus_t addr, pcibus_t size, int type)
+{
+PCIIDEState *d = DO_UPCAST(PCIIDEState, dev, pci_dev);
+int i;
+
+for(i = 0;i < 2; i++) {
+BMDMAState *bm = &d->bmdma[i];
+d->bus[i].bmdma = bm;
+bm->bus = d->bus+i;
+qemu_add_vm_change_state_handler(ide_dma_restart_cb, bm);
+
+register_ioport_write(addr, 1, 1, bmdma_cmd_writeb, bm);
+
+register_ioport_write(addr + 1, 3, 1, bmdma_writeb, bm);
+register_ioport_read(addr, 4, 1, bmdma_readb, bm);
+
+register_ioport_write(addr + 4, 4, 1, bmdma_addr_writeb, bm);
+register_ioport_read(addr + 4, 4, 1, bmdma_addr_readb, bm);
+register_ioport_write(addr + 4, 4, 2, bmdma_addr_writew, bm);
+register_ioport_read(addr + 4, 4, 2, bmdma_addr_readw, bm);
+register_ioport_write(addr + 4, 4, 4, bmdma_addr_writel, bm);
+register_ioport_read(addr + 4, 4, 4, bmdma_addr_readl, bm);
+addr 

[Qemu-devel] [PATCH v4 4/7] MIPS: Initial support of VIA USB controller used by fulong mini pc

2010-06-05 Thread Huacai Chen
Signed-off-by: Huacai Chen zltjiang...@gmail.com
---
 hw/usb-uhci.c |   29 +
 hw/usb-uhci.h |1 +
 2 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c
index 624d55b..91d827e 100644
--- a/hw/usb-uhci.c
+++ b/hw/usb-uhci.c
@@ -1149,6 +1149,25 @@ static int usb_uhci_piix4_initfn(PCIDevice *dev)
 
 pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
 pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_2);
+pci_set_byte(pci_conf + PCI_LATENCY_TIMER, 0x16);
+
+/* USB misc control 1/2 */
+pci_set_long(pci_conf + 0x40,0x1000);
+/* PM capability */
+pci_set_long(pci_conf + 0x80,0x00020001);
+/* USB legacy support  */
+pci_set_long(pci_conf + 0xc0,0x2000);
+
+return usb_uhci_common_initfn(s);
+}
+
+static int usb_uhci_vt82c686b_initfn(PCIDevice *dev)
+{
+UHCIState *s = DO_UPCAST(UHCIState, dev, dev);
+uint8_t *pci_conf = s->dev.config;
+
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_VIA);
+pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_VIA_UHCI);
 return usb_uhci_common_initfn(s);
 }
 
@@ -1164,6 +1183,11 @@ static PCIDeviceInfo uhci_info[] = {
 .qdev.vmsd= vmstate_uhci,
 .init = usb_uhci_piix4_initfn,
 },{
+.qdev.name= "vt82c686b-usb-uhci",
+.qdev.size= sizeof(UHCIState),
+.qdev.vmsd= vmstate_uhci,
+.init = usb_uhci_vt82c686b_initfn,
+},{
 /* end of list */
 }
 };
@@ -1183,3 +1207,8 @@ void usb_uhci_piix4_init(PCIBus *bus, int devfn)
 {
 pci_create_simple(bus, devfn, "piix4-usb-uhci");
 }
+
+void usb_uhci_vt82c686b_init(PCIBus *bus, int devfn)
+{
+pci_create_simple(bus, devfn, "vt82c686b-usb-uhci");
+}
diff --git a/hw/usb-uhci.h b/hw/usb-uhci.h
index 911948e..3e4d377 100644
--- a/hw/usb-uhci.h
+++ b/hw/usb-uhci.h
@@ -5,5 +5,6 @@
 
 void usb_uhci_piix3_init(PCIBus *bus, int devfn);
 void usb_uhci_piix4_init(PCIBus *bus, int devfn);
+void usb_uhci_vt82c686b_init(PCIBus *bus, int devfn);
 
 #endif
-- 
1.7.0.4




[Qemu-devel] [PATCH v4 6/7] MIPS: Initial support of fulong mini pc (machine construction)

2010-06-05 Thread Huacai Chen
Signed-off-by: Huacai Chen zltjiang...@gmail.com
---
 Makefile.target|2 +-
 hw/mips_fulong2e.c |  402 
 2 files changed, 403 insertions(+), 1 deletions(-)
 create mode 100644 hw/mips_fulong2e.c

diff --git a/Makefile.target b/Makefile.target
index 92ba282..f203c6b 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -221,7 +221,7 @@ obj-mips-y += vga.o i8259.o
 obj-mips-y += g364fb.o jazz_led.o
 obj-mips-y += gt64xxx.o mc146818rtc.o
 obj-mips-y += piix4.o cirrus_vga.o
-obj-mips-$(CONFIG_FULONG) += bonito.o vt82c686.o
+obj-mips-$(CONFIG_FULONG) += bonito.o vt82c686.o mips_fulong2e.o
 
 obj-microblaze-y = petalogix_s3adsp1800_mmu.o
 
diff --git a/hw/mips_fulong2e.c b/hw/mips_fulong2e.c
new file mode 100644
index 000..1b54236
--- /dev/null
+++ b/hw/mips_fulong2e.c
@@ -0,0 +1,402 @@
+/*
+ * QEMU fulong 2e mini pc support
+ *
+ * Copyright (c) 2008 yajin (ya...@vm-kernel.org)
+ * Copyright (c) 2009 chenming (chenm...@rdc.faw.com.cn)
+ * Copyright (c) 2010 Huacai Chen (zltjiang...@gmail.com)
+ * This code is licensed under the GNU GPL v2.
+ */
+
+/*
+ * Fulong 2e mini pc is based on ICT/ST Loongson 2e CPU (MIPS III like, 800MHz)
+ * http://www.linux-mips.org/wiki/Fulong
+ *
+ * Loongson 2e user manual:
+ * http://www.loongsondeveloper.com/doc/Loongson2EUserGuide.pdf
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "fdc.h"
+#include "net.h"
+#include "boards.h"
+#include "smbus.h"
+#include "block.h"
+#include "flash.h"
+#include "mips.h"
+#include "mips_cpudevs.h"
+#include "pci.h"
+#include "usb-uhci.h"
+#include "qemu-char.h"
+#include "sysemu.h"
+#include "audio/audio.h"
+#include "qemu-log.h"
+#include "loader.h"
+#include "mips-bios.h"
+#include "ide.h"
+#include "elf.h"
+#include "vt82c686.h"
+#include "mc146818rtc.h"
+
+#define DEBUG_FULONG2E_INIT
+
+#define ENVP_ADDR   0x80002000l
+#define ENVP_NB_ENTRIES16
+#define ENVP_ENTRY_SIZE256
+
+#define MAX_IDE_BUS 2
+#define FULONG_BIOSNAME "pmon_fulong2e.bin"
+
+/* PCI SLOT in fulong 2e */
+#define FULONG2E_VIA_SLOT5
+#define FULONG2E_ATI_SLOT6
+#define FULONG2E_RTL8139_SLOT7
+
+static PITState *pit;
+
+static struct _loaderparams {
+int ram_size;
+const char *kernel_filename;
+const char *kernel_cmdline;
+const char *initrd_filename;
+} loaderparams;
+
+static void prom_set(uint32_t* prom_buf, int index, const char *string, ...)
+{
+va_list ap;
+int32_t table_addr;
+
+if (index >= ENVP_NB_ENTRIES)
+return;
+
+if (string == NULL) {
+prom_buf[index] = 0;
+return;
+}
+
+table_addr = sizeof(int32_t) * ENVP_NB_ENTRIES + index * ENVP_ENTRY_SIZE;
+prom_buf[index] = tswap32(ENVP_ADDR + table_addr);
+
+va_start(ap, string);
+vsnprintf((char *)prom_buf + table_addr, ENVP_ENTRY_SIZE, string, ap);
+va_end(ap);
+}
+
+static int64_t load_kernel (CPUState *env)
+{
+int64_t kernel_entry, kernel_low, kernel_high;
+int index = 0;
+long initrd_size;
+ram_addr_t initrd_offset;
+uint32_t *prom_buf;
+long prom_size;
+
+if (load_elf(loaderparams.kernel_filename, cpu_mips_kseg0_to_phys, NULL,
+ (uint64_t *)&kernel_entry, (uint64_t *)&kernel_low,
+ (uint64_t *)&kernel_high, 0, ELF_MACHINE, 1) < 0) {
+fprintf(stderr, "qemu: could not load kernel '%s'\n",
+loaderparams.kernel_filename);
+exit(1);
+}
+
+/* load initrd */
+initrd_size = 0;
+initrd_offset = 0;
+if (loaderparams.initrd_filename) {
+initrd_size = get_image_size (loaderparams.initrd_filename);
+if (initrd_size > 0) {
+initrd_offset = (kernel_high + ~TARGET_PAGE_MASK) & TARGET_PAGE_MASK;
+if (initrd_offset + initrd_size > ram_size) {
+fprintf(stderr,
+"qemu: memory too small for initial ram disk '%s'\n",
+loaderparams.initrd_filename);
+exit(1);
+}
+initrd_size = load_image_targphys(loaderparams.initrd_filename,
+ initrd_offset, ram_size - initrd_offset);
+}
+if (initrd_size == (target_ulong) -1) {
+fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+loaderparams.initrd_filename);
+exit(1);
+}
+}
+
+/* Setup prom parameters. */
+prom_size = ENVP_NB_ENTRIES * (sizeof(int32_t) + ENVP_ENTRY_SIZE);
+prom_buf = qemu_malloc(prom_size);
+
+prom_set(prom_buf, index++, loaderparams.kernel_filename);
+if (initrd_size > 0) {
+prom_set(prom_buf, index++, "rd_start=0x" PRIx64 " rd_size=%li %s",
+ cpu_mips_phys_to_kseg0(NULL, initrd_offset), initrd_size,
+ loaderparams.kernel_cmdline);
+} else {
+prom_set(prom_buf, index++, loaderparams.kernel_cmdline);
+}
+
+/* Setup minimum environment variables */
+prom_set(prom_buf, index++, busclock=3300);
+ 

[Qemu-devel] [PATCH v4 2/7] MIPS: Initial support of vt82686b south bridge used by fulong mini pc

2010-06-05 Thread Huacai Chen
Signed-off-by: Huacai Chen zltjiang...@gmail.com
---
 Makefile.target |2 +-
 hw/pci_ids.h|8 +
 hw/vt82c686.c   |  567 +++
 hw/vt82c686.h   |   11 +
 4 files changed, 587 insertions(+), 1 deletions(-)
 create mode 100644 hw/vt82c686.c
 create mode 100644 hw/vt82c686.h

diff --git a/Makefile.target b/Makefile.target
index ac36e2c..92ba282 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -221,7 +221,7 @@ obj-mips-y += vga.o i8259.o
 obj-mips-y += g364fb.o jazz_led.o
 obj-mips-y += gt64xxx.o mc146818rtc.o
 obj-mips-y += piix4.o cirrus_vga.o
-obj-mips-$(CONFIG_FULONG) += bonito.o
+obj-mips-$(CONFIG_FULONG) += bonito.o vt82c686.o
 
 obj-microblaze-y = petalogix_s3adsp1800_mmu.o
 
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index fe7a121..39e9f1d 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -78,6 +78,14 @@
 
 #define PCI_VENDOR_ID_XILINX 0x10ee
 
+#define PCI_VENDOR_ID_VIA0x1106
+#define PCI_DEVICE_ID_VIA_ISA_BRIDGE 0x0686
+#define PCI_DEVICE_ID_VIA_IDE0x0571
+#define PCI_DEVICE_ID_VIA_UHCI   0x3038
+#define PCI_DEVICE_ID_VIA_ACPI   0x3057
+#define PCI_DEVICE_ID_VIA_AC97   0x3058
+#define PCI_DEVICE_ID_VIA_MC97   0x3068
+
 #define PCI_VENDOR_ID_MARVELL0x11ab
 
 #define PCI_VENDOR_ID_ENSONIQ0x1274
diff --git a/hw/vt82c686.c b/hw/vt82c686.c
new file mode 100644
index 000..12221b7
--- /dev/null
+++ b/hw/vt82c686.c
@@ -0,0 +1,567 @@
+/*
+ * VT82C686B south bridge support
+ *
+ * Copyright (c) 2008 yajin (ya...@vm-kernel.org)
+ * Copyright (c) 2009 chenming (chenm...@rdc.faw.com.cn)
+ * Copyright (c) 2010 Huacai Chen (zltjiang...@gmail.com)
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#include hw.h
+#include pc.h
+#include vt82c686.h
+#include i2c.h
+#include smbus.h
+#include pci.h
+#include isa.h
+#include sysbus.h
+#include mips.h
+#include apm.h
+#include acpi.h
+#include pm_smbus.h
+
+typedef uint32_t pci_addr_t;
+#include pci_host.h
+//#define DEBUG_VT82C686B
+
+#ifdef DEBUG_VT82C686B
+#define DPRINTF(fmt, ...) fprintf(stderr, "%s: " fmt, __FUNCTION__, ##__VA_ARGS__)
+#else
+#define DPRINTF(fmt, ...)
+#endif
+
+typedef struct SuperIOConfig
+{
+uint8_t config[0xff];
+uint8_t index;
+uint8_t data;
+} SuperIOConfig;
+
+typedef struct VT82C686BState {
+PCIDevice dev;
+SuperIOConfig *superio_conf;
+} VT82C686BState;
+
+static void superio_ioport_writeb(void *opaque, uint32_t addr, uint32_t data)
+{
+int can_write;
+SuperIOConfig *superio_conf = (SuperIOConfig *)opaque;
+
+DPRINTF("superio_ioport_writeb  address 0x%x  val 0x%x  \n", addr, data);
+if (addr == 0x3f0) {
+superio_conf->index = data & 0xff;
+} else {
+/* 0x3f1 */
+switch (superio_conf->index) {
+case 0x00 ... 0xdf:
+case 0xe4:
+case 0xe5:
+case 0xe9 ... 0xed:
+case 0xf3:
+case 0xf5:
+case 0xf7:
+case 0xf9 ... 0xfb:
+case 0xfd ... 0xff:
+can_write = 0;
+break;
+default:
+can_write = 1;
+
+if (can_write) {
+switch (superio_conf->index) {
+case 0xe7:
+if ((data & 0xff) != 0xfe) {
+DPRINTF("chage uart 1 base. unsupported yet \n");
+}
+break;
+case 0xe8:
+if ((data & 0xff) != 0xbe) {
+DPRINTF("chage uart 2 base. unsupported yet \n");
+}
+break;
+
+default:
+superio_conf->config[superio_conf->index] = data & 0xff;
+}
+}
+}
+superio_conf->config[superio_conf->index] = data & 0xff;
+}
+}
+
+static uint32_t superio_ioport_readb(void *opaque, uint32_t addr)
+{
+SuperIOConfig *superio_conf = (SuperIOConfig *)opaque;
+
+DPRINTF("superio_ioport_readb  address 0x%x   \n", addr);
+return (superio_conf->config[superio_conf->index]);
+}
+
+static void vt82c686b_reset(void * opaque)
+{
+PCIDevice *d = opaque;
+uint8_t *pci_conf = d->config;
+VT82C686BState *vt82c = DO_UPCAST(VT82C686BState, dev, d);
+
+pci_set_long(pci_conf + PCI_CAPABILITY_LIST, 0x00c0);
+pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER | PCI_COMMAND_SPECIAL);
+pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_DEVSEL_MEDIUM);
+
+pci_conf[0x48] = 0x01; /* Miscellaneous Control 3 */
+pci_conf[0x4a] = 0x04; /* IDE interrupt Routing */
+pci_conf[0x4f] = 0x03; /* DMA/Master Mem Access Control 3 */
+pci_conf[0x50] = 0x2d; /* PnP DMA Request Control */
+pci_conf[0x59] = 0x04;
+pci_conf[0x5a] = 0x04; /* KBC/RTC Control*/
+pci_conf[0x5f] = 0x04;
+pci_conf[0x77] = 0x10; /* GPIO Control 1/2/3/4 */
+
+vt82c->superio_conf->config[0xe0] = 0x3c;
+   

[Qemu-devel] [PATCH v4 5/7] MIPS: Initial support of fulong mini pc (CPU definition)

2010-06-05 Thread Huacai Chen
Signed-off-by: Huacai Chen zltjiang...@gmail.com
---
 target-mips/mips-defs.h  |4 
 target-mips/translate_init.c |   35 +++
 2 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/target-mips/mips-defs.h b/target-mips/mips-defs.h
index c57de02..38594da 100644
--- a/target-mips/mips-defs.h
+++ b/target-mips/mips-defs.h
@@ -40,6 +40,8 @@
 #defineASE_SMARTMIPS   0x0004
 
 /* Chip specific instructions. */
+#defineINSN_LOONGSON2E  0x2000
+#defineINSN_LOONGSON2F  0x4000
 #defineINSN_VR54XX 0x8000
 
 /* MIPS CPU defines. */
@@ -48,6 +50,8 @@
 #defineCPU_MIPS3   (CPU_MIPS2 | ISA_MIPS3)
 #defineCPU_MIPS4   (CPU_MIPS3 | ISA_MIPS4)
 #defineCPU_VR54XX  (CPU_MIPS4 | INSN_VR54XX)
+#defineCPU_LOONGSON2E  (CPU_MIPS3 | INSN_LOONGSON2E)
+#defineCPU_LOONGSON2F  (CPU_MIPS3 | INSN_LOONGSON2F)
 
 #defineCPU_MIPS5   (CPU_MIPS4 | ISA_MIPS5)
 
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index b79ed56..0d9899e 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -454,6 +454,41 @@ static const mips_def_t mips_defs[] =
 .insn_flags = CPU_MIPS64R2 | ASE_MIPS3D,
 .mmu_type = MMU_TYPE_R4000,
 },
+{
+.name = "Loongson-2E",
+.CP0_PRid = 0x6302,
+/*64KB I-cache and d-cache. 4 way with 32 bit cache line size*/
+.CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) | (0x1<<5) |
+   (0x1<<4) | (0x1<<1),
+/* Note: Config1 is only used internally, Loongson-2E has only Config0. */
+.CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
+.SYNCI_Step = 16,
+.CCRes = 2,
+.CP0_Status_rw_bitmask = 0x35D0,
+.CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV),
+.SEGBITS = 40,
+.PABITS = 40,
+.insn_flags = CPU_LOONGSON2E,
+.mmu_type = MMU_TYPE_R4000,
+},
+{
+  .name = "Loongson-2F",
+  .CP0_PRid = 0x6303,
+  /*64KB I-cache and d-cache. 4 way with 32 bit cache line size*/
+  .CP0_Config0 = (0x1<<17) | (0x1<<16) | (0x1<<11) | (0x1<<8) | (0x1<<5) |
+ (0x1<<4) | (0x1<<1),
+  /* Note: Config1 is only used internally, Loongson-2F has only Config0. */
+  .CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
+  .SYNCI_Step = 16,
+  .CCRes = 2,
+  .CP0_Status_rw_bitmask = 0xF5D0FF1F,   /*bit5:7 not writeable*/
+  .CP1_fcr0 = (0x5 << FCR0_PRID) | (0x1 << FCR0_REV),
+  .SEGBITS = 40,
+  .PABITS = 40,
+  .insn_flags = CPU_LOONGSON2F,
+  .mmu_type = MMU_TYPE_R4000,
+},
+
 #endif
 };
 
-- 
1.7.0.4




Re: [Qemu-devel] Re: [RFT][PATCH 07/15] qemu_irq: Add IRQ handlers with delivery feedback

2010-06-05 Thread Jan Kiszka
Blue Swirl wrote:
 On Sat, Jun 5, 2010 at 8:27 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Sat, Jun 5, 2010 at 12:04 AM, Jan Kiszka jan.kis...@web.de wrote:
 Blue Swirl wrote:
 On Thu, Jun 3, 2010 at 7:06 AM, Gleb Natapov g...@redhat.com wrote:
 On Thu, Jun 03, 2010 at 10:03:00AM +0300, Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:59:23AM +0200, Jan Kiszka wrote:
 Gleb Natapov wrote:
 On Thu, Jun 03, 2010 at 08:23:46AM +0200, Jan Kiszka wrote:
 Blue Swirl wrote:
 But how about if we introduced instead a message based IRQ? Then the
 message could specify the originator device, maybe ACK/coalesce/NACK
 callbacks and a bigger payload than just 1 bit of level. I think 
 that
 could achieve the same coalescing effect as what the bidirectional
 IRQ. The payload could be useful for other purposes, for example
 Sparc64 IRQ messages contain three 64 bit words.
 If there are more users than just IRQ de-coalescing, this indeed 
 sounds
 superior. We could pass objects like this one around:

 struct qemu_irq_msg {
  void (*delivery_cb)(int result);
  void *payload;
 };

 They would be valid within the scope of the IRQ handlers. Whoever
 terminates or actually delivers the IRQ would invoke the callback. 
 And
 platforms like sparc64 could evaluate the additional payload pointer 
 in
 their irqchips or wherever they need it. IRQ routers on platforms 
 that
 make use of these messages would have to replicate them when 
 forwarding
 an event.

 OK?

 Let me see if I understand you correctly. qemu_set_irq() will get
 additional parameter qemu_irq_msg and if irq was not coalesced
 delivery_cb is called, so there is a guaranty that if delivery_cb is
 called it is done before qemu_set_irq() returns. Correct?
 If the side that triggers an IRQ passes a message object with a 
 non-NULL
 callback, it is supposed to be called unconditionally, passing the
 result of the delivery (delivered, masked, coalesced). And yes, the
 callback will be invoked in the context of the irq handler, so before
 qemu_set_irq (or rather some new qemu_set_irq_msg) returns.

 Looks fine to me.

 Except that delivery_cb should probably get pointer to qemu_irq_msg as a
 parameter.
 I'd like to also support EOI handling. When the guest clears the
 interrupt condition, the EOI callback would be called. This could occur
 much later than the IRQ delivery time. I'm not sure if we need the
 result code in that case.

 If any intermediate device (IOAPIC?) needs to be informed about either
 delivery or EOI also, it could create a proxy message with its
 callbacks in place. But we need then a separate opaque field (in
 addition to payload) to store the original message.

 struct IRQMsg {
  DeviceState *src;
  void (*delivery_cb)(IRQMsg *msg, int result);
  void (*eoi_cb)(IRQMsg *msg, int result);
  void *src_opaque;
  void *payload;
 };
 Extending the lifetime of IRQMsg objects beyond the delivery call stack
 means qemu_malloc/free for every delivery. I think it takes a _very_
 appealing reason to justify this. But so far I do not see any use case
 for eoi_cb at all.
 I think it's safer to use allocation model anyway because this will be
 generic code. For example, an intermediate device may want to queue
 the IRQs. Alternatively, the callbacks could use DeviceState and some
 opaque which can be used as the callback context:
   void (*delivery_cb)(DeviceState *src, void *src_opaque, int result);

 EOI can be added later if needed, QEMU seems to work fine now without
 it. But based on IOAPIC data sheet, I'd suppose it should be need to
 pass EOI from LAPIC to IOAPIC. It could be used by coalescing as
 another opportunity to inject IRQs though I guess the guest will ack
 the IRQ at the same time for both RTC and APIC.
 Let's wait for a real use case for an extended IRQMsg lifetime. For now
 we are fine with stack-allocated messages which are way simpler to
 handle. I'm already drafting a first prototype based on this model.
 Switching to dynamic allocation may still happen later on once the
 urgent need shows up.
 
 Passing around stack allocated objects is asking for trouble. I'd much
 rather use the DeviceState/opaque version then, so at least
 destination should not need to use IRQMsg for anything.

Right, I've hidden the IRQMsg object from the target handler, temporarily
storing it in qemu_irq instead. qemu_irq_handler had to be touched
anyway, so I'm now passing the IRQ object to it so that it can invoke
services to trigger the delivery callback or obtain the payload.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] Re: [V9fs-developer] [PATCH] virtio-9p: getattr server implementation for 9P2000.L protocol.

2010-06-05 Thread Aneesh Kumar K. V
On Fri, 04 Jun 2010 07:59:42 -0700, Venkateswararao Jujjuri (JV) 
jv...@linux.vnet.ibm.com wrote:
 Aneesh Kumar K. V wrote:
  On Thu, 3 Jun 2010 18:29:02 +0530, Sripathi Kodi sripat...@in.ibm.com 
  wrote:
  On Wed, 02 Jun 2010 19:49:24 +0530
  Aneesh Kumar K. V aneesh.ku...@linux.vnet.ibm.com wrote:
 
  On Fri, 28 May 2010 16:08:43 +0530, Sripathi Kodi sripat...@in.ibm.com 
  wrote:
  From: M. Mohan Kumar mo...@in.ibm.com
 
  SYNOPSIS
 
size[4] Tgetattr tag[2] fid[4]
 
size[4] Rgetattr tag[2] lstat[n]
 
 DESCRIPTION
 
The getattr transaction inquires about the file identified by fid.
The reply will contain a machine-independent directory entry,
laid out as follows:
 
   qid.type[1]
  the type of the file (directory, etc.), represented as a bit
  vector corresponding to the high 8 bits of the file's mode
  word.
 
   qid.vers[4]
  version number for given path
 
   qid.path[8]
  the file server's unique identification for the file
 
   st_mode[4]
  Permission and flags
 
   st_nlink[8]
  Number of hard links
 
   st_uid[4]
  User id of owner
 
   st_gid[4]
  Group ID of owner
 
   st_rdev[8]
  Device ID (if special file)
 
   st_size[8]
  Size, in bytes
 
   st_blksize[8]
  Block size for file system IO
 
  So it should be scaled by iounit right ? If we say 9p block size is 
  iounit.
  Yes, I think it should be iounit. Currently st_blksize being returned
  in stat structure to the user space does not use this field that comes
  from the server. It is being calculated as follows in
  generic_fillattr():
 
  stat->blksize = (1 << inode->i_blkbits);
 
  So there may not be a need to put st_blksize on the protocol. Further,
  inode->i_blkbits is copied from sb->s_blocksize_bits. For 9P this value
  is obtained as:
  
  That is what linux kernel currently does. But from the protocol point of
  view and not looking at specific linux implementation i would suggest to
  put st_blksize on wire. 
 
 This is part of .L protocol. Specifically for Linux systems. So, if Linux is 
 already
 doing it, I don't think we need to repeat it.
 

But nothing prevents from changing Linux internal implementation. So we
can't depend on Linux kernel internal implementation. Later in 2.6.x we
may not derive stat->blksize from inode->i_blkbits at all. So we cannot
depend on Linux kernel internal implementation.

-aneesh



[Qemu-devel] qemu problem with xp, maybe off topic for this list

2010-06-05 Thread Kristoffer Gustafsson
Hello!
Sorry for mailing about something here that should be on a qemu users forum, 
but I can't find one.
I'm having trouble installing windows xp on my qemu using linux.
I'm totally blind, so I use unattended xp installation.
Everything goes fine until the final reboot, then the win xp just refuses to 
start.
what can this be?
I've tried 2 xp:s so it can't be the windows unattended installation that's 
wrong, but something in qemu
I created images using two guides on the internet.
one is at
http://www.dreamwld.com/qem.txt
I loaded the kqemu module with modprobe kqemu, and then I ran the installation 
from the iso file.
/Kristoffer

Kristoffer Gustafsson
Trelleborgsvägen 1b
514 33 Tranemo

tel: 0325-42093
mobil: 073-8226473
e-post: k...@dreamwld.com
Eller
kristoffer_gustafs...@allmail.net


[Qemu-devel] Re: option-rom (was cg14)

2010-06-05 Thread Bob Breuer
Blue Swirl wrote:
 On Fri, Jun 4, 2010 at 5:40 PM, Artyom Tarasenko
 atar4q...@googlemail.com wrote:
   
 2010/5/27 Bob Breuer breu...@mc.net:
 +/* DBRI (audio) */
 +cpu_register_physical_memory_offset(0xEE0001000ULL, 0x1, 
 bad_mem, 0xE0001000);
   
 Please add a new DBRI device ;-).
 
 Or maybe just a field in hwdef + empty_slot? :-)
   
 Or actually don't bother at all. What is expected at 0xee0001000 is
 not the DBRI device, but its FCode driver.
 I wrote a stub, but don't see that it helps to boot except one has a
 nice device name (

 Probing /obio at 2,0  cgfourteen
 Probing /io...@f,e000/s...@f,e0001000 at f,0  espdma esp sd st
 ledma le SUNW,bpp
 Probing /io...@f,e000/s...@f,e0001000 at e,0  qemu,device-stub
 Probing /io...@f,e000/s...@f,e0001000 at 0,0  Nothing there

  ) and switching off slot e probing is not necessary.


 What would be nice is a generic '-option-rom' switch which would take
 a rom address and rom file or contents
 as params. Or do we have something like this? I mean for qemu-system-sparc.
 

 Maybe SysBusDeviceInfo should have something similar to PCI .romfile
 field, or we should rather have a SBusDeviceInfo. That way ROM
 handling would be automatic.
   

With empty_slot SS-20 OBP accesses just 2 addresses for slot E:
0xEE0001000 - 8bit read (FCode)
0xEE0010000 - 32bit write (put DBRI into reset)

Did a little digging, slot E starts at 0xEE0000000 (0xE << 32 | slot <<
28).  On my SS-20, the DBRI FCode is only 48 bytes which is then
mirrored every 64 bytes within at least the first 8K, and the actual
registers are at offset 64K with a reported length of 256 bytes.

Besides hooking up DBRI (empty_slot or not), I would propose the
following additions to the sun4m_hwdef structure so that the other
missing pieces can then be hooked up to empty_slot.

--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -98,6 +98,10 @@ struct sun4m_hwdef {
 target_phys_addr_t serial_base, fd_base;
 target_phys_addr_t afx_base, idreg_base, dma_base, esp_base, le_base;
 target_phys_addr_t tcx_base, cs_base, apc_base, aux1_base, aux2_base;
+target_phys_addr_t dbri_base, sx_base;
+struct {
+target_phys_addr_t reg_base, vram_base;
+} vsimm[4];
 target_phys_addr_t ecc_base;
 uint32_t ecc_version;
 uint8_t nvram_machine_id;

Also, looks like OpenBIOS would need some additional ranges added under
obio and sbus.  From a SS-20:
ok cd /obio
ok .attributes
ranges       000f  f100  0100
 0001      9000  0400
 0002      9c00  0400
 0003      f000  0400
 0004      fc00  0400
device_type  hierarchical
name obio
ok cd /iommu/sbus
ok .attributes
clock-frequency  017d7840
scsi-initiator-id0007
burst-sizes  00f8007f
ranges       000e    1000
 0001    000e  1000  1000
 0002    000e  2000  1000
 0003    000e  3000  1000
 000e    000e  e000  1000
 000f    000e  f000  1000
address  ffeec000
reg  000f  e0001000  0020
slot-address-bits001c
up-burst-sizes   003f
device_type  hierarchical
name sbus




Re: [Qemu-devel] qemu problem with xp, maybe off topic for this list

2010-06-05 Thread Blue Swirl
On Sat, Jun 5, 2010 at 7:44 PM, Kristoffer Gustafsson k...@dreamwld.com wrote:
 Hello!
 Sorry for mailing about something here that should be on a qemu users forum,
 but I can't find one.

The forum seems to be down, but it should be at:
http://qemu-forum.ipi.fi/

 I'm having trouble with installing windows xp on my qemu using linux.
 I'm totally blind, so I use unattended xp installation.
 Everything goes fine until the final reboot, then the win xp just refuses to
 start.
 what can this be?

This may be related to the earlier bug report "qemu hangs on shutdown or
reboot (XP guest)":
https://bugs.launchpad.net/qemu/+bug/498035

The report is from 2009 and the bug seems to be still unfixed.

 I've tried 2 xp:s so it can't be the windows unattended installation that's
 wrong, but something in qemu
 I created images using two guides on the internet.
 one is at
 http://www.dreamwld.com/qem.txt
 I loaded the kqemu module with modprobe kqemu, and then I ran the
 installation from the iso file.

QEMU does not support kqemu anymore, so please use KVM module instead
if possible.



[Qemu-devel] Re: option-rom (was cg14)

2010-06-05 Thread Blue Swirl
On Sat, Jun 5, 2010 at 8:25 PM, Bob Breuer breu...@mc.net wrote:
 Blue Swirl wrote:
 On Fri, Jun 4, 2010 at 5:40 PM, Artyom Tarasenko
 atar4q...@googlemail.com wrote:

 2010/5/27 Bob Breuer breu...@mc.net:
 +    /* DBRI (audio) */
 +    cpu_register_physical_memory_offset(0xEE0001000ULL, 0x1, 
 bad_mem, 0xE0001000);

 Please add a new DBRI device ;-).

 Or maybe just a field in hwdef + empty_slot? :-)

 Or actually don't bother at all. What is expected at 0xee0001000 is
 not the DBRI device, but its FCode driver.
 I wrote a stub, but don't see that it helps to boot except one has a
 nice device name (

 Probing /obio at 2,0  cgfourteen
 Probing /io...@f,e000/s...@f,e0001000 at f,0  espdma esp sd st
 ledma le SUNW,bpp
 Probing /io...@f,e000/s...@f,e0001000 at e,0  qemu,device-stub
 Probing /io...@f,e000/s...@f,e0001000 at 0,0  Nothing there

  ) and switching off slot e probing is not necessary.


 What would be nice is a generic '-option-rom' switch which would take
 a rom address and rom file or contents
 as params. Or do we have something like this? I mean for qemu-system-sparc.


 Maybe SysBusDeviceInfo should have something similar to PCI .romfile
 field, or we should rather have a SBusDeviceInfo. That way ROM
 handling would be automatic.


 With empty_slot SS-20 OBP accesses just 2 addresses for slot E:
    0xEE0001000 - 8bit read (FCode)
     0xEE0010000 - 32bit write (put DBRI into reset)

 Did a little digging, slot E starts at 0xEE0000000 (0xE << 32 | slot <<
 28).  On my SS-20, the DBRI FCode is only 48 bytes which is then
 mirrored every 64 bytes within at least the first 8K, and the actual
 registers are at offset 64K with a reported length of 256 bytes.

 Besides hooking up DBRI (empty_slot or not), I would propose the
 following additions to the sun4m_hwdef structure so that the other
 missing pieces can then be hooked up to empty_slot.

 --- a/hw/sun4m.c
 +++ b/hw/sun4m.c
 @@ -98,6 +98,10 @@ struct sun4m_hwdef {
     target_phys_addr_t serial_base, fd_base;
     target_phys_addr_t afx_base, idreg_base, dma_base, esp_base, le_base;
     target_phys_addr_t tcx_base, cs_base, apc_base, aux1_base, aux2_base;
 +    target_phys_addr_t dbri_base, sx_base;
 +    struct {
 +        target_phys_addr_t reg_base, vram_base;
 +    } vsimm[4];

OK by itself, but again: should we have a new machine with cg14 or
some switch to select TCX vs. cg14?

Maybe the recently proposed machine subtype patches could help here.

     target_phys_addr_t ecc_base;
     uint32_t ecc_version;
     uint8_t nvram_machine_id;

 Also, looks like OpenBIOS would need some additional ranges added under
 obio and sbus.  From a SS-20:
 ok cd /obio
 ok .attributes
 ranges                       000f  f100  0100
                         0001      9000  0400
                         0002      9c00  0400
                         0003      f000  0400
                         0004      fc00  0400
 device_type              hierarchical
 name                     obio
 ok cd /iommu/sbus
 ok .attributes
 clock-frequency          017d7840
 scsi-initiator-id        0007
 burst-sizes              00f8007f
 ranges                       000e    1000
                         0001    000e  1000  1000
                         0002    000e  2000  1000
                         0003    000e  3000  1000
                         000e    000e  e000  1000
                         000f    000e  f000  1000
 address                  ffeec000
 reg                      000f  e0001000  0020
 slot-address-bits        001c
 up-burst-sizes           003f
 device_type              hierarchical
 name                     sbus

Again, the question is how to pass cg14 vs. TCX info to OpenBIOS.



[Qemu-devel] [PATCH 0/6] APIC/IOAPIC cleanups

2010-06-05 Thread Blue Swirl
Devices should not need to access CPUState. Minimize CPUState use in APIC.

Blue Swirl (6):
  ioapic: unexport ioapic_set_irq
  apic.h: rearrange
  ioapic: add ioapic.h
  apic: avoid passing CPUState from devices
  apic: avoid passing CPUState from CPU code
  apic: avoid using CPUState internals

 hw/apic.c   |   95 ++
 hw/apic.h   |   21 +++---
 hw/ioapic.c |3 +-
 hw/ioapic.h |7 +++
 hw/pc.c |   22 --
 hw/pc_piix.c|2 +-
 target-i386/cpu.h   |   26 -
 target-i386/cpuid.c |6 +++
 target-i386/helper.c|4 +-
 target-i386/kvm.c   |   14 +++---
 target-i386/op_helper.c |8 ++--
 11 files changed, 113 insertions(+), 95 deletions(-)
 create mode 100644 hw/ioapic.h



[Qemu-devel] [PATCH 1/6] ioapic: unexport ioapic_set_irq

2010-06-05 Thread Blue Swirl
There's no need to use ioapic_set_irq() outside of ioapic.c, so
make it static.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/apic.h   |1 -
 hw/ioapic.c |2 +-
 2 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/hw/apic.h b/hw/apic.h
index 132fcab..e1954f4 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -11,7 +11,6 @@ int apic_accept_pic_intr(CPUState *env);
 void apic_deliver_pic_intr(CPUState *env, int level);
 int apic_get_interrupt(CPUState *env);
 qemu_irq *ioapic_init(void);
-void ioapic_set_irq(void *opaque, int vector, int level);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

diff --git a/hw/ioapic.c b/hw/ioapic.c
index 335da6e..e3f8a46 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -94,7 +94,7 @@ static void ioapic_service(IOAPICState *s)
 }
 }

-void ioapic_set_irq(void *opaque, int vector, int level)
+static void ioapic_set_irq(void *opaque, int vector, int level)
 {
 IOAPICState *s = opaque;

-- 
1.7.1



[Qemu-devel] [PATCH 2/6] apic.h: rearrange

2010-06-05 Thread Blue Swirl
Rearrange and add comments to tell where each function is implemented.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/apic.h |8 ++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/apic.h b/hw/apic.h
index e1954f4..c92d188 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -1,7 +1,7 @@
 #ifndef APIC_H
 #define APIC_H

-typedef struct IOAPICState IOAPICState;
+/* apic.c */
 void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
@@ -10,10 +10,14 @@ int apic_init(CPUState *env);
 int apic_accept_pic_intr(CPUState *env);
 void apic_deliver_pic_intr(CPUState *env, int level);
 int apic_get_interrupt(CPUState *env);
-qemu_irq *ioapic_init(void);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

+/* ioapic.c */
+typedef struct IOAPICState IOAPICState;
+qemu_irq *ioapic_init(void);
+
+/* pc.c */
 int cpu_is_bsp(CPUState *env);

 #endif
-- 
1.7.1



[Qemu-devel] [PATCH 4/6] apic: avoid passing CPUState from devices

2010-06-05 Thread Blue Swirl
Pass only APICState from pc.c.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/apic.c |   32 ++--
 hw/apic.h |7 ---
 hw/pc.c   |   10 ++
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 7fbd79b..c4dc52c 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -94,7 +94,7 @@
 #define MSI_ADDR_BASE   0xfee0
 #define MSI_ADDR_SIZE   0x10

-typedef struct APICState {
+struct APICState {
 CPUState *cpu_env;
 uint32_t apicbase;
 uint8_t id;
@@ -118,7 +118,7 @@ typedef struct APICState {
 QEMUTimer *timer;
 int sipi_vector;
 int wait_for_sipi;
-} APICState;
+};

 static int apic_io_memory;
 static APICState *local_apics[MAX_APICS + 1];
@@ -167,9 +167,8 @@ static inline int get_bit(uint32_t *tab, int index)
 return !!(tab[i]  mask);
 }

-static void apic_local_deliver(CPUState *env, int vector)
+static void apic_local_deliver(APICState *s, int vector)
 {
-APICState *s = env-apic_state;
 uint32_t lvt = s-lvt[vector];
 int trigger_mode;

@@ -180,15 +179,15 @@ static void apic_local_deliver(CPUState *env, int vector)

 switch ((lvt  8)  7) {
 case APIC_DM_SMI:
-cpu_interrupt(env, CPU_INTERRUPT_SMI);
+cpu_interrupt(s-cpu_env, CPU_INTERRUPT_SMI);
 break;

 case APIC_DM_NMI:
-cpu_interrupt(env, CPU_INTERRUPT_NMI);
+cpu_interrupt(s-cpu_env, CPU_INTERRUPT_NMI);
 break;

 case APIC_DM_EXTINT:
-cpu_interrupt(env, CPU_INTERRUPT_HARD);
+cpu_interrupt(s-cpu_env, CPU_INTERRUPT_HARD);
 break;

 case APIC_DM_FIXED:
@@ -200,12 +199,11 @@ static void apic_local_deliver(CPUState *env, int vector)
 }
 }

-void apic_deliver_pic_intr(CPUState *env, int level)
+void apic_deliver_pic_intr(APICState *s, int level)
 {
-if (level)
-apic_local_deliver(env, APIC_LVT_LINT0);
-else {
-APICState *s = env-apic_state;
+if (level) {
+apic_local_deliver(s, APIC_LVT_LINT0);
+} else {
 uint32_t lvt = s-lvt[APIC_LVT_LINT0];

 switch ((lvt  8)  7) {
@@ -215,7 +213,7 @@ void apic_deliver_pic_intr(CPUState *env, int level)
 reset_bit(s-irr, lvt  0xff);
 /* fall through */
 case APIC_DM_EXTINT:
-cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
+cpu_reset_interrupt(s-cpu_env, CPU_INTERRUPT_HARD);
 break;
 }
 }
@@ -591,9 +589,8 @@ static void apic_deliver(APICState *s, uint8_t
dest, uint8_t dest_mode,
  trigger_mode);
 }

-int apic_get_interrupt(CPUState *env)
+int apic_get_interrupt(APICState *s)
 {
-APICState *s = env-apic_state;
 int intno;

 /* if the APIC is installed or enabled, we let the 8259 handle the
@@ -615,9 +612,8 @@ int apic_get_interrupt(CPUState *env)
 return intno;
 }

-int apic_accept_pic_intr(CPUState *env)
+int apic_accept_pic_intr(APICState *s)
 {
-APICState *s = env-apic_state;
 uint32_t lvt0;

 if (!s)
@@ -679,7 +675,7 @@ static void apic_timer(void *opaque)
 {
 APICState *s = opaque;

-apic_local_deliver(s-cpu_env, APIC_LVT_TIMER);
+apic_local_deliver(s, APIC_LVT_TIMER);
 apic_timer_update(s, s-next_time);
 }

diff --git a/hw/apic.h b/hw/apic.h
index 419c733..7dc7c62 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -2,14 +2,15 @@
 #define APIC_H

 /* apic.c */
+typedef struct APICState APICState;
 void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
  uint8_t trigger_mode);
 int apic_init(CPUState *env);
-int apic_accept_pic_intr(CPUState *env);
-void apic_deliver_pic_intr(CPUState *env, int level);
-int apic_get_interrupt(CPUState *env);
+int apic_accept_pic_intr(APICState *s);
+void apic_deliver_pic_intr(APICState *s, int level);
+int apic_get_interrupt(APICState *s);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

diff --git a/hw/pc.c b/hw/pc.c
index 9b85c42..fe4ebbe 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -142,7 +142,7 @@ int cpu_get_pic_interrupt(CPUState *env)
 {
 int intno;

-intno = apic_get_interrupt(env);
+intno = apic_get_interrupt(env-apic_state);
 if (intno = 0) {
 /* set irq request if a PIC irq is still pending */
 /* XXX: improve that */
@@ -150,8 +150,9 @@ int cpu_get_pic_interrupt(CPUState *env)
 return intno;
 }
 /* read the irq from the PIC */
-if (!apic_accept_pic_intr(env))
+if (!apic_accept_pic_intr(env-apic_state)) {
 return -1;
+}

 intno = pic_read_irq(isa_pic);
 return intno;
@@ -164,8 +165,9 @@ static void pic_irq_request(void *opaque, int irq,
int level)
 DPRINTF(pic_irqs: %s irq %d\n, level? raise : lower, irq);
 if (env-apic_state) {
 while (env) {
-if (apic_accept_pic_intr(env))
- 

[Qemu-devel] [PATCH 3/6] ioapic: add ioapic.h

2010-06-05 Thread Blue Swirl
Move IOAPIC declarations to a separate file.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/apic.h|4 
 hw/ioapic.c  |1 +
 hw/ioapic.h  |7 +++
 hw/pc_piix.c |2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)
 create mode 100644 hw/ioapic.h

diff --git a/hw/apic.h b/hw/apic.h
index c92d188..419c733 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -13,10 +13,6 @@ int apic_get_interrupt(CPUState *env);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);

-/* ioapic.c */
-typedef struct IOAPICState IOAPICState;
-qemu_irq *ioapic_init(void);
-
 /* pc.c */
 int cpu_is_bsp(CPUState *env);

diff --git a/hw/ioapic.c b/hw/ioapic.c
index e3f8a46..0bcfff2 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -22,6 +22,7 @@

 #include hw.h
 #include pc.h
+#include ioapic.h
 #include apic.h
 #include qemu-timer.h
 #include host-utils.h
diff --git a/hw/ioapic.h b/hw/ioapic.h
new file mode 100644
index 000..0cbaad9
--- /dev/null
+++ b/hw/ioapic.h
@@ -0,0 +1,7 @@
+#ifndef IOAPIC_H
+#define IOAPIC_H
+
+typedef struct IOAPICState IOAPICState;
+qemu_irq *ioapic_init(void);
+
+#endif
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 70f563a..ac207f1 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -24,7 +24,7 @@

 #include hw.h
 #include pc.h
-#include apic.h
+#include ioapic.h
 #include pci.h
 #include usb-uhci.h
 #include usb-ohci.h
-- 
1.7.1



[Qemu-devel] [PATCH 6/6] apic: avoid using CPUState internals

2010-06-05 Thread Blue Swirl
Use only an opaque CPUState pointer and move the actual CPUState
contents handling to cpu.h and cpuid.c.

Set env->halted in pc.c and add a function to get the local APIC state
of the current CPU for the MMIO.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/apic.c   |   40 +++-
 hw/apic.h   |9 -
 hw/pc.c |   12 +++-
 target-i386/cpu.h   |   27 ---
 target-i386/cpuid.c |6 ++
 5 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 91c8d93..332c66e 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -95,7 +95,7 @@
 #define MSI_ADDR_SIZE   0x10

 struct APICState {
-CPUState *cpu_env;
+void *cpu_env;
 uint32_t apicbase;
 uint8_t id;
 uint8_t arb_id;
@@ -320,7 +320,7 @@ void cpu_set_apic_base(APICState *s, uint64_t val)
 /* if disabled, cannot be enabled again */
 if (!(val  MSR_IA32_APICBASE_ENABLE)) {
 s-apicbase = ~MSR_IA32_APICBASE_ENABLE;
-s-cpu_env-cpuid_features = ~CPUID_APIC;
+cpu_clear_apic_feature(s-cpu_env);
 s-spurious_vec = ~APIC_SV_ENABLE;
 }
 }
@@ -508,8 +508,6 @@ void apic_init_reset(APICState *s)
 s-initial_count_load_time = 0;
 s-next_time = 0;
 s-wait_for_sipi = 1;
-
-s-cpu_env-halted = !(s-apicbase  MSR_IA32_APICBASE_BSP);
 }

 static void apic_startup(APICState *s, int vector_num)
@@ -524,13 +522,7 @@ void apic_sipi(APICState *s)

 if (!s-wait_for_sipi)
 return;
-
-s-cpu_env-eip = 0;
-cpu_x86_load_seg_cache(s-cpu_env, R_CS, s-sipi_vector  8,
-   s-sipi_vector  12,
-   s-cpu_env-segs[R_CS].limit,
-   s-cpu_env-segs[R_CS].flags);
-s-cpu_env-halted = 0;
+cpu_x86_load_seg_cache_sipi(s-cpu_env, s-sipi_vector);
 s-wait_for_sipi = 0;
 }

@@ -692,15 +684,14 @@ static void apic_mem_writew(void *opaque,
target_phys_addr_t addr, uint32_t val)

 static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
 {
-CPUState *env;
 APICState *s;
 uint32_t val;
 int index;

-env = cpu_single_env;
-if (!env)
+s = cpu_get_current_apic();
+if (!s) {
 return 0;
-s = env-apic_state;
+}

 index = (addr  4)  0xff;
 switch(index) {
@@ -782,7 +773,6 @@ static void apic_send_msi(target_phys_addr_t addr,
uint32 data)

 static void apic_mem_writel(void *opaque, target_phys_addr_t addr,
uint32_t val)
 {
-CPUState *env;
 APICState *s;
 int index = (addr  4)  0xff;
 if (addr  0xfff || !index) {
@@ -795,10 +785,10 @@ static void apic_mem_writel(void *opaque,
target_phys_addr_t addr, uint32_t val)
 return;
 }

-env = cpu_single_env;
-if (!env)
+s = cpu_get_current_apic();
+if (!s) {
 return;
-s = env-apic_state;
+}

 DPRINTF(write:  TARGET_FMT_plx  = %08x\n, addr, val);

@@ -974,16 +964,16 @@ static CPUWriteMemoryFunc * const apic_mem_write[3] = {
 apic_mem_writel,
 };

-int apic_init(CPUState *env)
+APICState *apic_init(void *env, uint32_t apic_id)
 {
 APICState *s;

-if (last_apic_idx = MAX_APICS)
-return -1;
+if (last_apic_idx = MAX_APICS) {
+return NULL;
+}
 s = qemu_mallocz(sizeof(APICState));
-env-apic_state = s;
 s-idx = last_apic_idx++;
-s-id = env-cpuid_apic_id;
+s-id = apic_id;
 s-cpu_env = env;

 msix_supported = 1;
@@ -1004,5 +994,5 @@ int apic_init(CPUState *env)
 qemu_register_reset(apic_reset, s);

 local_apics[s-idx] = s;
-return 0;
+return s;
 }
diff --git a/hw/apic.h b/hw/apic.h
index 7dc7c62..a6af078 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -7,14 +7,21 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  uint8_t delivery_mode,
  uint8_t vector_num, uint8_t polarity,
  uint8_t trigger_mode);
-int apic_init(CPUState *env);
+APICState *apic_init(void *env, uint32_t apic_id);
 int apic_accept_pic_intr(APICState *s);
 void apic_deliver_pic_intr(APICState *s, int level);
 int apic_get_interrupt(APICState *s);
 void apic_reset_irq_delivered(void);
 int apic_get_irq_delivered(void);
+void cpu_set_apic_base(APICState *s, uint64_t val);
+uint64_t cpu_get_apic_base(APICState *s);
+void cpu_set_apic_tpr(APICState *s, uint8_t val);
+uint8_t cpu_get_apic_tpr(APICState *s);
+void apic_init_reset(APICState *s);
+void apic_sipi(APICState *s);

 /* pc.c */
 int cpu_is_bsp(CPUState *env);
+APICState *cpu_get_current_apic(void);

 #endif
diff --git a/hw/pc.c b/hw/pc.c
index fe4ebbe..913e461 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -749,6 +749,15 @@ int cpu_is_bsp(CPUState *env)
 return env-cpu_index == 0;
 }

+APICState *cpu_get_current_apic(void)
+{
+if (cpu_single_env) {
+return cpu_single_env-apic_state;
+} else {
+return NULL;
+}
+}
+
 /* set CMOS 

[Qemu-devel] [PATCH 5/6] apic: avoid passing CPUState from CPU code

2010-06-05 Thread Blue Swirl
Pass only APICState when accessing APIC from CPU code.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/apic.c   |   39 ---
 target-i386/cpu.h   |   13 +++--
 target-i386/helper.c|4 ++--
 target-i386/kvm.c   |   14 +++---
 target-i386/op_helper.c |8 
 5 files changed, 36 insertions(+), 42 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index c4dc52c..91c8d93 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -310,10 +310,8 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode,
  trigger_mode);
 }

-void cpu_set_apic_base(CPUState *env, uint64_t val)
+void cpu_set_apic_base(APICState *s, uint64_t val)
 {
-APICState *s = env-apic_state;
-
 DPRINTF(cpu_set_apic_base: %016 PRIx64 \n, val);
 if (!s)
 return;
@@ -322,32 +320,28 @@ void cpu_set_apic_base(CPUState *env, uint64_t val)
 /* if disabled, cannot be enabled again */
 if (!(val  MSR_IA32_APICBASE_ENABLE)) {
 s-apicbase = ~MSR_IA32_APICBASE_ENABLE;
-env-cpuid_features = ~CPUID_APIC;
+s-cpu_env-cpuid_features = ~CPUID_APIC;
 s-spurious_vec = ~APIC_SV_ENABLE;
 }
 }

-uint64_t cpu_get_apic_base(CPUState *env)
+uint64_t cpu_get_apic_base(APICState *s)
 {
-APICState *s = env-apic_state;
-
 DPRINTF(cpu_get_apic_base: %016 PRIx64 \n,
 s ? (uint64_t)s-apicbase: 0);
 return s ? s-apicbase : 0;
 }

-void cpu_set_apic_tpr(CPUX86State *env, uint8_t val)
+void cpu_set_apic_tpr(APICState *s, uint8_t val)
 {
-APICState *s = env-apic_state;
 if (!s)
 return;
 s-tpr = (val  0x0f)  4;
 apic_update_irq(s);
 }

-uint8_t cpu_get_apic_tpr(CPUX86State *env)
+uint8_t cpu_get_apic_tpr(APICState *s)
 {
-APICState *s = env-apic_state;
 return s ? s-tpr  4 : 0;
 }

@@ -490,9 +484,8 @@ static void apic_get_delivery_bitmask(uint32_t
*deliver_bitmask,
 }


-void apic_init_reset(CPUState *env)
+void apic_init_reset(APICState *s)
 {
-APICState *s = env-apic_state;
 int i;

 if (!s)
@@ -516,7 +509,7 @@ void apic_init_reset(CPUState *env)
 s-next_time = 0;
 s-wait_for_sipi = 1;

-env-halted = !(s-apicbase  MSR_IA32_APICBASE_BSP);
+s-cpu_env-halted = !(s-apicbase  MSR_IA32_APICBASE_BSP);
 }

 static void apic_startup(APICState *s, int vector_num)
@@ -525,19 +518,19 @@ static void apic_startup(APICState *s, int vector_num)
 cpu_interrupt(s-cpu_env, CPU_INTERRUPT_SIPI);
 }

-void apic_sipi(CPUState *env)
+void apic_sipi(APICState *s)
 {
-APICState *s = env-apic_state;
-
-cpu_reset_interrupt(env, CPU_INTERRUPT_SIPI);
+cpu_reset_interrupt(s-cpu_env, CPU_INTERRUPT_SIPI);

 if (!s-wait_for_sipi)
 return;

-env-eip = 0;
-cpu_x86_load_seg_cache(env, R_CS, s-sipi_vector  8,
s-sipi_vector  12,
-   env-segs[R_CS].limit, env-segs[R_CS].flags);
-env-halted = 0;
+s-cpu_env-eip = 0;
+cpu_x86_load_seg_cache(s-cpu_env, R_CS, s-sipi_vector  8,
+   s-sipi_vector  12,
+   s-cpu_env-segs[R_CS].limit,
+   s-cpu_env-segs[R_CS].flags);
+s-cpu_env-halted = 0;
 s-wait_for_sipi = 0;
 }

@@ -957,7 +950,7 @@ static void apic_reset(void *opaque)
 (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE;

 cpu_reset(s-cpu_env);
-apic_init_reset(s-cpu_env);
+apic_init_reset(s);

 if (bsp) {
 /*
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 548ab80..0b19fe3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -860,11 +860,12 @@ void cpu_x86_update_cr3(CPUX86State *env,
target_ulong new_cr3);
 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4);

 /* hw/apic.c */
-void cpu_set_apic_base(CPUX86State *env, uint64_t val);
-uint64_t cpu_get_apic_base(CPUX86State *env);
-void cpu_set_apic_tpr(CPUX86State *env, uint8_t val);
+typedef struct APICState APICState;
+void cpu_set_apic_base(APICState *s, uint64_t val);
+uint64_t cpu_get_apic_base(APICState *s);
+void cpu_set_apic_tpr(APICState *s, uint8_t val);
 #ifndef NO_CPU_IO_DEFS
-uint8_t cpu_get_apic_tpr(CPUX86State *env);
+uint8_t cpu_get_apic_tpr(APICState *s);
 #endif

 /* hw/pc.c */
@@ -942,8 +943,8 @@ static inline void cpu_get_tb_cpu_state(CPUState
*env, target_ulong *pc,
 (env-eflags  (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
 }

-void apic_init_reset(CPUState *env);
-void apic_sipi(CPUState *env);
+void apic_init_reset(APICState *s);
+void apic_sipi(APICState *s);
 void do_cpu_init(CPUState *env);
 void do_cpu_sipi(CPUState *env);
 #endif /* CPU_I386_H */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index c9508a8..718394c 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1150,12 +1150,12 @@ void do_cpu_init(CPUState *env)
 int sipi = env-interrupt_request  CPU_INTERRUPT_SIPI;
 cpu_reset(env);
 env-interrupt_request = sipi;
-apic_init_reset(env);
+

Re: [Qemu-devel] [PATCH-V5 0/10] virtio-9p:Introducing security model for VirtFS

2010-06-05 Thread Venkateswararao Jujjuri (JV)
Blue Swirl wrote:
 On Sat, Jun 5, 2010 at 1:08 AM, Venkateswararao Jujjuri (JV)
 jv...@linux.vnet.ibm.com wrote:
 This patch series introduces the security model for VirtFS.

 Brief description of this patch series:

 It introduces two type of security models for VirtFS.
 They are: mapped and passthrough.

 The following is common to both security models.

 * Client's VFS determines/enforces the access control.
  Largely server should never return EACCESS.

 * Client sends gid/mode-bit information as part of creation only.

 Changes from V4
 ---
 o Most of the cosmetic changes proposed by Aneesh.
 o Divided into more number of patches and added more explanation to each 
 patch.

 Changes from V3
 ---
 o Return NULL instead of exit(1) on failure in virtio_9p_init()
 o Capitalized sm_passthrough, sm_mapped
 o Added handling for EINTR for read/write.
 o Corrected default permissions for mkdir in mapped mode.
 o Added additional error handling.

 Changes from V2
 ---
 o Removed warnings resulting from chmod/chown.
 o Added code to fail normally if security_model option is not specified.

 Changes from V1
 ---
 o Added support for chmod and chown.
 o Used chmod/chown to set credentials instead of setuid/setgid.
 o Fixed a bug where uid was used instead of uid.


 Security model: mapped
 --

 VirtFS server(QEMU) intercepts and maps all the file object create requests.
 Files on the fileserver will be created with QEMU's user credentials and the
 client-user's credentials are stored in extended attributes.
 During getattr() server extracts the client-user's credentials from extended
 attributes and sends to the client.

 Given that only the user space extended attributes are available to regular
 files, special files are created as regular files on the fileserver and the
 appropriate mode bits are stored in xattrs and will be extracted during
 getattr.

 If the extended attributes are missing, server sends back the filesystem
 stat() unaltered. This provision will make the files created on the
 fileserver usable to client.

 Points to be considered

 * Filesystem will be VirtFS'ized. Meaning, other filesystems may not
  understand the credentials of the files created under this model.

 * Regular utilities like 'df' may not report required results in this model.
  Need for special reporting utilities which can understand this security 
 model.
 
 Just some thought: This part could have other uses too, for example
 vext3 would be more useful than vvfat. Also VFAT attributes
 hidden/system/archived could be implemented with extended attributes.
 
 Another point to consider is that this will be Linux specific, if my
 earlier patch to make v9fs available to all Posix systems makes sense,
 this part should be conditional.

The protocol we are developing is 9P2000.L, which is Linux specific protocol.
You mean there could be some Posix systems that don't support extended
attributes?

As per this page BSDs and other systems also support extended attributes.
http://en.wikipedia.org/wiki/Extended_file_attributes

I think it is a good point. Before we take the patch to make it generic
for all Posix systems, maybe we should enable it per OS and start with Linux?
Or maybe we should wait until it becomes more stable and feature complete on
Linux before considering other OSes?

Thanks,
JV





 
 Security model : passthrough
 

 In this security model, VirtFS server passes down all requests to the
 underlying filesystem. File system objects on the fileserver will be created
 with client-user's credentials. This is done by setting setuid()/setgid()
 during creation or ch* after file creation. At the end of create protocol
 request, files on the fileserver will be owned by client-user's uid/gid.

 Points to be considered

  * Fileserver should always run as 'root'.
  * Root squashing may be needed. Will be for future work.
  * Potential for user credential clash between guest's user space IDs and
host's user space IDs.

 It also adds security model attribute to -fsdev device and to -virtfs 
 shortcut.
 
 I'd suppose it may be useful to also allow passthrough for non-root
 users so that for example user's home directory can be shared.