Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-01-14 Thread Shivam Kumar



> On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> 
> From: Juan Quintela 
> 
> This implements the zero page detection and handling.
> 
> Signed-off-by: Juan Quintela 
> ---
> migration/multifd.c | 41 +++--
> migration/multifd.h |  5 +
> 2 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 5a1f50c7e8..756673029d 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -11,6 +11,7 @@
>  */
> 
> #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
> #include "qemu/rcu.h"
> #include "exec/target_page.h"
> #include "sysemu/sysemu.h"
> @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> *p)
> 
> packet->offset[i] = cpu_to_be64(temp);
> }
> +for (i = 0; i < p->zero_num; i++) {
> +/* there are architectures where ram_addr_t is 32 bit */
> +uint64_t temp = p->zero[i];
> +
> +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> +}
> }
> 
> static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> @@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
> *p, Error **errp)
> p->normal[i] = offset;
> }
> 
> +for (i = 0; i < p->zero_num; i++) {
> +uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
> +
> +if (offset > (p->block->used_length - p->page_size)) {
> +error_setg(errp, "multifd: offset too long %" PRIu64
> +   " (max " RAM_ADDR_FMT ")",
> +   offset, p->block->used_length);
> +return -1;
> +}
> +p->zero[i] = offset;
> +}
> +
> return 0;
> }
> 
> @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque)
> MultiFDSendParams *p = opaque;
> MigrationThread *thread = NULL;
> Error *local_err = NULL;
> +/* qemu older than 8.2 don't understand zero page on multifd channel */
> +bool use_zero_page = !migrate_use_main_zero_page();
> int ret = 0;
> bool use_zero_copy_send = migrate_zero_copy_send();
> 
> @@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque)
> qemu_mutex_lock(&p->mutex);
> 
> if (p->pending_job) {
> +RAMBlock *rb = p->pages->block;
> uint64_t packet_num = p->packet_num;
> uint32_t flags;
> p->normal_num = 0;
> @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque)
> }
> 
> for (int i = 0; i < p->pages->num; i++) {
> -p->normal[p->normal_num] = p->pages->offset[i];
> -p->normal_num++;
> +uint64_t offset = p->pages->offset[i];
> +if (use_zero_page &&
> +buffer_is_zero(rb->host + offset, p->page_size)) {
> +p->zero[p->zero_num] = offset;
> +p->zero_num++;
> +ram_release_page(rb->idstr, offset);
> +} else {
> +p->normal[p->normal_num] = offset;
> +p->normal_num++;
> +}
> }
> 
> if (p->normal_num) {
> @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque)
> }
> }
> 
> +for (int i = 0; i < p->zero_num; i++) {
> +void *page = p->host + p->zero[i];
> +if (!buffer_is_zero(page, p->page_size)) {
> +memset(page, 0, p->page_size);
> +}
> +}
> +
I am wondering if zeroing the zero page on the destination can also be 
offloaded to DSA. Can it help in reducing cpu consumption on the destination in 
case of multifd-based migration?
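For what it's worth, a rough sketch of what that could look like on the
receive side (purely illustrative; dsa_batch_fill_async()/dsa_batch_wait()
are hypothetical helpers, not something this series provides):

    g_autofree void **to_clear = g_new(void *, p->zero_num);
    int n = 0;

    for (int i = 0; i < p->zero_num; i++) {
        void *page = p->host + p->zero[i];
        /* Only touch pages that are not already zero, as the CPU path does. */
        if (!buffer_is_zero(page, p->page_size)) {
            to_clear[n++] = page;
        }
    }
    if (n) {
        /* Hand the whole batch of zero-fills to the accelerator at once. */
        dsa_batch_fill_async(to_clear, n, p->page_size, 0);
        dsa_batch_wait();
    }

The buffer_is_zero() pre-check would still run on the CPU here; only the
actual clearing is offloaded, so the win probably depends on how many of the
transmitted zero pages are not already zero on the destination.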
> if (flags & MULTIFD_FLAG_SYNC) {
> qemu_sem_post(&multifd_recv_state->sem_sync);
> qemu_sem_wait(&p->sem_sync);
> diff --git a/migration/multifd.h b/migration/multifd.h
> index d587b0e19c..13762900d4 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -53,6 +53,11 @@ typedef struct {
> uint32_t unused32[1];/* Reserved for future use */
> uint64_t unused64[3];/* Reserved for future use */
> char ramblock[256];
> +/*
> + * This array contains the pointers to:
> + *  - normal pages (initial normal_pages entries)
> + *  - zero pages (following zero_pages entries)
> + */
> uint64_t offset[];
> } __attribute__((packed)) MultiFDPacket_t;
> 
> -- 
> 2.30.2
> 
> 
> 




Re: [PATCH v3 13/20] migration/multifd: Prepare to introduce DSA acceleration on the multifd path.

2024-01-14 Thread Shivam Kumar


> On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> 
> 1. Refactor multifd_send_thread function.
> 2. Implement buffer_is_zero_use_cpu to handle CPU based zero page
> checking.
> 3. Introduce the batch task structure in MultiFDSendParams.
> 
> Signed-off-by: Hao Xiang 
> ---
> include/qemu/dsa.h  | 43 +++--
> migration/multifd.c | 77 -
> migration/multifd.h |  2 ++
> util/dsa.c  | 51 +-
> 4 files changed, 148 insertions(+), 25 deletions(-)
> 
> diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
> index e002652879..fe7772107a 100644
> --- a/include/qemu/dsa.h
> +++ b/include/qemu/dsa.h
> @@ -2,6 +2,7 @@
> #define QEMU_DSA_H
> 
> #include "qemu/error-report.h"
> +#include "exec/cpu-common.h"
> #include "qemu/thread.h"
> #include "qemu/queue.h"
> 
> @@ -42,6 +43,20 @@ typedef struct dsa_batch_task {
> QSIMPLEQ_ENTRY(dsa_batch_task) entry;
> } dsa_batch_task;
> 
> +#endif
> +
> +struct batch_task {
> +/* Address of each pages in pages */
> +ram_addr_t *addr;
> +/* Zero page checking results */
> +bool *results;
> +#ifdef CONFIG_DSA_OPT
> +struct dsa_batch_task *dsa_batch;
> +#endif
> +};
> +
> +#ifdef CONFIG_DSA_OPT
> +
> /**
>  * @brief Initializes DSA devices.
>  *
> @@ -74,7 +89,7 @@ void dsa_cleanup(void);
> bool dsa_is_running(void);
> 
> /**
> - * @brief Initializes a buffer zero batch task.
> + * @brief Initializes a buffer zero DSA batch task.
>  *
>  * @param task A pointer to the batch task to initialize.
>  * @param results A pointer to an array of zero page checking results.
> @@ -102,7 +117,7 @@ void buffer_zero_batch_task_destroy(struct dsa_batch_task 
> *task);
>  * @return Zero if successful, otherwise non-zero.
>  */
> int
> -buffer_is_zero_dsa_batch_async(struct dsa_batch_task *batch_task,
> +buffer_is_zero_dsa_batch_async(struct batch_task *batch_task,
>const void **buf, size_t count, size_t len);
> 
> #else
> @@ -128,6 +143,30 @@ static inline void dsa_stop(void) {}
> 
> static inline void dsa_cleanup(void) {}
> 
> +static inline int
> +buffer_is_zero_dsa_batch_async(struct batch_task *batch_task,
> +   const void **buf, size_t count, size_t len)
> +{
> +exit(1);
> +}
> +
> #endif
> 
> +/**
> + * @brief Initializes a general buffer zero batch task.
> + *
> + * @param task A pointer to the general batch task to initialize.
> + * @param batch_size The number of zero page checking tasks in the batch.
> + */
> +void
> +batch_task_init(struct batch_task *task, int batch_size);
> +
> +/**
> + * @brief Destroys a general buffer zero batch task.
> + *
> + * @param task A pointer to the general batch task to destroy.
> + */
> +void
> +batch_task_destroy(struct batch_task *task);
> +
> #endif
> diff --git a/migration/multifd.c b/migration/multifd.c
> index eece85569f..e7c549b93e 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -14,6 +14,8 @@
> #include "qemu/cutils.h"
> #include "qemu/rcu.h"
> #include "qemu/cutils.h"
> +#include "qemu/dsa.h"
> +#include "qemu/memalign.h"
> #include "exec/target_page.h"
> #include "sysemu/sysemu.h"
> #include "exec/ramblock.h"
> @@ -574,6 +576,8 @@ void multifd_save_cleanup(void)
> p->name = NULL;
> multifd_pages_clear(p->pages);
> p->pages = NULL;
> +batch_task_destroy(p->batch_task);
> +p->batch_task = NULL;
> p->packet_len = 0;
> g_free(p->packet);
> p->packet = NULL;
> @@ -678,13 +682,66 @@ int multifd_send_sync_main(QEMUFile *f)
> return 0;
> }
> 
> +static void set_page(MultiFDSendParams *p, bool zero_page, uint64_t offset)
> +{
> +RAMBlock *rb = p->pages->block;
> +if (zero_page) {
> +p->zero[p->zero_num] = offset;
> +p->zero_num++;
> +ram_release_page(rb->idstr, offset);
> +} else {
> +p->normal[p->normal_num] = offset;
> +p->normal_num++;
> +}
> +}
> +
> +static void buffer_is_zero_use_cpu(MultiFDSendParams *p)
> +{
> +const void **buf = (const void **)p->batch_task->addr;
> +assert(!migrate_use_main_zero_page());
> +
> +for (int i = 0; i < p->pages->num; i++) {
> +p->batch_task->results[i] = buffer_is_zero(buf[i], p->page_size);
> +}
> +}
> +
> +static void set_normal_pages(MultiFDSendParams *p)
> +{
> +for (int i = 0; i < p->pages->num; i++) {
> +p->batch_task->results[i] = false;
> +}
> +}
Please correct me if I am wrong, but set_normal_pages will not be a part of the
final patch, right? It is there for testing the performance against different
zero page ratio scenarios. If so, can we isolate these parts into a separate
patch?
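For what it's worth, one plausible shape for the dispatch (purely a sketch on
my side, not the author's code) keeps set_normal_pages() for the
main-zero-page case, where detection has already happened on the main
migration thread, and routes everything else to the CPU or DSA checker:

    static void multifd_zero_page_check(MultiFDSendParams *p)
    {
        if (migrate_use_main_zero_page()) {
            /* Zero pages were already filtered out on the main thread. */
            set_normal_pages(p);
            return;
        }
    #ifdef CONFIG_DSA_OPT
        if (dsa_is_running()) {
            /* Results land in p->batch_task->results, as in the CPU path. */
            buffer_is_zero_dsa_batch_async(p->batch_task,
                                           (const void **)p->batch_task->addr,
                                           p->pages->num, p->page_size);
            return;
        }
    #endif
        buffer_is_zero_use_cpu(p);
    }

If it really is only a benchmarking aid, then yes, splitting it into its own
patch (or putting it behind an explicit unstable knob) would keep the main
series cleaner.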
> +
> +static void multifd_zero_page_check(MultiFDSendParams *p)
> +{
> +/* older qemu don't understand zero page on multifd channel */
> +bool use_multifd_zero_page = !migrate_use_main_zero_page();
> +
> +RAMBlock *rb = p->pages->block;
> 

Re: [PATCH v3 01/20] multifd: Add capability to enable/disable zero_page

2024-01-14 Thread Shivam Kumar


> On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> 
> From: Juan Quintela 
> 
> We have to enable it by default until we introduce the new code.
> 
> Signed-off-by: Juan Quintela 
> ---
> migration/options.c | 15 +++
> migration/options.h |  1 +
> qapi/migration.json |  8 +++-
> 3 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/options.c b/migration/options.c
> index 8d8ec73ad9..0f6bd78b9f 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -204,6 +204,8 @@ Property migration_properties[] = {
> DEFINE_PROP_MIG_CAP("x-switchover-ack",
> MIGRATION_CAPABILITY_SWITCHOVER_ACK),
> DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
> +DEFINE_PROP_MIG_CAP("main-zero-page",
> +MIGRATION_CAPABILITY_MAIN_ZERO_PAGE),
> DEFINE_PROP_END_OF_LIST(),
> };
> 
> @@ -284,6 +286,19 @@ bool migrate_multifd(void)
> return s->capabilities[MIGRATION_CAPABILITY_MULTIFD];
> }
> 
> +bool migrate_use_main_zero_page(void)
> +{
> +/* MigrationState *s; */
> +
> +/* s = migrate_get_current(); */
> +
> +/*
> + * We will enable this when we add the right code.
> + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
> + */
> +return true;
> +}
> +
> bool migrate_pause_before_switchover(void)
> {
> MigrationState *s = migrate_get_current();
> diff --git a/migration/options.h b/migration/options.h
> index 246c160aee..c901eb57c6 100644
> --- a/migration/options.h
> +++ b/migration/options.h
> @@ -88,6 +88,7 @@ int migrate_multifd_channels(void);
> MultiFDCompression migrate_multifd_compression(void);
> int migrate_multifd_zlib_level(void);
> int migrate_multifd_zstd_level(void);
> +bool migrate_use_main_zero_page(void);
> uint8_t migrate_throttle_trigger_threshold(void);
> const char *migrate_tls_authz(void);
> const char *migrate_tls_creds(void);
> diff --git a/qapi/migration.json b/qapi/migration.json
> index eb2f883513..80c4b13516 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -531,6 +531,12 @@
> # and can result in more stable read performance.  Requires KVM
> # with accelerator property "dirty-ring-size" set.  (Since 8.1)
> #
> +#
> +# @main-zero-page: If enabled, the detection of zero pages will be
> +#  done on the main thread.  Otherwise it is done on
> +#  the multifd threads.
> +#  (since 8.2)
> +#
Should the capability name be something like "zero-page-detection" or just 
“zero-page”?
CC: Fabiano Rosas
> # Features:
> #
> # @deprecated: Member @block is deprecated.  Use blockdev-mirror with
> @@ -555,7 +561,7 @@
>{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
>'validate-uuid', 'background-snapshot',
>'zero-copy-send', 'postcopy-preempt', 'switchover-ack',
> -   'dirty-limit'] }
> +   'dirty-limit', 'main-zero-page'] }
> 
> ##
> # @MigrationCapabilityStatus:
> -- 
> 2.30.2
> 
> 
> 



Request for Assistance: Adding I2C Support in QEMU for Raspberry Pi (BCM2835 Peripherals)

2023-07-09 Thread Shivam
Hi, I have added bcm2835_i2c.c inside the i2c folder and wired it up in
bcm2835_peripheral.c. I tried to launch QEMU with "-device
pca9552,address=0x60", and it launches without any error, which means it is
able to find the i2c bus.

But in the emulated Raspberry Pi terminal I am not able to detect this device.

I tried to enable i2c, but it says "Unknown parameter i2c_arm".

And "i2cdetect -y 1" throws this error:

Error: Could not open file `/dev/i2c-1' or `/dev/i2c/1': No such file or
directory

This is the output of "info qtree" (related to i2c) in the QEMU monitor:

dev: bcm2835-i2c, id ""
  gpio-out "sysbus-irq" 1
  mmio /1000
  bus: i2c
    type i2c-bus
    dev: pca9552, id ""
      gpio-out "" 16
      description = "pca-unspecified"
      address = 96 (0x60)

Thanks & Regards
Shivam Vijay


Re: Request for Assistance: Adding I2C Support in QEMU for Raspberry Pi (BCM2835 Peripherals)

2023-06-30 Thread Shivam
One more thing: I have tried adding "-device pca9552,address=0x60" to the QEMU
command line; since that works, it means the i2c bus has been added, right?


Re: Request for Assistance: Adding I2C Support in QEMU for Raspberry Pi (BCM2835 Peripherals)

2023-06-30 Thread Shivam
Hi, I integrated the i2c controller into bcm2835_peripheral.c, but when
running "sudo i2cdetect -y 1" I get this error:
 Error: Could not open file `/dev/i2c-1' or `/dev/i2c/1': No such file or
directory
I am not able to understand how /dev/i2c-1 in the guest is mapped to the i2c
controller device that I have added. Can anyone point me to resources for
understanding this issue?


Facing difficulties in building QEMU for windows

2023-06-24 Thread Shivam
I am currently following these resources to build QEMU: the "Hosts/W32" page
on the QEMU wiki.

However, when starting qemu-system-aarch64.exe in a console, nothing happens.
I expected a window to show up. Shortly after starting the exe, I get the
prompt back. Nothing is printed to the console. No necessary DLL is missing.
What could be the problem?


Request for Assistance: Adding I2C Support in QEMU for Raspberry Pi (BCM2835 Peripherals)

2023-06-20 Thread Shivam
Hi,

I hope this email finds you well. I am reaching out to seek guidance and
assistance regarding a project I am working on involving the addition of
I2C support in QEMU for the Raspberry Pi, specifically targeting the
BCM2835 peripherals.

I have been studying the BCM2835 datasheet to familiarize myself with the
I2C device registers and their functionalities. Currently, I have started
implementing the i2c controller for the bcm2835, but I cannot work out how
to integrate it with the BCM2835 SoC (bcm2835_peripheral.c).

I am attaching bcm2835_i2c.c (which has a basic template for the BSC0
controller).
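In case it helps, the devices already wired up in hw/arm/bcm2835_peripherals.c
follow a fairly uniform pattern, so integrating a new controller is mostly
boilerplate. A rough sketch only; the field name s->i2c0, TYPE_BCM2835_I2C,
BSC0_OFFSET and INTERRUPT_I2C below are placeholders for whatever names the
new device ends up using:

    /* In the peripherals' instance_init: create the child object. */
    object_initialize_child(obj, "bsc0", &s->i2c0, TYPE_BCM2835_I2C);

    /* In the peripherals' realize: realize the child, map its registers into
     * the peripheral address space and wire its interrupt to the GPU IC. */
    if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c0), errp)) {
        return;
    }
    memory_region_add_subregion(&s->peri_mr, BSC0_OFFSET,
            sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->i2c0), 0));
    sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c0), 0,
            qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ,
                                   INTERRUPT_I2C));

The existing devices wired up in that file are probably the best reference
for the exact field and macro names.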


Thanks & Regards
Shivam Vijay


bcm2835_i2c.c
Description: Binary data


Support for ACK and PWR Leds in Raspberry Pi

2023-06-13 Thread Shivam
Hi, I want to add ACK and PWR LED support to the QEMU raspi machine, so that
from the terminal of the emulated Raspberry Pi I am able to control these
LEDs. I couldn't understand how these LEDs are controlled at the hardware
level, so if anyone can help me get started on that, it would be great.

Thanks & Regards
Shivam Vijay


Seeking help regarding addition of Custom GPIO device inside raspi machine QEMU

2023-06-11 Thread Shivam
Hi, I am writing to discuss my current project involving the addition of
peripheral support and testing in QEMU for Raspberry Pi. As part of this
endeavor, I have developed a device called "raspi_gpio" specifically
designed for handling GPIO operations. This device includes functionalities
for both reading and writing data.

To facilitate communication between the GPIO pins and the host machine, I
have implemented a shared memory mechanism within the "raspi_gpio" device.
However, I have encountered a challenge in the process of integrating my
device into the raspi machine. I have been unable to determine the
appropriate steps for adding my device effectively.

So I would request if anyone can help me with this issue.

Thanks & Regards
Shivam Vijay.


Re: [RFC PATCH 1/1] Dirty quota-based throttling of vcpus

2023-02-13 Thread Shivam Kumar




On 22/11/22 9:30 am, Shivam Kumar wrote:



On 21/11/22 5:05 pm, Philippe Mathieu-Daudé wrote:

Hi,

On 20/11/22 23:54, Shivam Kumar wrote:

+
+void dirty_quota_migration_start(void)
+{
+    if (!kvm_state->dirty_quota_supported) {


You are accessing an accelerator-specific variable in an 
accelerator-agnostic file, this doesn't sound correct.


You might introduce some hooks in AccelClass and implement them in
accel/kvm/. See for example gdbstub_supported_sstep_flags() and
kvm_gdbstub_sstep_flags().


Ack.

Thanks,
Shivam


Hi Philippe,

I had received a suggestion on the kernel-side patchset to make dirty 
quota a more generic feature and not limit its use to live migration. 
Incorporating this ask might lead to a significant change in the dirty 
quota interface. So, I haven't been able to post the next version of the 
QEMU patchset. I intend to post it once the new proposition looks good 
to the KVM reviewers.


Thanks,
Shivam



Re: [RFC PATCH 0/1] QEMU: Dirty quota-based throttling of vcpus

2022-12-18 Thread Shivam Kumar




On 06/12/22 10:59 pm, Hyman Huang wrote:



On 2022/12/7 0:00, Peter Xu wrote:

Hi, Shivam,

On Tue, Dec 06, 2022 at 11:18:52AM +0530, Shivam Kumar wrote:

[...]


Note
--
--

We understand that there is a good scope of improvement in the current
implementation. Here is a list of things we are working on:
1) Adding dirty quota as a migration capability so that it can be 
toggled

through QMP command.
2) Adding support for throttling guest DMAs.
3) Not enabling dirty quota for the first migration iteration.


Agreed.

4) Falling back to current auto-converge based throttling in cases 
where dirty

quota throttling can overthrottle.


If overthrottle happens, would auto-converge always be better?



Please stay tuned for the next patchset.

Shivam Kumar (1):
    Dirty quota-based throttling of vcpus

   accel/kvm/kvm-all.c   | 91 
+++

   include/exec/memory.h |  3 ++
   include/hw/core/cpu.h |  5 +++
   include/sysemu/kvm_int.h  |  1 +
   linux-headers/linux/kvm.h |  9 
   migration/migration.c | 22 ++
   migration/migration.h | 31 +
   softmmu/memory.c  | 64 +++
   8 files changed, 226 insertions(+)



It'd be great if I could get some more feedback before I send v2. 
Thanks.


Sorry to respond late.

What's the status of the kernel patchset?

 From high level the approach looks good at least to me.  It's just 
that (as

I used to mention) we have two similar approaches now on throttling the
guest for precopy.  I'm not sure what's the best way to move forward if
without doing a comparison of the two.

https://lore.kernel.org/all/cover.1669047366.git.huangy81@chinatelecom.cn/
Sorry to say so, and no intention to create a contention, but merging the
two without any thought will definitely confuse everybody.  We need to
figure out a way.

 From what I can tell..

One way is we choose one of them which will be superior to the other and
all of us stick with it (for either higher possibility of migrate, less
interference to the workloads, and so on).

The other way is we take both, when each of them may be suitable for
different scenarios.  However in this latter case, we'd better at 
least be

aware of the differences (which suits what), then that'll be part of
documentation we need for each of the features when the user wants to use
them.

Add Yong into the loop.

Any thoughts?

This is quite different from "dirtylimit capability of migration". IMHO, 
quota-based implementation seems a little complicated, because it 
depends on correctness of dirty quota and the measured data, which 
involves the patchset both in qemu and kernel. It seems that dirtylimit 
and quota-based are not mutually exclusive, at least we can figure out

which suits what first, depending on the test results, as Peter said.

Thank you for sharing the link to this alternate approach towards 
throttling - "dirtylimit capability of migration". I am sharing key 
points from my understanding and some questions below:


1) The alternate approach is exclusively for the dirty ring interface. 
The dirty quota approach is orthogonal to the dirty logging interface. 
It works both with the dirty ring and the dirty bitmap interface.


2) Can we achieve micro-stunning with the alternate approach? Can we say 
with good confidence that for most of the time, we stun the vcpu only 
when it is dirtying the memory? Last time when I checked, dirty ring 
size could be a multiple of 512 which makes it difficult to stun the 
vcpu in microscopic intervals.


3) Also, are we relying on the system administrator to select a limit on 
the dirty rate for "dirtylimit capability of migration"?


4) Also, does "dirtylimit capability of migration" play with the dirty 
ring size in a way that it uses a larger ring size for higher dirty rate 
limits and smaller ring size for smaller dirty rate limits? I think the 
dirty rate limit is good information to choose a good-enough dirty ring 
size.



Thanks,
Shivam



Re: [RFC PATCH 0/1] QEMU: Dirty quota-based throttling of vcpus

2022-12-05 Thread Shivam Kumar




On 21/11/22 4:24 am, Shivam Kumar wrote:

This patchset is the QEMU-side implementation of a (new) dirty "quota"
based throttling algorithm that selectively throttles vCPUs based on their
individual contribution to overall memory dirtying and also dynamically
adapts the throttle based on the available network bandwidth.

Overview
--
--

To throttle memory dirtying, we propose to set a limit on the number of
pages a vCPU can dirty in given fixed microscopic size time intervals. This
limit depends on the network throughput calculated over the last few
intervals so as to throttle the vCPUs based on available network bandwidth.
We are referring to this limit as the "dirty quota" of a vCPU and
the fixed size intervals as the "dirty quota intervals".

One possible approach to distributing the overall scope of dirtying for a
dirty quota interval is to equally distribute it among all the vCPUs. This
approach to the distribution doesn't make sense if the distribution of
workloads among vCPUs is skewed. So, to counter such skewed cases, we
propose that if any vCPU doesn't need its quota for any given dirty
quota interval, we add this quota to a common pool. This common pool (or
"common quota") can be consumed on a first come first serve basis
by all vCPUs in the upcoming dirty quota intervals.


Design
--
--

Userspace KVM

[At the start of dirty logging]
Initialize dirty quota to some
non-zero value for each vcpu.->   [When dirty logging starts]
   Start incrementing dirty count
   for every dirty by the vcpu.

   [Dirty count equals/exceeds
   dirty quota]
If the vcpu has already claimed  <-   Exit to userspace.
its quota for the current dirty
quota interval:

 1) If common quota is
 available, give the vcpu
 its quota from common pool.

 2) Else sleep the vcpu until
 the next interval starts.

Give the vcpu its share for the
current(fresh) dirty quota   ->  Continue dirtying with the newly
interval.received quota.

[At the end of dirty logging]
Set dirty quota back to zero
for every vcpu. ->   Throttling disabled.


References
--
--

KVM Forum Talk: https://www.youtube.com/watch?v=ZBkkJf78zFA
Kernel Patchset:
https://lore.kernel.org/all/20221113170507.208810-1-shivam.kum...@nutanix.com/


Note
--
--

We understand that there is a good scope of improvement in the current
implementation. Here is a list of things we are working on:
1) Adding dirty quota as a migration capability so that it can be toggled
through QMP command.
2) Adding support for throttling guest DMAs.
3) Not enabling dirty quota for the first migration iteration.
4) Falling back to current auto-converge based throttling in cases where dirty
quota throttling can overthrottle.

Please stay tuned for the next patchset.

Shivam Kumar (1):
   Dirty quota-based throttling of vcpus

  accel/kvm/kvm-all.c   | 91 +++
  include/exec/memory.h |  3 ++
  include/hw/core/cpu.h |  5 +++
  include/sysemu/kvm_int.h  |  1 +
  linux-headers/linux/kvm.h |  9 
  migration/migration.c | 22 ++
  migration/migration.h | 31 +
  softmmu/memory.c  | 64 +++
  8 files changed, 226 insertions(+)



It'd be great if I could get some more feedback before I send v2. Thanks.

CC: Peter Xu, Juan Quintela



Re: [RFC PATCH 1/1] Dirty quota-based throttling of vcpus

2022-11-22 Thread Shivam Kumar




On 21/11/22 5:05 pm, Philippe Mathieu-Daudé wrote:

Hi,

On 20/11/22 23:54, Shivam Kumar wrote:

+
+void dirty_quota_migration_start(void)
+{
+    if (!kvm_state->dirty_quota_supported) {


You are accessing an accelerator-specific variable in an 
accelerator-agnostic file, this doesn't sound correct.


You might introduce some hooks in AccelClass and implement them in
accel/kvm/. See for example gdbstub_supported_sstep_flags() and
kvm_gdbstub_sstep_flags().


Ack.

Thanks,
Shivam
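For reference, the AccelClass-hook approach suggested above could look roughly
like this (every name below is a placeholder of mine, not from any posted
series):

    /* include/qemu/accel.h */
    struct AccelClass {
        /* ... existing fields ... */
        bool (*dirty_quota_supported)(MachineState *ms);
    };

    /* Generic wrapper, callable from accelerator-agnostic code. */
    bool accel_dirty_quota_supported(MachineState *ms)
    {
        AccelClass *acc = ACCEL_GET_CLASS(current_accel());

        return acc->dirty_quota_supported && acc->dirty_quota_supported(ms);
    }

    /* accel/kvm/kvm-all.c */
    static bool kvm_dirty_quota_supported(MachineState *ms)
    {
        return kvm_state->dirty_quota_supported;
    }

with kvm_accel_class_init() setting the hook, so migration code never touches
kvm_state directly.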



[RFC PATCH 0/1] QEMU: Dirty quota-based throttling of vcpus

2022-11-20 Thread Shivam Kumar
This patchset is the QEMU-side implementation of a (new) dirty "quota"
based throttling algorithm that selectively throttles vCPUs based on their
individual contribution to overall memory dirtying and also dynamically
adapts the throttle based on the available network bandwidth.

Overview
--
--

To throttle memory dirtying, we propose to set a limit on the number of
pages a vCPU can dirty in given fixed microscopic size time intervals. This
limit depends on the network throughput calculated over the last few
intervals so as to throttle the vCPUs based on available network bandwidth.
We are referring to this limit as the "dirty quota" of a vCPU and
the fixed size intervals as the "dirty quota intervals". 

One possible approach to distributing the overall scope of dirtying for a
dirty quota interval is to equally distribute it among all the vCPUs. This
approach to the distribution doesn't make sense if the distribution of
workloads among vCPUs is skewed. So, to counter such skewed cases, we
propose that if any vCPU doesn't need its quota for any given dirty
quota interval, we add this quota to a common pool. This common pool (or
"common quota") can be consumed on a first come first serve basis
by all vCPUs in the upcoming dirty quota intervals.
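To make the numbers concrete, a rough illustrative example (not taken from the
patch): with 4 KiB pages, a measured network throughput of 500 MB/s
corresponds to an overall limit of about 500e6 / 4096 ~= 122,000 dirtied pages
per second. With a 10 ms dirty quota interval and 8 vCPUs sharing that limit
equally, each vCPU starts an interval with roughly 122,000 / 8 * 0.01 ~= 150
pages of quota; a vCPU that exhausts those 150 pages early either draws from
the common pool or sleeps until the interval ends.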


Design
--
--

Userspace KVM

[At the start of dirty logging]
Initialize dirty quota to some
non-zero value for each vcpu.->   [When dirty logging starts]
  Start incrementing dirty count
  for every dirty by the vcpu.

  [Dirty count equals/exceeds
  dirty quota]
If the vcpu has already claimed  <-   Exit to userspace.
its quota for the current dirty   
quota interval:

1) If common quota is
available, give the vcpu
its quota from common pool.

2) Else sleep the vcpu until
the next interval starts.

Give the vcpu its share for the
current(fresh) dirty quota   ->  Continue dirtying with the newly
interval.received quota.  

[At the end of dirty logging] 
Set dirty quota back to zero
for every vcpu. ->   Throttling disabled.


References
--
--

KVM Forum Talk: https://www.youtube.com/watch?v=ZBkkJf78zFA
Kernel Patchset:
https://lore.kernel.org/all/20221113170507.208810-1-shivam.kum...@nutanix.com/


Note
--
--

We understand that there is a good scope of improvement in the current
implementation. Here is a list of things we are working on:
1) Adding dirty quota as a migration capability so that it can be toggled
through QMP command.
2) Adding support for throttling guest DMAs.
3) Not enabling dirty quota for the first migration iteration.
4) Falling back to current auto-converge based throttling in cases where dirty
quota throttling can overthrottle.

Please stay tuned for the next patchset.

Shivam Kumar (1):
  Dirty quota-based throttling of vcpus

 accel/kvm/kvm-all.c   | 91 +++
 include/exec/memory.h |  3 ++
 include/hw/core/cpu.h |  5 +++
 include/sysemu/kvm_int.h  |  1 +
 linux-headers/linux/kvm.h |  9 
 migration/migration.c | 22 ++
 migration/migration.h | 31 +
 softmmu/memory.c  | 64 +++
 8 files changed, 226 insertions(+)

-- 
2.22.3




[RFC PATCH 1/1] Dirty quota-based throttling of vcpus

2022-11-20 Thread Shivam Kumar
Introduces a (new) throttling scheme where QEMU defines a limit on the dirty
rate of each vcpu of the VM. This limit is enfored on the vcpus in small
intervals (dirty quota intervals) by allowing the vcpus to dirty only as many
pages in these intervals as to maintain a dirty rate below the set limit.

Suggested-by: Shaju Abraham 
Suggested-by: Manish Mishra 
Co-developed-by: Anurag Madnawat 
Signed-off-by: Anurag Madnawat 
Signed-off-by: Shivam Kumar 
---
 accel/kvm/kvm-all.c   | 91 +++
 include/exec/memory.h |  3 ++
 include/hw/core/cpu.h |  5 +++
 include/sysemu/kvm_int.h  |  1 +
 linux-headers/linux/kvm.h |  9 
 migration/migration.c | 22 ++
 migration/migration.h | 31 +
 softmmu/memory.c  | 64 +++
 8 files changed, 226 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0becd8..ea50605592 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -46,6 +46,8 @@
 #include "sysemu/hw_accel.h"
 #include "kvm-cpus.h"
 #include "sysemu/dirtylimit.h"
+#include "hw/core/cpu.h"
+#include "migration/migration.h"
 
 #include "hw/boards.h"
 #include "monitor/stats.h"
@@ -2463,6 +2465,8 @@ static int kvm_init(MachineState *ms)
 }
 }
 
+s->dirty_quota_supported = kvm_vm_check_extension(s, KVM_CAP_DIRTY_QUOTA);
+
 /*
  * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is
  * enabled.  More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no
@@ -2808,6 +2812,88 @@ static void kvm_eat_signals(CPUState *cpu)
 } while (sigismember(&chkset, SIG_IPI));
 }
 
+static void handle_dirty_quota_sleep(int64_t sleep_time)
+{
+/* Do not throttle the vcpu more than the maximum throttle. */
+sleep_time = MIN(sleep_time,
+DIRTY_QUOTA_MAX_THROTTLE * DIRTY_QUOTA_INTERVAL_SIZE);
+/* Convert sleep time from nanoseconds to microseconds. */
+g_usleep(sleep_time / 1000);
+}
+
+static uint64_t handle_dirty_quota_exhausted(
+CPUState *cpu, const uint64_t count, const uint64_t quota)
+{
+MigrationState *s = migrate_get_current();
+uint64_t time_to_sleep;
+int64_t unclaimed_quota;
+int64_t dirty_quota_overflow = (count - quota);
+uint64_t dirty_rate_limit = qatomic_read(&s->per_vcpu_dirty_rate_limit);
+uint64_t new_quota = (dirty_rate_limit * DIRTY_QUOTA_INTERVAL_SIZE) /
+NANOSECONDS_PER_SECOND;
+uint64_t current_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+/* Penalize the vCPU if it dirtied more pages than it was allowed to. */
+if (dirty_quota_overflow > 0) {
+time_to_sleep = (dirty_quota_overflow * NANOSECONDS_PER_SECOND) /
+dirty_rate_limit;
+cpu->dirty_quota_expiry_time = current_time + time_to_sleep;
+return time_to_sleep;
+}
+
+/*
+ * If the current dirty quota interval hasn't ended, try using common quota
+ * if it is available, else sleep.
+ */
+current_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+if (current_time < cpu->dirty_quota_expiry_time) {
+qemu_spin_lock(&s->common_dirty_quota_lock);
+if (s->common_dirty_quota > 0) {
+s->common_dirty_quota -= new_quota;
+qemu_spin_unlock(&s->common_dirty_quota_lock);
+cpu->kvm_run->dirty_quota = count + new_quota;
+return 0;
+}
+
+qemu_spin_unlock(&s->common_dirty_quota_lock);
+current_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+/* If common quota isn't available, sleep for the remaining interval. 
*/
+if (current_time < cpu->dirty_quota_expiry_time) {
+time_to_sleep = cpu->dirty_quota_expiry_time - current_time;
+return time_to_sleep;
+}
+}
+
+/*
+ * This is a fresh dirty quota interval. If the vcpu has not claimed its
+ * quota for the previous intervals, add them to the common quota.
+ */
+current_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+unclaimed_quota = (current_time - cpu->dirty_quota_expiry_time) *
+dirty_rate_limit;
+qemu_spin_lock(&s->common_dirty_quota_lock);
+s->common_dirty_quota += unclaimed_quota;
+qemu_spin_unlock(&s->common_dirty_quota_lock);
+
+/*  Allocate the vcpu this new interval's dirty quota. */
+cpu->kvm_run->dirty_quota = count + new_quota;
+cpu->dirty_quota_expiry_time = current_time + DIRTY_QUOTA_INTERVAL_SIZE;
+return 0;
+}
+
+
+static void handle_kvm_exit_dirty_quota_exhausted(CPUState *cpu,
+const uint64_t count, const uint64_t quota)
+{
+uint64_t time_to_sleep;
+do {
+time_to_sleep = handle_dirty_quota

network buffering in fault tolerance

2020-09-29 Thread Shivam Mehra
I came across this documentation with source code for providing network
buffering to applications
https://www.infradead.org/~tgr/libnl/doc/api/route_2qdisc_2plug_8c_source.html
.
This network-buffering helps output-commit problem when providing fault
tolerance to virtual machines. The output is buffered until an
acknowledgement arrives from the backup VM and then released to the
external world. So that backup and primary VMs seem consistent externally.
Initially developed for XEN VMM to provide fault tolerance to VMs and I
think it's now available for QEMU too.

Where does the script reside which does network-buffering for checkpoints?
and what are the commands to make this happen?

I want to do this network-buffering for packets originating from an
application. Is it possible to do it in the same way as above? Does it do
any damage to the host kernel? Can I get  a simple working example for this?


Network I/O Buffering

2020-09-29 Thread Shivam Mehra
I came across this documentation with source code for providing network
buffering to applications
https://www.infradead.org/~tgr/libnl/doc/api/route_2qdisc_2plug_8c_source.html.
This network-buffering helps output-commit problem when providing fault
tolerance to virtual machines. The output is buffered until an
acknowledgement arrives from the backup VM and then released to the
external world. So that backup and primary VMs seem consistent externally.
Initially developed for XEN VMM to provide fault tolerance to VMs and I
think it's now available for QEMU too.

Where does the script reside which does network-buffering for checkpoints?
and what are the commands to make this happen?

I want to do this network-buffering for packets originating from an
application. Is it possible to do it in the same way as above? Does it do
any damage to the host kernel? Can I get  a simple working example for this?
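(For reference: in current QEMU this plug/release style of buffering is not a
script but a netdev filter object, net/filter-buffer.c, attached with
something like "-object filter-buffer,id=f0,netdev=hn0,queue=tx,interval=...",
and its buffering can be toggled through the filter's properties at runtime;
the COLO fault-tolerance support described in docs/COLO-FT.txt builds its
packet handling on the same netfilter infrastructure. The exact option
spellings are best checked against the QEMU version in use.)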


[Qemu-devel] Affordable Link Building SEO Services

2011-08-12 Thread Shivam
Hi,

 

Greetings of the Day!

 

We have launched our SEO packages on discounted prices. Avail the benefit of
our discounted packages to boost up your business.

 

We are a fast growing company based in India with primary focus on Website
Designing, Web Development (ASP.Net, Java, Perl and PHP development) & SEO
Services.

 

We have a dedicated team of 150 professionals to serve you in building
appropriate links and developing SEO.

 

We can provide you with a fresh, professional image via a recognizable
trademark or logo design. Our experts work to tight deadlines and are wholly
dedicated to maintaining your online presence.

 

We have our competency in CMS (Joomla, Modx, Mambo and other quality Content
Management System) and e-commerce website.

 

Most firms overseas have achieved a significant amount of savings by
outsourcing either complete or part of their work to us in India.

Other Affordable Search Engine Optimization Services Provided By Us.

 

OUR SERVICES:

   

1. Website Design & Development

2. SEO (Search Engine Optimization)

3. Link Building

4. Press Release

5. Article Submission/Article Writing

6. Blog Submissions/Writing

7. Directory Submission

8. Social Media Optimization.

 

Take advantage of our highly successful internet marketing service & software
development work at very affordable price.

 

If you have any query, we will be more than happy to provide you our quick
assistance.

 

 

Kind Regards,  

Name: Shivam Singh

Designation: Sales Executive.

 

Note: We are not spammers and are against spamming of any kind. If you are
not interested then you can reply with a simple "NO", we will never
contact you again.