FYI,
Here are my local COLO test steps/scripts, with both VMs running on the same host.
S1: ./primary.sh
S2: ./secondary.sh
S3: cat secondary-cmd.json | nc localhost 55555
S4: cat primary-cmd.json | nc localhost 25555
At this point, both the primary and secondary VMs have entered the COLO state.
We can then trigger a failover:
(Primary takeover)S5_1: killall -9 secondary; sleep 1; cat
primary-failover.json | nc localhost 25555
or
(Secondary takeover)S5_2: killall -9 primary; sleep 1; cat
secondary-failover.json | nc localhost 55555
=========scripts=============
# cat primary.sh
cmd="./primary -enable-kvm -cpu qemu64,kvmclock=on -m 4096 -smp 1 -device
piix3-usb-uhci -device usb-tablet -name primary -netdev
tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device
e1000,id=e0,netdev=hn0 -chardev
socket,id=mirror0,host=0.0.0.0,port=9003,server=on,wait=off -chardev
socket,id=compare1,host=0.0.0.0,port=9004,server=on,wait=on -chardev
socket,id=compare0,host=127.0.0.1,port=9001,server=on,wait=off -chardev
socket,id=compare0-0,host=127.0.0.1,port=9001 -chardev
socket,id=compare_out,host=127.0.0.1,port=9005,server=on,wait=off -chardev
socket,id=compare_out0,host=127.0.0.1,port=9005 -object
filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 -object
filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out -object
filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 -object
iothread,id=iothread1 -object
colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,iothread=iothread1
-drive
if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,children.0.file.filename=/home/lizhijian/images/colo/primary/primary.qcow2,children.0.driver=qcow2
-nographic -monitor telnet:127.0.0.1:15555,server,nowait -qmp
telnet:127.0.0.1:25555,server,nowait -S"
echo $cmd
exec $cmd
# cat secondary.sh
cmd="./secondary -enable-kvm -cpu qemu64,kvmclock=on -m 4096 -smp 1 -qmp
telnet:127.0.0.1:55555,server,nowait -device piix3-usb-uhci -device usb-tablet
-name secondary -netdev
tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown -device
e1000,id=e0,netdev=hn0 -chardev
socket,id=red0,host=127.0.0.1,port=9003,reconnect-ms=1 -chardev
socket,id=red1,host=127.0.0.1,port=9004,reconnect-ms=1 -object
filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 -object
filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 -object
filter-rewriter,id=rew0,netdev=hn0,queue=all -drive
if=none,id=parent0,file.filename=/home/lizhijian/images/colo/secondary/primary.qcow2,driver=qcow2
-drive
if=none,id=childs0,driver=replication,mode=secondary,file.driver=qcow2,top-id=colo-disk0,file.file.filename=/home/lizhijian/images/colo/secondary/secondary-active.qcow2,file.backing.driver=qcow2,file.backing.file.filename=/home/lizhijian/images/colo/secondary/secondary-hidden.qcow2,file.backing.backing=parent0
-drive
if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,children.0=childs0
-incoming tcp:0.0.0.0:9998 -nographic -monitor
telnet:127.0.0.1:55554,server,nowait"
echo $cmd
exec $cmd
# cat secondary-cmd.json
{"execute":"qmp_capabilities"}
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [
{"capability": "x-colo", "state": true} ] } }
{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data":
{"host": "0.0.0.0", "port": "9999"} } } }
{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable":
true } }
{'execute': 'trace-event-set-state', 'arguments': {'name': 'colo*', 'enable':
true} }
# cat primary-cmd.json
{"execute":"qmp_capabilities"}
{'execute': 'trace-event-set-state', 'arguments': {'name': 'colo*', 'enable':
true} }
{'execute': 'trace-event-set-state', 'arguments': {'name': 'migrat*', 'enable':
true} }
{"execute": "human-monitor-command", "arguments": {"command-line": "drive_add
-n buddy
driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}}
{"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node":
"replication0" } }
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [
{"capability": "x-colo", "state": true } ] } }
{"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } }
# cat primary-failover.json
{"execute":"qmp_capabilities"}
{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "child":
"children.1"} }
{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_del
replication0" } }
{"execute": "object-del", "arguments":{ "id": "comp0" } }
{"execute": "object-del", "arguments":{ "id": "iothread1" } }
{"execute": "object-del", "arguments":{ "id": "m0" } }
{"execute": "object-del", "arguments":{ "id": "redire0" } }
{"execute": "object-del", "arguments":{ "id": "redire1" } }
{"execute": "x-colo-lost-heartbeat" }
# cat secondary-failover.json
{"execute":"qmp_capabilities"}
{"execute": "nbd-server-stop"}
{"execute": "x-colo-lost-heartbeat"}
{"execute": "object-del", "arguments":{ "id": "f2" } }
{"execute": "object-del", "arguments":{ "id": "f1" } }
{"execute": "chardev-remove", "arguments":{ "id": "red1" } }
{"execute": "chardev-remove", "arguments":{ "id": "red0" } }
{"execute": "chardev-add", "arguments":{ "id": "mirror0", "backend": {"type":
"socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0",
"port": "9003" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare1", "backend": {"type":
"socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0",
"port": "9004" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare0", "backend": {"type":
"socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1",
"port": "9001" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare0-0", "backend":
{"type": "socket", "data": {"addr": { "type": "inet", "data": { "host":
"127.0.0.1", "port": "9001" } }, "server": false } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare_out", "backend":
{"type": "socket", "data": {"addr": { "type": "inet", "data": { "host":
"127.0.0.1", "port": "9005" } }, "server": true } } } }
{"execute": "chardev-add", "arguments":{ "id": "compare_out0", "backend":
{"type": "socket", "data": {"addr": { "type": "inet", "data": { "host":
"127.0.0.1", "port": "9005" } }, "server": false } } } }
On 04/11/2025 09:36, Li Zhijian wrote:
> Commit 4881411136 ("migration: Always set DEVICE state") introduced a new
> DEVICE state that is entered before COMPLETED during migration, which broke
> the original transition to COLO. The migration flow for precopy has changed to:
> active -> pre-switchover -> device -> completed.
>
> This patch updates the transition state to ensure that the Pre-COLO
> state corresponds to DEVICE state correctly.
>
> Fixes: 4881411136 ("migration: Always set DEVICE state")
> Signed-off-by: Li Zhijian <[email protected]>
> ---
> migration/migration.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index a63b46bbef..6ec7f3cec8 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3095,9 +3095,9 @@ static void migration_completion(MigrationState *s)
> goto fail;
> }
>
> - if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) {
> + if (migrate_colo() && s->state == MIGRATION_STATUS_DEVICE) {
> /* COLO does not support postcopy */
> - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> + migrate_set_state(&s->state, MIGRATION_STATUS_DEVICE,
> MIGRATION_STATUS_COLO);
> } else {
> migration_completion_end(s);