Diffstat:
 -rw-r--r--  debian/patches/v7.2.7.diff  2307
 1 file changed, 2307 insertions, 0 deletions
diff --git a/debian/patches/v7.2.7.diff b/debian/patches/v7.2.7.diff
new file mode 100644
index 00000000..ebade3bf
--- /dev/null
+++ b/debian/patches/v7.2.7.diff
@@ -0,0 +1,2307 @@
+Subject: v7.2.7
+Date: Tue Nov 21 12:02:48 2023 +0300
+From: Michael Tokarev <mjt@tls.msk.ru>
+Forwarded: not-needed
+
+This is a difference between upstream qemu v7.2.6
+and upstream qemu v7.2.7.
+--
+ VERSION | 2 +-
+ accel/tcg/tcg-accel-ops-mttcg.c | 9 +---
+ block/nvme.c | 7 +--
+ chardev/char-pty.c | 22 +++++++--
+ disas/riscv.c | 4 +-
+ hw/audio/es1370.c | 2 +-
+ hw/cxl/cxl-host.c | 12 ++---
+ hw/display/ati.c | 8 ++++
+ hw/display/ati_2d.c | 75 +++++++++++++++++++++---------
+ hw/display/ati_int.h | 1 +
+ hw/display/ramfb.c | 1 +
+ hw/i386/amd_iommu.c | 9 +---
+ hw/i386/amd_iommu.h | 2 -
+ hw/ide/core.c | 14 +++---
+ hw/input/lasips2.c | 10 ++++
+ hw/misc/led.c | 2 +-
+ hw/ppc/ppc.c | 97 +++++++++++++++++++++++---------------
+ hw/rdma/vmw/pvrdma_main.c | 16 ++++++-
+ hw/scsi/esp.c | 5 +-
+ hw/scsi/scsi-disk.c | 9 +++-
+ hw/sd/sdhci.c | 15 ++++--
+ include/qemu/host-utils.h | 21 ++++++++-
+ linux-user/hppa/signal.c | 8 ++--
+ linux-user/mips/cpu_loop.c | 4 +-
+ linux-user/sh4/signal.c | 8 ++++
+ linux-user/syscall.c | 43 -----------------
+ meson.build | 2 -
+ migration/migration.c | 9 +++-
+ pc-bios/optionrom/Makefile | 2 +-
+ qemu-img.c | 13 +++++-
+ scripts/analyze-migration.py | 6 +--
+ scripts/tracetool/__init__.py | 2 +-
+ target/arm/helper.c | 9 ++++
+ target/arm/internals.h | 1 -
+ target/arm/ptw.c | 89 ++++++++++++++++++++++++-----------
+ target/i386/tcg/decode-new.c.inc | 98 ++++++++++++++++++++++-----------------
+ target/i386/tcg/decode-new.h | 2 +-
+ target/i386/tcg/emit.c.inc | 30 ++++++++++--
+ target/mips/tcg/msa.decode | 4 +-
+ target/mips/tcg/tx79.decode | 2 +-
+ target/s390x/tcg/insn-data.h.inc | 2 +-
+ target/s390x/tcg/translate.c | 19 +++++++-
+ target/tricore/cpu.c | 6 +--
+ target/tricore/cpu.h | 2 +-
+ target/tricore/op_helper.c | 4 +-
+ tests/migration/s390x/Makefile | 4 +-
+ tests/qemu-iotests/024 | 57 +++++++++++++++++++++++
+ tests/qemu-iotests/024.out | 30 ++++++++++++
+ tests/qtest/ahci-test.c | 86 +++++++++++++++++++++++++++++++++-
+ tests/tcg/Makefile.target | 2 +-
+ tests/tcg/aarch64/Makefile.target | 2 +-
+ tests/tcg/arm/Makefile.target | 2 +-
+ tests/tcg/cris/Makefile.target | 2 +-
+ tests/tcg/hexagon/Makefile.target | 2 +-
+ tests/tcg/i386/Makefile.target | 2 +-
+ tests/tcg/i386/test-avx.py | 2 +-
+ tests/tcg/minilib/Makefile.target | 2 +-
+ tests/tcg/mips/Makefile.target | 2 +-
+ tests/tcg/mips/hello-mips.c | 4 +-
+ tests/tcg/s390x/Makefile.target | 1 +
+ tests/tcg/s390x/laalg.c | 27 +++++++++++
+ ui/gtk-egl.c | 14 +++---
+ ui/gtk.c | 10 ++++
+ ui/vnc.c | 6 +--
+ 64 files changed, 686 insertions(+), 279 deletions(-)
+
+diff --git a/VERSION b/VERSION
+index ba6a7620d4..4afc54e7b7 100644
+--- a/VERSION
++++ b/VERSION
+@@ -1 +1 @@
+-7.2.6
++7.2.7
+diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
+index d50239e0e2..3a021624f4 100644
+--- a/accel/tcg/tcg-accel-ops-mttcg.c
++++ b/accel/tcg/tcg-accel-ops-mttcg.c
+@@ -100,14 +100,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
+ break;
+ case EXCP_HALTED:
+ /*
+- * during start-up the vCPU is reset and the thread is
+- * kicked several times. If we don't ensure we go back
+- * to sleep in the halted state we won't cleanly
+- * start-up when the vCPU is enabled.
+- *
+- * cpu->halted should ensure we sleep in wait_io_event
++ * Usually cpu->halted is set, but may have already been
++ * reset by another thread by the time we arrive here.
+ */
+- g_assert(cpu->halted);
+ break;
+ case EXCP_ATOMIC:
+ qemu_mutex_unlock_iothread();
+diff --git a/block/nvme.c b/block/nvme.c
+index 656624c585..14d01a5ea9 100644
+--- a/block/nvme.c
++++ b/block/nvme.c
+@@ -419,9 +419,10 @@ static bool nvme_process_completion(NVMeQueuePair *q)
+ q->cq_phase = !q->cq_phase;
+ }
+ cid = le16_to_cpu(c->cid);
+- if (cid == 0 || cid > NVME_QUEUE_SIZE) {
+- warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
+- "queue size: %u", cid, NVME_QUEUE_SIZE);
++ if (cid == 0 || cid > NVME_NUM_REQS) {
++ warn_report("NVMe: Unexpected CID in completion queue: %" PRIu32
++ ", should be within: 1..%u inclusively", cid,
++ NVME_NUM_REQS);
+ continue;
+ }
+ trace_nvme_complete_command(s, q->index, cid);
+diff --git a/chardev/char-pty.c b/chardev/char-pty.c
+index 53f25c6bbd..e6d0b05211 100644
+--- a/chardev/char-pty.c
++++ b/chardev/char-pty.c
+@@ -108,11 +108,27 @@ static void pty_chr_update_read_handler(Chardev *chr)
+ static int char_pty_chr_write(Chardev *chr, const uint8_t *buf, int len)
+ {
+ PtyChardev *s = PTY_CHARDEV(chr);
++ GPollFD pfd;
++ int rc;
+
+- if (!s->connected) {
+- return len;
++ if (s->connected) {
++ return io_channel_send(s->ioc, buf, len);
++ }
++
++ /*
++ * The other side might already be re-connected, but the timer might
++ * not have fired yet. So let's check here whether we can write again:
++ */
++ pfd.fd = QIO_CHANNEL_FILE(s->ioc)->fd;
++ pfd.events = G_IO_OUT;
++ pfd.revents = 0;
++ TFR(rc = g_poll(&pfd, 1, 0));
++ g_assert(rc >= 0);
++ if (!(pfd.revents & G_IO_HUP) && (pfd.revents & G_IO_OUT)) {
++ io_channel_send(s->ioc, buf, len);
+ }
+- return io_channel_send(s->ioc, buf, len);
++
++ return len;
+ }
+
+ static GSource *pty_chr_add_watch(Chardev *chr, GIOCondition cond)
+diff --git a/disas/riscv.c b/disas/riscv.c
+index d216b9c39b..dee4e580a0 100644
+--- a/disas/riscv.c
++++ b/disas/riscv.c
+@@ -2173,8 +2173,8 @@ static const char *csr_name(int csrno)
+ case 0x03ba: return "pmpaddr10";
+ case 0x03bb: return "pmpaddr11";
+ case 0x03bc: return "pmpaddr12";
+- case 0x03bd: return "pmpaddr14";
+- case 0x03be: return "pmpaddr13";
++ case 0x03bd: return "pmpaddr13";
++ case 0x03be: return "pmpaddr14";
+ case 0x03bf: return "pmpaddr15";
+ case 0x0780: return "mtohost";
+ case 0x0781: return "mfromhost";
+diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
+index 6904589814..7032bee2f6 100644
+--- a/hw/audio/es1370.c
++++ b/hw/audio/es1370.c
+@@ -503,7 +503,7 @@ static void es1370_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+ case ES1370_REG_DAC2_SCOUNT:
+ case ES1370_REG_ADC_SCOUNT:
+ d += (addr - ES1370_REG_DAC1_SCOUNT) >> 2;
+- d->scount = (val & 0xffff) | (d->scount & ~0xffff);
++ d->scount = (val & 0xffff) << 16 | (val & 0xffff);
+ ldebug ("chan %td CURR_SAMP_CT %d, SAMP_CT %d\n",
+ d - &s->chan[0], val >> 16, (val & 0xffff));
+ break;
+diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
+index 1adf61231a..0fc3e57138 100644
+--- a/hw/cxl/cxl-host.c
++++ b/hw/cxl/cxl-host.c
+@@ -39,12 +39,6 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
+ return;
+ }
+
+- fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
+- for (i = 0, target = object->targets; target; i++, target = target->next) {
+- /* This link cannot be resolved yet, so stash the name for now */
+- fw->targets[i] = g_strdup(target->value);
+- }
+-
+ if (object->size % (256 * MiB)) {
+ error_setg(errp,
+ "Size of a CXL fixed memory window must my a multiple of 256MiB");
+@@ -64,6 +58,12 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
+ fw->enc_int_gran = 0;
+ }
+
++ fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
++ for (i = 0, target = object->targets; target; i++, target = target->next) {
++ /* This link cannot be resolved yet, so stash the name for now */
++ fw->targets[i] = g_strdup(target->value);
++ }
++
+ cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows,
+ g_steal_pointer(&fw));
+
+diff --git a/hw/display/ati.c b/hw/display/ati.c
+index 6e38e00502..4f3bebcfd3 100644
+--- a/hw/display/ati.c
++++ b/hw/display/ati.c
+@@ -1014,6 +1014,7 @@ static Property ati_vga_properties[] = {
+ DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
+ PCI_DEVICE_ID_ATI_RAGE128_PF),
+ DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
++ DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
+ DEFINE_PROP_END_OF_LIST()
+ };
+
+@@ -1035,11 +1036,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
+ k->exit = ati_vga_exit;
+ }
+
++static void ati_vga_init(Object *o)
++{
++ object_property_set_description(o, "x-pixman", "Use pixman for: "
++ "1: fill, 2: blit");
++}
++
+ static const TypeInfo ati_vga_info = {
+ .name = TYPE_ATI_VGA,
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(ATIVGAState),
+ .class_init = ati_vga_class_init,
++ .instance_init = ati_vga_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { },
+diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
+index 7d786653e8..0e6b8e4367 100644
+--- a/hw/display/ati_2d.c
++++ b/hw/display/ati_2d.c
+@@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
+ switch (s->regs.dp_mix & GMC_ROP3_MASK) {
+ case ROP3_SRCCOPY:
+ {
++ bool fallback = false;
+ unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
+ s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
+ unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
+@@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
+ src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
+ src_x, src_y, dst_x, dst_y,
+ s->regs.dst_width, s->regs.dst_height);
+- if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
++ if ((s->use_pixman & BIT(1)) &&
++ s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
+ s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
+- pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
+- src_stride, dst_stride, bpp, bpp,
+- src_x, src_y, dst_x, dst_y,
+- s->regs.dst_width, s->regs.dst_height);
+- } else {
++ fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
++ src_stride, dst_stride, bpp, bpp,
++ src_x, src_y, dst_x, dst_y,
++ s->regs.dst_width, s->regs.dst_height);
++ } else if (s->use_pixman & BIT(1)) {
+ /* FIXME: We only really need a temporary if src and dst overlap */
+ int llb = s->regs.dst_width * (bpp / 8);
+ int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
+ uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
+ s->regs.dst_height);
+- pixman_blt((uint32_t *)src_bits, tmp,
+- src_stride, tmp_stride, bpp, bpp,
+- src_x, src_y, 0, 0,
+- s->regs.dst_width, s->regs.dst_height);
+- pixman_blt(tmp, (uint32_t *)dst_bits,
+- tmp_stride, dst_stride, bpp, bpp,
+- 0, 0, dst_x, dst_y,
+- s->regs.dst_width, s->regs.dst_height);
++ fallback = !pixman_blt((uint32_t *)src_bits, tmp,
++ src_stride, tmp_stride, bpp, bpp,
++ src_x, src_y, 0, 0,
++ s->regs.dst_width, s->regs.dst_height);
++ if (!fallback) {
++ fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
++ tmp_stride, dst_stride, bpp, bpp,
++ 0, 0, dst_x, dst_y,
++ s->regs.dst_width, s->regs.dst_height);
++ }
+ g_free(tmp);
++ } else {
++ fallback = true;
++ }
++ if (fallback) {
++ unsigned int y, i, j, bypp = bpp / 8;
++ unsigned int src_pitch = src_stride * sizeof(uint32_t);
++ unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
++
++ for (y = 0; y < s->regs.dst_height; y++) {
++ i = dst_x * bypp;
++ j = src_x * bypp;
++ if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
++ i += (dst_y + y) * dst_pitch;
++ j += (src_y + y) * src_pitch;
++ } else {
++ i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
++ j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
++ }
++ memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
++ }
+ }
+ if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
+ dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
+@@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)
+
+ dst_stride /= sizeof(uint32_t);
+ DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
+- dst_bits, dst_stride, bpp,
+- dst_x, dst_y,
+- s->regs.dst_width, s->regs.dst_height,
+- filler);
+- pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
+- dst_x, dst_y,
+- s->regs.dst_width, s->regs.dst_height,
+- filler);
++ dst_bits, dst_stride, bpp, dst_x, dst_y,
++ s->regs.dst_width, s->regs.dst_height, filler);
++ if (!(s->use_pixman & BIT(0)) ||
++ !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
++ s->regs.dst_width, s->regs.dst_height, filler)) {
++ /* fallback when pixman failed or we don't want to call it */
++ unsigned int x, y, i, bypp = bpp / 8;
++ unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
++ for (y = 0; y < s->regs.dst_height; y++) {
++ i = dst_x * bypp + (dst_y + y) * dst_pitch;
++ for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
++ stn_he_p(&dst_bits[i], bypp, filler);
++ }
++ }
++ }
+ if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
+ dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
+ s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
+diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h
+index 8acb9c7466..055aa2d140 100644
+--- a/hw/display/ati_int.h
++++ b/hw/display/ati_int.h
+@@ -89,6 +89,7 @@ struct ATIVGAState {
+ char *model;
+ uint16_t dev_id;
+ uint8_t mode;
++ uint8_t use_pixman;
+ bool cursor_guest_mode;
+ uint16_t cursor_size;
+ uint32_t cursor_offset;
+diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c
+index 79b9754a58..c2b002d534 100644
+--- a/hw/display/ramfb.c
++++ b/hw/display/ramfb.c
+@@ -97,6 +97,7 @@ static void ramfb_fw_cfg_write(void *dev, off_t offset, size_t len)
+
+ s->width = width;
+ s->height = height;
++ qemu_free_displaysurface(s->ds);
+ s->ds = surface;
+ }
+
+diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
+index 725f69095b..a20f3e1d50 100644
+--- a/hw/i386/amd_iommu.c
++++ b/hw/i386/amd_iommu.c
+@@ -1246,13 +1246,8 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
+ return -AMDVI_IR_ERR;
+ }
+
+- if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
+- trace_amdvi_err("MSI address high 32 bits non-zero when "
+- "Interrupt Remapping enabled.");
+- return -AMDVI_IR_ERR;
+- }
+-
+- if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
++ if (origin->address < AMDVI_INT_ADDR_FIRST ||
++ origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
+ trace_amdvi_err("MSI is not from IOAPIC.");
+ return -AMDVI_IR_ERR;
+ }
+diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
+index 79d38a3e41..210a37dfb1 100644
+--- a/hw/i386/amd_iommu.h
++++ b/hw/i386/amd_iommu.h
+@@ -210,8 +210,6 @@
+ #define AMDVI_INT_ADDR_FIRST 0xfee00000
+ #define AMDVI_INT_ADDR_LAST 0xfeefffff
+ #define AMDVI_INT_ADDR_SIZE (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 1)
+-#define AMDVI_MSI_ADDR_HI_MASK (0xffffffff00000000ULL)
+-#define AMDVI_MSI_ADDR_LO_MASK (0x00000000ffffffffULL)
+
+ /* SB IOAPIC is always on this device in AMD systems */
+ #define AMDVI_IOAPIC_SB_DEVID PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
+diff --git a/hw/ide/core.c b/hw/ide/core.c
+index 1477935270..3e97d665d9 100644
+--- a/hw/ide/core.c
++++ b/hw/ide/core.c
+@@ -2491,19 +2491,19 @@ static void ide_dummy_transfer_stop(IDEState *s)
+
+ void ide_bus_reset(IDEBus *bus)
+ {
+- bus->unit = 0;
+- bus->cmd = 0;
+- ide_reset(&bus->ifs[0]);
+- ide_reset(&bus->ifs[1]);
+- ide_clear_hob(bus);
+-
+- /* pending async DMA */
++ /* pending async DMA - needs the IDEState before it is reset */
+ if (bus->dma->aiocb) {
+ trace_ide_bus_reset_aio();
+ blk_aio_cancel(bus->dma->aiocb);
+ bus->dma->aiocb = NULL;
+ }
+
++ bus->unit = 0;
++ bus->cmd = 0;
++ ide_reset(&bus->ifs[0]);
++ ide_reset(&bus->ifs[1]);
++ ide_clear_hob(bus);
++
+ /* reset dma provider too */
+ if (bus->dma->ops->reset) {
+ bus->dma->ops->reset(bus->dma);
+diff --git a/hw/input/lasips2.c b/hw/input/lasips2.c
+index ea7c07a2ba..6075121b72 100644
+--- a/hw/input/lasips2.c
++++ b/hw/input/lasips2.c
+@@ -351,6 +351,11 @@ static void lasips2_port_class_init(ObjectClass *klass, void *data)
+ {
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
++ /*
++ * The PS/2 mouse port is integreal part of LASI and can not be
++ * created by users without LASI.
++ */
++ dc->user_creatable = false;
+ dc->realize = lasips2_port_realize;
+ }
+
+@@ -397,6 +402,11 @@ static void lasips2_kbd_port_class_init(ObjectClass *klass, void *data)
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ LASIPS2PortDeviceClass *lpdc = LASIPS2_PORT_CLASS(klass);
+
++ /*
++ * The PS/2 keyboard port is integreal part of LASI and can not be
++ * created by users without LASI.
++ */
++ dc->user_creatable = false;
+ device_class_set_parent_realize(dc, lasips2_kbd_port_realize,
+ &lpdc->parent_realize);
+ }
+diff --git a/hw/misc/led.c b/hw/misc/led.c
+index f6d6d68bce..42bb43a39a 100644
+--- a/hw/misc/led.c
++++ b/hw/misc/led.c
+@@ -63,7 +63,7 @@ static void led_set_state_gpio_handler(void *opaque, int line, int new_state)
+ LEDState *s = LED(opaque);
+
+ assert(line == 0);
+- led_set_state(s, !!new_state != s->gpio_active_high);
++ led_set_state(s, !!new_state == s->gpio_active_high);
+ }
+
+ static void led_reset(DeviceState *dev)
+diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
+index fbdc48911e..b17804fc17 100644
+--- a/hw/ppc/ppc.c
++++ b/hw/ppc/ppc.c
+@@ -490,10 +490,32 @@ void ppce500_set_mpic_proxy(bool enabled)
+ /*****************************************************************************/
+ /* PowerPC time base and decrementer emulation */
+
++/*
++ * Conversion between QEMU_CLOCK_VIRTUAL ns and timebase (TB) ticks:
++ * TB ticks are arrived at by multiplying tb_freq then dividing by
++ * ns per second, and rounding down. TB ticks drive all clocks and
++ * timers in the target machine.
++ *
++ * Converting TB intervals to ns for the purpose of setting a
++ * QEMU_CLOCK_VIRTUAL timer should go the other way, but rounding
++ * up. Rounding down could cause the timer to fire before the TB
++ * value has been reached.
++ */
++static uint64_t ns_to_tb(uint32_t freq, int64_t clock)
++{
++ return muldiv64(clock, freq, NANOSECONDS_PER_SECOND);
++}
++
++/* virtual clock in TB ticks, not adjusted by TB offset */
++static int64_t tb_to_ns_round_up(uint32_t freq, uint64_t tb)
++{
++ return muldiv64_round_up(tb, NANOSECONDS_PER_SECOND, freq);
++}
++
+ uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset)
+ {
+ /* TB time in tb periods */
+- return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset;
++ return ns_to_tb(tb_env->tb_freq, vmclk) + tb_offset;
+ }
+
+ uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
+@@ -534,8 +556,7 @@ uint32_t cpu_ppc_load_tbu (CPUPPCState *env)
+ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk,
+ int64_t *tb_offsetp, uint64_t value)
+ {
+- *tb_offsetp = value -
+- muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND);
++ *tb_offsetp = value - ns_to_tb(tb_env->tb_freq, vmclk);
+
+ trace_ppc_tb_store(value, *tb_offsetp);
+ }
+@@ -693,16 +714,17 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env)
+ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
+ {
+ ppc_tb_t *tb_env = env->tb_env;
+- int64_t decr, diff;
++ uint64_t now, n;
++ int64_t decr;
+
+- diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+- if (diff >= 0) {
+- decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+- } else if (tb_env->flags & PPC_TIMER_BOOKE) {
++ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
++ n = ns_to_tb(tb_env->decr_freq, now);
++ if (next > n && tb_env->flags & PPC_TIMER_BOOKE) {
+ decr = 0;
+- } else {
+- decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
++ } else {
++ decr = next - n;
+ }
++
+ trace_ppc_decr_load(decr);
+
+ return decr;
+@@ -724,7 +746,9 @@ target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+ * to 64 bits, otherwise it is a 32 bit value.
+ */
+ if (env->spr[SPR_LPCR] & LPCR_LD) {
+- return decr;
++ PowerPCCPU *cpu = env_archcpu(env);
++ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
++ return sextract64(decr, 0, pcc->lrg_decr_bits);
+ }
+ return (uint32_t) decr;
+ }
+@@ -743,7 +767,7 @@ target_ulong cpu_ppc_load_hdecr(CPUPPCState *env)
+ * extended to 64 bits, otherwise it is 32 bits.
+ */
+ if (pcc->lrg_decr_bits > 32) {
+- return hdecr;
++ return sextract64(hdecr, 0, pcc->lrg_decr_bits);
+ }
+ return (uint32_t) hdecr;
+ }
+@@ -819,11 +843,17 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+ }
+
+ /*
+- * Going from 2 -> 1, 1 -> 0 or 0 -> -1 is the event to generate a DEC
+- * interrupt.
+- *
+- * If we get a really small DEC value, we can assume that by the time we
+- * handled it we should inject an interrupt already.
++ * Calculate the next decrementer event and set a timer.
++ * decr_next is in timebase units to keep rounding simple. Note it is
++ * not adjusted by tb_offset because if TB changes via tb_offset changing,
++ * decrementer does not change, so not directly comparable with TB.
++ */
++ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
++ next = ns_to_tb(tb_env->decr_freq, now) + value;
++ *nextp = next; /* nextp is in timebase units */
++
++ /*
++ * Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt.
+ *
+ * On MSB level based DEC implementations the MSB always means the interrupt
+ * is pending, so raise it on those.
+@@ -831,8 +861,7 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+ * On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers
+ * an edge interrupt, so raise it here too.
+ */
+- if ((value < 3) ||
+- ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) ||
++ if (((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) ||
+ ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0
+ && signed_decr >= 0)) {
+ (*raise_excp)(cpu);
+@@ -844,13 +873,8 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+ (*lower_excp)(cpu);
+ }
+
+- /* Calculate the next timer event */
+- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+- next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
+- *nextp = next;
+-
+ /* Adjust timer */
+- timer_mod(timer, next);
++ timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next));
+ }
+
+ static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
+@@ -1135,9 +1159,7 @@ static void cpu_4xx_fit_cb (void *opaque)
+ /* Cannot occur, but makes gcc happy */
+ return;
+ }
+- next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq);
+- if (next == now)
+- next++;
++ next = now + tb_to_ns_round_up(tb_env->tb_freq, next);
+ timer_mod(ppc40x_timer->fit_timer, next);
+ env->spr[SPR_40x_TSR] |= 1 << 26;
+ if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) {
+@@ -1163,14 +1185,15 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp)
+ } else {
+ trace_ppc4xx_pit_start(ppc40x_timer->pit_reload);
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+- next = now + muldiv64(ppc40x_timer->pit_reload,
+- NANOSECONDS_PER_SECOND, tb_env->decr_freq);
+- if (is_excp)
+- next += tb_env->decr_next - now;
+- if (next == now)
+- next++;
++
++ if (is_excp) {
++ tb_env->decr_next += ppc40x_timer->pit_reload;
++ } else {
++ tb_env->decr_next = ns_to_tb(tb_env->decr_freq, now)
++ + ppc40x_timer->pit_reload;
++ }
++ next = tb_to_ns_round_up(tb_env->decr_freq, tb_env->decr_next);
+ timer_mod(tb_env->decr_timer, next);
+- tb_env->decr_next = next;
+ }
+ }
+
+@@ -1223,9 +1246,7 @@ static void cpu_4xx_wdt_cb (void *opaque)
+ /* Cannot occur, but makes gcc happy */
+ return;
+ }
+- next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
+- if (next == now)
+- next++;
++ next = now + tb_to_ns_round_up(tb_env->decr_freq, next);
+ trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
+ switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) {
+ case 0x0:
+diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
+index 4fc6712025..55b338046e 100644
+--- a/hw/rdma/vmw/pvrdma_main.c
++++ b/hw/rdma/vmw/pvrdma_main.c
+@@ -91,19 +91,33 @@ static int init_dev_ring(PvrdmaRing *ring, PvrdmaRingState **ring_state,
+ dma_addr_t dir_addr, uint32_t num_pages)
+ {
+ uint64_t *dir, *tbl;
+- int rc = 0;
++ int max_pages, rc = 0;
+
+ if (!num_pages) {
+ rdma_error_report("Ring pages count must be strictly positive");
+ return -EINVAL;
+ }
+
++ /*
++ * Make sure we can satisfy the requested number of pages in a single
++ * TARGET_PAGE_SIZE sized page table (taking into account that first entry
++ * is reserved for ring-state)
++ */
++ max_pages = TARGET_PAGE_SIZE / sizeof(dma_addr_t) - 1;
++ if (num_pages > max_pages) {
++ rdma_error_report("Maximum pages on a single directory must not exceed %d\n",
++ max_pages);
++ return -EINVAL;
++ }
++
+ dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE);
+ if (!dir) {
+ rdma_error_report("Failed to map to page directory (ring %s)", name);
+ rc = -ENOMEM;
+ goto out;
+ }
++
++ /* We support only one page table for a ring */
+ tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
+ if (!tbl) {
+ rdma_error_report("Failed to map to page table (ring %s)", name);
+diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
+index e52188d022..9b11d8c573 100644
+--- a/hw/scsi/esp.c
++++ b/hw/scsi/esp.c
+@@ -759,7 +759,8 @@ static void esp_do_nodma(ESPState *s)
+ }
+
+ if (to_device) {
+- len = MIN(fifo8_num_used(&s->fifo), ESP_FIFO_SZ);
++ len = MIN(s->async_len, ESP_FIFO_SZ);
++ len = MIN(len, fifo8_num_used(&s->fifo));
+ esp_fifo_pop_buf(&s->fifo, s->async_buf, len);
+ s->async_buf += len;
+ s->async_len -= len;
+@@ -1395,7 +1396,7 @@ static void sysbus_esp_gpio_demux(void *opaque, int irq, int level)
+ parent_esp_reset(s, irq, level);
+ break;
+ case 1:
+- esp_dma_enable(opaque, irq, level);
++ esp_dma_enable(s, irq, level);
+ break;
+ }
+ }
+diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
+index e493c28814..b884a6f135 100644
+--- a/hw/scsi/scsi-disk.c
++++ b/hw/scsi/scsi-disk.c
+@@ -1624,9 +1624,10 @@ static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf)
+ * Since the existing code only checks/updates bits 8-15 of the block
+ * size, restrict ourselves to the same requirement for now to ensure
+ * that a block size set by a block descriptor and then read back by
+- * a subsequent SCSI command will be the same
++ * a subsequent SCSI command will be the same. Also disallow a block
++ * size of 256 since we cannot handle anything below BDRV_SECTOR_SIZE.
+ */
+- if (bs && !(bs & ~0xff00) && bs != s->qdev.blocksize) {
++ if (bs && !(bs & ~0xfe00) && bs != s->qdev.blocksize) {
+ s->qdev.blocksize = bs;
+ trace_scsi_disk_mode_select_set_blocksize(s->qdev.blocksize);
+ }
+@@ -1951,6 +1952,10 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req)
+ scsi_disk_emulate_write_same(r, r->iov.iov_base);
+ break;
+
++ case FORMAT_UNIT:
++ scsi_req_complete(&r->req, GOOD);
++ break;
++
+ default:
+ abort();
+ }
+diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
+index 306070c872..ef60badc6b 100644
+--- a/hw/sd/sdhci.c
++++ b/hw/sd/sdhci.c
+@@ -321,6 +321,8 @@ static void sdhci_poweron_reset(DeviceState *dev)
+
+ static void sdhci_data_transfer(void *opaque);
+
++#define BLOCK_SIZE_MASK (4 * KiB - 1)
++
+ static void sdhci_send_command(SDHCIState *s)
+ {
+ SDRequest request;
+@@ -371,7 +373,8 @@ static void sdhci_send_command(SDHCIState *s)
+
+ sdhci_update_irq(s);
+
+- if (!timeout && s->blksize && (s->cmdreg & SDHC_CMD_DATA_PRESENT)) {
++ if (!timeout && (s->blksize & BLOCK_SIZE_MASK) &&
++ (s->cmdreg & SDHC_CMD_DATA_PRESENT)) {
+ s->data_count = 0;
+ sdhci_data_transfer(s);
+ }
+@@ -406,7 +409,6 @@ static void sdhci_end_transfer(SDHCIState *s)
+ /*
+ * Programmed i/o data transfer
+ */
+-#define BLOCK_SIZE_MASK (4 * KiB - 1)
+
+ /* Fill host controller's read buffer with BLKSIZE bytes of data from card */
+ static void sdhci_read_block_from_card(SDHCIState *s)
+@@ -1154,7 +1156,8 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
+ s->sdmasysad = (s->sdmasysad & mask) | value;
+ MASKED_WRITE(s->sdmasysad, mask, value);
+ /* Writing to last byte of sdmasysad might trigger transfer */
+- if (!(mask & 0xFF000000) && s->blkcnt && s->blksize &&
++ if (!(mask & 0xFF000000) && s->blkcnt &&
++ (s->blksize & BLOCK_SIZE_MASK) &&
+ SDHC_DMA_TYPE(s->hostctl1) == SDHC_CTRL_SDMA) {
+ if (s->trnmod & SDHC_TRNS_MULTI) {
+ sdhci_sdma_transfer_multi_blocks(s);
+@@ -1168,7 +1171,11 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
+ if (!TRANSFERRING_DATA(s->prnsts)) {
+ uint16_t blksize = s->blksize;
+
+- MASKED_WRITE(s->blksize, mask, extract32(value, 0, 12));
++ /*
++ * [14:12] SDMA Buffer Boundary
++ * [11:00] Transfer Block Size
++ */
++ MASKED_WRITE(s->blksize, mask, extract32(value, 0, 15));
+ MASKED_WRITE(s->blkcnt, mask >> 16, value >> 16);
+
+ /* Limit block size to the maximum buffer size */
+diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
+index b3434ec0bc..09daf58787 100644
+--- a/include/qemu/host-utils.h
++++ b/include/qemu/host-utils.h
+@@ -57,6 +57,11 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+ return (__int128_t)a * b / c;
+ }
+
++static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
++{
++ return ((__int128_t)a * b + c - 1) / c;
++}
++
+ static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
+ uint64_t divisor)
+ {
+@@ -84,7 +89,8 @@ void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
+ uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
+ int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
+
+-static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
++static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c,
++ bool round_up)
+ {
+ union {
+ uint64_t ll;
+@@ -100,12 +106,25 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+
+ u.ll = a;
+ rl = (uint64_t)u.l.low * (uint64_t)b;
++ if (round_up) {
++ rl += c - 1;
++ }
+ rh = (uint64_t)u.l.high * (uint64_t)b;
+ rh += (rl >> 32);
+ res.l.high = rh / c;
+ res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+ return res.ll;
+ }
++
++static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
++{
++ return muldiv64_rounding(a, b, c, false);
++}
++
++static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
++{
++ return muldiv64_rounding(a, b, c, true);
++}
+ #endif
+
+ /**
+diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c
+index f253a15864..ec5f5412d1 100644
+--- a/linux-user/hppa/signal.c
++++ b/linux-user/hppa/signal.c
+@@ -25,7 +25,7 @@
+ struct target_sigcontext {
+ abi_ulong sc_flags;
+ abi_ulong sc_gr[32];
+- uint64_t sc_fr[32];
++ abi_ullong sc_fr[32];
+ abi_ulong sc_iasq[2];
+ abi_ulong sc_iaoq[2];
+ abi_ulong sc_sar;
+@@ -149,16 +149,18 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
+ target_ulong *fdesc, dest;
+
+ haddr &= -4;
+- if (!lock_user_struct(VERIFY_READ, fdesc, haddr, 1)) {
++ fdesc = lock_user(VERIFY_READ, haddr, 2 * sizeof(target_ulong), 1);
++ if (!fdesc) {
+ goto give_sigsegv;
+ }
+ __get_user(dest, fdesc);
+ __get_user(env->gr[19], fdesc + 1);
+- unlock_user_struct(fdesc, haddr, 1);
++ unlock_user(fdesc, haddr, 0);
+ haddr = dest;
+ }
+ env->iaoq_f = haddr;
+ env->iaoq_b = haddr + 4;
++ env->psw_n = 0;
+ return;
+
+ give_sigsegv:
+diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c
+index 8735e58bad..990b03e727 100644
+--- a/linux-user/mips/cpu_loop.c
++++ b/linux-user/mips/cpu_loop.c
+@@ -180,7 +180,9 @@ done_syscall:
+ }
+ force_sig_fault(TARGET_SIGFPE, si_code, env->active_tc.PC);
+ break;
+-
++ case EXCP_OVERFLOW:
++ force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTOVF, env->active_tc.PC);
++ break;
+ /* The code below was inspired by the MIPS Linux kernel trap
+ * handling code in arch/mips/kernel/traps.c.
+ */
+diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
+index c4ba962708..c16c2c2d57 100644
+--- a/linux-user/sh4/signal.c
++++ b/linux-user/sh4/signal.c
+@@ -104,6 +104,14 @@ static void unwind_gusa(CPUSH4State *regs)
+
+ /* Reset the SP to the saved version in R1. */
+ regs->gregs[15] = regs->gregs[1];
++ } else if (regs->gregs[15] >= -128u && regs->pc == regs->gregs[0]) {
++ /* If we are on the last instruction of a gUSA region, we must reset
++ the SP, otherwise we would be pushing the signal context to
++ invalid memory. */
++ regs->gregs[15] = regs->gregs[1];
++ } else if (regs->flags & TB_FLAG_DELAY_SLOT) {
++ /* If we are in a delay slot, push the previous instruction. */
++ regs->pc -= 2;
+ }
+ }
+
+diff --git a/linux-user/syscall.c b/linux-user/syscall.c
+index cedf22c5b5..aead0f6ac9 100644
+--- a/linux-user/syscall.c
++++ b/linux-user/syscall.c
+@@ -95,50 +95,7 @@
+ #include <linux/soundcard.h>
+ #include <linux/kd.h>
+ #include <linux/mtio.h>
+-
+-#ifdef HAVE_SYS_MOUNT_FSCONFIG
+-/*
+- * glibc >= 2.36 linux/mount.h conflicts with sys/mount.h,
+- * which in turn prevents use of linux/fs.h. So we have to
+- * define the constants ourselves for now.
+- */
+-#define FS_IOC_GETFLAGS _IOR('f', 1, long)
+-#define FS_IOC_SETFLAGS _IOW('f', 2, long)
+-#define FS_IOC_GETVERSION _IOR('v', 1, long)
+-#define FS_IOC_SETVERSION _IOW('v', 2, long)
+-#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
+-#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
+-#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
+-#define FS_IOC32_GETVERSION _IOR('v', 1, int)
+-#define FS_IOC32_SETVERSION _IOW('v', 2, int)
+-
+-#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+-#define BLKDISCARD _IO(0x12,119)
+-#define BLKIOMIN _IO(0x12,120)
+-#define BLKIOOPT _IO(0x12,121)
+-#define BLKALIGNOFF _IO(0x12,122)
+-#define BLKPBSZGET _IO(0x12,123)
+-#define BLKDISCARDZEROES _IO(0x12,124)
+-#define BLKSECDISCARD _IO(0x12,125)
+-#define BLKROTATIONAL _IO(0x12,126)
+-#define BLKZEROOUT _IO(0x12,127)
+-
+-#define FIBMAP _IO(0x00,1)
+-#define FIGETBSZ _IO(0x00,2)
+-
+-struct file_clone_range {
+- __s64 src_fd;
+- __u64 src_offset;
+- __u64 src_length;
+- __u64 dest_offset;
+-};
+-
+-#define FICLONE _IOW(0x94, 9, int)
+-#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
+-
+-#else
+ #include <linux/fs.h>
+-#endif
+ #include <linux/fd.h>
+ #if defined(CONFIG_FIEMAP)
+ #include <linux/fiemap.h>
+diff --git a/meson.build b/meson.build
+index 450c48a9f0..787f91855e 100644
+--- a/meson.build
++++ b/meson.build
+@@ -2032,8 +2032,6 @@ config_host_data.set('HAVE_OPTRESET',
+ cc.has_header_symbol('getopt.h', 'optreset'))
+ config_host_data.set('HAVE_IPPROTO_MPTCP',
+ cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+-config_host_data.set('HAVE_SYS_MOUNT_FSCONFIG',
+- cc.has_header_symbol('sys/mount.h', 'FSCONFIG_SET_FLAG'))
+
+ # has_member
+ config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
+diff --git a/migration/migration.c b/migration/migration.c
+index c19fb5cb3e..c8ca7927b4 100644
+--- a/migration/migration.c
++++ b/migration/migration.c
+@@ -1809,20 +1809,25 @@ void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
+ {
+ MigrationParameters tmp;
+
+- /* TODO Rewrite "" to null instead */
++ /* TODO Rewrite "" to null instead for all three tls_* parameters */
+ if (params->has_tls_creds
+ && params->tls_creds->type == QTYPE_QNULL) {
+ qobject_unref(params->tls_creds->u.n);
+ params->tls_creds->type = QTYPE_QSTRING;
+ params->tls_creds->u.s = strdup("");
+ }
+- /* TODO Rewrite "" to null instead */
+ if (params->has_tls_hostname
+ && params->tls_hostname->type == QTYPE_QNULL) {
+ qobject_unref(params->tls_hostname->u.n);
+ params->tls_hostname->type = QTYPE_QSTRING;
+ params->tls_hostname->u.s = strdup("");
+ }
++ if (params->tls_authz
++ && params->tls_authz->type == QTYPE_QNULL) {
++ qobject_unref(params->tls_authz->u.n);
++ params->tls_authz->type = QTYPE_QSTRING;
++ params->tls_authz->u.s = strdup("");
++ }
+
+ migrate_params_test_apply(params, &tmp);
+
+diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
+index b1fff0ba6c..30d07026c7 100644
+--- a/pc-bios/optionrom/Makefile
++++ b/pc-bios/optionrom/Makefile
+@@ -36,7 +36,7 @@ config-cc.mak: Makefile
+ $(call cc-option,-Wno-array-bounds)) 3> config-cc.mak
+ -include config-cc.mak
+
+-override LDFLAGS = -nostdlib -Wl,-T,$(SRC_DIR)/flat.lds
++override LDFLAGS = -nostdlib -Wl,--build-id=none,-T,$(SRC_DIR)/flat.lds
+
+ pvh.img: pvh.o pvh_main.o
+
+diff --git a/qemu-img.c b/qemu-img.c
+index a9b3a8103c..2c32d9da4e 100644
+--- a/qemu-img.c
++++ b/qemu-img.c
+@@ -3753,6 +3753,8 @@ static int img_rebase(int argc, char **argv)
+ }
+
+ if (prefix_chain_bs) {
++ uint64_t bytes = n;
++
+ /*
+ * If cluster wasn't changed since prefix_chain, we don't need
+ * to take action
+@@ -3765,9 +3767,18 @@ static int img_rebase(int argc, char **argv)
+ strerror(-ret));
+ goto out;
+ }
+- if (!ret) {
++ if (!ret && n) {
+ continue;
+ }
++ if (!n) {
++ /*
++ * If we've reached EOF of the old backing, it means that
++ * offsets beyond the old backing size were read as zeroes.
++ * Now we will need to explicitly zero the cluster in
++ * order to preserve that state after the rebase.
++ */
++ n = bytes;
++ }
+ }
+
+ /*
+diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
+index b82a1b0c58..44d306aedc 100755
+--- a/scripts/analyze-migration.py
++++ b/scripts/analyze-migration.py
+@@ -38,13 +38,13 @@ def __init__(self, filename):
+ self.file = open(self.filename, "rb")
+
+ def read64(self):
+- return int.from_bytes(self.file.read(8), byteorder='big', signed=True)
++ return int.from_bytes(self.file.read(8), byteorder='big', signed=False)
+
+ def read32(self):
+- return int.from_bytes(self.file.read(4), byteorder='big', signed=True)
++ return int.from_bytes(self.file.read(4), byteorder='big', signed=False)
+
+ def read16(self):
+- return int.from_bytes(self.file.read(2), byteorder='big', signed=True)
++ return int.from_bytes(self.file.read(2), byteorder='big', signed=False)
+
+ def read8(self):
+ return int.from_bytes(self.file.read(1), byteorder='big', signed=True)
+diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
+index 5393c7fc5c..cd46e7597c 100644
+--- a/scripts/tracetool/__init__.py
++++ b/scripts/tracetool/__init__.py
+@@ -92,7 +92,7 @@ def out(*lines, **kwargs):
+ def validate_type(name):
+ bits = name.split(" ")
+ for bit in bits:
+- bit = re.sub("\*", "", bit)
++ bit = re.sub(r"\*", "", bit)
+ if bit == "":
+ continue
+ if bit == "const":
+diff --git a/target/arm/helper.c b/target/arm/helper.c
+index 22bc935242..a52ef3dfe4 100644
+--- a/target/arm/helper.c
++++ b/target/arm/helper.c
+@@ -11301,6 +11301,15 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
+ && !(env->pstate & PSTATE_TCO)
+ && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) {
+ DP_TBFLAG_A64(flags, MTE_ACTIVE, 1);
++ if (!EX_TBFLAG_A64(flags, UNPRIV)) {
++ /*
++ * In non-unpriv contexts (eg EL0), unpriv load/stores
++ * act like normal ones; duplicate the MTE info to
++ * avoid translate-a64.c having to check UNPRIV to see
++ * whether it is OK to index into MTE_ACTIVE[].
++ */
++ DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1);
++ }
+ }
+ }
+ /* And again for unprivileged accesses, if required. */
+diff --git a/target/arm/internals.h b/target/arm/internals.h
+index 161e42d50f..3c7ff51c99 100644
+--- a/target/arm/internals.h
++++ b/target/arm/internals.h
+@@ -1129,7 +1129,6 @@ typedef struct ARMCacheAttrs {
+ unsigned int attrs:8;
+ unsigned int shareability:2; /* as in the SH field of the VMSAv8-64 PTEs */
+ bool is_s2_format:1;
+- bool guarded:1; /* guarded bit of the v8-64 PTE */
+ } ARMCacheAttrs;
+
+ /* Fields that are valid upon success. */
+diff --git a/target/arm/ptw.c b/target/arm/ptw.c
+index 0b16068557..be0cc3e347 100644
+--- a/target/arm/ptw.c
++++ b/target/arm/ptw.c
+@@ -103,6 +103,37 @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
+ return stage_1_mmu_idx(arm_mmu_idx(env));
+ }
+
++/*
++ * Return where we should do ptw loads from for a stage 2 walk.
++ * This depends on whether the address we are looking up is a
++ * Secure IPA or a NonSecure IPA, which we know from whether this is
++ * Stage2 or Stage2_S.
++ * If this is the Secure EL1&0 regime we need to check the NSW and SW bits.
++ */
++static ARMMMUIdx ptw_idx_for_stage_2(CPUARMState *env, ARMMMUIdx stage2idx)
++{
++ bool s2walk_secure;
++
++ /*
++ * We're OK to check the current state of the CPU here because
++ * (1) we always invalidate all TLBs when the SCR_EL3.NS bit changes
++ * (2) there's no way to do a lookup that cares about Stage 2 for a
++ * different security state to the current one for AArch64, and AArch32
++ * never has a secure EL2. (AArch32 ATS12NSO[UP][RW] allow EL3 to do
++ * an NS stage 1+2 lookup while the NS bit is 0.)
++ */
++ if (!arm_is_secure_below_el3(env) || !arm_el_is_aa64(env, 3)) {
++ return ARMMMUIdx_Phys_NS;
++ }
++ if (stage2idx == ARMMMUIdx_Stage2_S) {
++ s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW);
++ } else {
++ s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW);
++ }
++ return s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
++
++}
++
+ static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
+ {
+ return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
+@@ -220,7 +251,6 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx;
+ uint8_t pte_attrs;
+- bool pte_secure;
+
+ ptw->out_virt = addr;
+
+@@ -232,8 +262,8 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
+ if (regime_is_stage2(s2_mmu_idx)) {
+ S1Translate s2ptw = {
+ .in_mmu_idx = s2_mmu_idx,
+- .in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS,
+- .in_secure = is_secure,
++ .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx),
++ .in_secure = s2_mmu_idx == ARMMMUIdx_Stage2_S,
+ .in_debug = true,
+ };
+ GetPhysAddrResult s2 = { };
+@@ -244,16 +274,17 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
+ }
+ ptw->out_phys = s2.f.phys_addr;
+ pte_attrs = s2.cacheattrs.attrs;
+- pte_secure = s2.f.attrs.secure;
++ ptw->out_secure = s2.f.attrs.secure;
+ } else {
+ /* Regime is physical. */
+ ptw->out_phys = addr;
+ pte_attrs = 0;
+- pte_secure = is_secure;
++ ptw->out_secure = s2_mmu_idx == ARMMMUIdx_Phys_S;
+ }
+ ptw->out_host = NULL;
+ ptw->out_rw = false;
+ } else {
++#ifdef CONFIG_TCG
+ CPUTLBEntryFull *full;
+ int flags;
+
+@@ -269,7 +300,10 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
+ ptw->out_phys = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
+ ptw->out_rw = full->prot & PAGE_WRITE;
+ pte_attrs = full->pte_attrs;
+- pte_secure = full->attrs.secure;
++ ptw->out_secure = full->attrs.secure;
++#else
++ g_assert_not_reached();
++#endif
+ }
+
+ if (regime_is_stage2(s2_mmu_idx)) {
+@@ -289,11 +323,6 @@ static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
+ }
+ }
+
+- /* Check if page table walk is to secure or non-secure PA space. */
+- ptw->out_secure = (is_secure
+- && !(pte_secure
+- ? env->cp15.vstcr_el2 & VSTCR_SW
+- : env->cp15.vtcr_el2 & VTCR_NSW));
+ ptw->out_be = regime_translation_big_endian(env, mmu_idx);
+ return true;
+
+@@ -1378,17 +1407,18 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
+ descaddrmask &= ~indexmask_grainsize;
+
+ /*
+- * Secure accesses start with the page table in secure memory and
++ * Secure stage 1 accesses start with the page table in secure memory and
+ * can be downgraded to non-secure at any step. Non-secure accesses
+ * remain non-secure. We implement this by just ORing in the NSTable/NS
+ * bits at each step.
++ * Stage 2 never gets this kind of downgrade.
+ */
+ tableattrs = is_secure ? 0 : (1 << 4);
+
+ next_level:
+ descaddr |= (address >> (stride * (4 - level))) & indexmask;
+ descaddr &= ~7ULL;
+- nstable = extract32(tableattrs, 4, 1);
++ nstable = !regime_is_stage2(mmu_idx) && extract32(tableattrs, 4, 1);
+ if (nstable) {
+ /*
+ * Stage2_S -> Stage2 or Phys_S -> Phys_NS
+@@ -2605,7 +2635,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
+ hwaddr ipa;
+ int s1_prot, s1_lgpgsz;
+ bool is_secure = ptw->in_secure;
+- bool ret, ipa_secure, s2walk_secure;
++ bool ret, ipa_secure, s1_guarded;
+ ARMCacheAttrs cacheattrs1;
+ bool is_el0;
+ uint64_t hcr;
+@@ -2619,20 +2649,11 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
+
+ ipa = result->f.phys_addr;
+ ipa_secure = result->f.attrs.secure;
+- if (is_secure) {
+- /* Select TCR based on the NS bit from the S1 walk. */
+- s2walk_secure = !(ipa_secure
+- ? env->cp15.vstcr_el2 & VSTCR_SW
+- : env->cp15.vtcr_el2 & VTCR_NSW);
+- } else {
+- assert(!ipa_secure);
+- s2walk_secure = false;
+- }
+
+ is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0;
+- ptw->in_mmu_idx = s2walk_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
+- ptw->in_ptw_idx = s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
+- ptw->in_secure = s2walk_secure;
++ ptw->in_mmu_idx = ipa_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
++ ptw->in_secure = ipa_secure;
++ ptw->in_ptw_idx = ptw_idx_for_stage_2(env, ptw->in_mmu_idx);
+
+ /*
+ * S1 is done, now do S2 translation.
+@@ -2640,6 +2661,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
+ */
+ s1_prot = result->f.prot;
+ s1_lgpgsz = result->f.lg_page_size;
++ s1_guarded = result->f.guarded;
+ cacheattrs1 = result->cacheattrs;
+ memset(result, 0, sizeof(*result));
+
+@@ -2680,6 +2702,9 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
+ result->cacheattrs = combine_cacheattrs(hcr, cacheattrs1,
+ result->cacheattrs);
+
++ /* No BTI GP information in stage 2, we just use the S1 value */
++ result->f.guarded = s1_guarded;
++
+ /*
+ * Check if IPA translates to secure or non-secure PA space.
+ * Note that VSTCR overrides VTCR and {N}SW overrides {N}SA.
+@@ -2724,6 +2749,16 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw,
+ ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
+ break;
+
++ case ARMMMUIdx_Stage2:
++ case ARMMMUIdx_Stage2_S:
++ /*
++ * Second stage lookup uses physical for ptw; whether this is S or
++ * NS may depend on the SW/NSW bits if this is a stage 2 lookup for
++ * the Secure EL2&0 regime.
++ */
++ ptw->in_ptw_idx = ptw_idx_for_stage_2(env, mmu_idx);
++ break;
++
+ case ARMMMUIdx_E10_0:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E0;
+ goto do_twostage;
+@@ -2747,7 +2782,7 @@ static bool get_phys_addr_with_struct(CPUARMState *env, S1Translate *ptw,
+ /* fall through */
+
+ default:
+- /* Single stage and second stage uses physical for ptw. */
++ /* Single stage uses physical for ptw. */
+ ptw->in_ptw_idx = is_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
+ break;
+ }
+diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
+index ee4f4a899f..528e2fdfbb 100644
+--- a/target/i386/tcg/decode-new.c.inc
++++ b/target/i386/tcg/decode-new.c.inc
+@@ -105,6 +105,7 @@
+ #define vex3 .vex_class = 3,
+ #define vex4 .vex_class = 4,
+ #define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
++#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
+ #define vex5 .vex_class = 5,
+ #define vex6 .vex_class = 6,
+ #define vex7 .vex_class = 7,
+@@ -236,7 +237,7 @@ static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
+ static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+ {
+ static const X86OpEntry opcodes_0F6F[4] = {
+- X86_OP_ENTRY3(MOVDQ, P,q, None,None, Q,q, vex1 mmx), /* movq */
++ X86_OP_ENTRY3(MOVDQ, P,q, None,None, Q,q, vex5 mmx), /* movq */
+ X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1), /* movdqa */
+ X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* movdqu */
+ {},
+@@ -273,9 +274,9 @@ static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ {
+ static const X86OpEntry opcodes_0F78[4] = {
+ {},
+- X86_OP_ENTRY3(EXTRQ_i, V,x, None,None, I,w, cpuid(SSE4A)),
++ X86_OP_ENTRY3(EXTRQ_i, V,x, None,None, I,w, cpuid(SSE4A)), /* AMD extension */
+ {},
+- X86_OP_ENTRY3(INSERTQ_i, V,x, U,x, I,w, cpuid(SSE4A)),
++ X86_OP_ENTRY3(INSERTQ_i, V,x, U,x, I,w, cpuid(SSE4A)), /* AMD extension */
+ };
+ *entry = *decode_by_prefix(s, opcodes_0F78);
+ }
+@@ -283,9 +284,9 @@ static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+ {
+ if (s->prefix & PREFIX_REPNZ) {
+- entry->gen = gen_INSERTQ_r;
++ entry->gen = gen_INSERTQ_r; /* AMD extension */
+ } else if (s->prefix & PREFIX_DATA) {
+- entry->gen = gen_EXTRQ_r;
++ entry->gen = gen_EXTRQ_r; /* AMD extension */
+ } else {
+ entry->gen = NULL;
+ };
+@@ -305,7 +306,7 @@ static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+ {
+ static const X86OpEntry opcodes_0F7F[4] = {
+- X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx), /* movq */
++ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex5 mmx), /* movq */
+ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1), /* movdqa */
+ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4_unal), /* movdqu */
+ {},
+@@ -336,7 +337,7 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = {
+ [0x07] = X86_OP_ENTRY3(PHSUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
+
+ [0x10] = X86_OP_ENTRY2(PBLENDVB, V,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
+- [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x, W,ph, vex11 cpuid(F16C) p_66),
++ [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x, W,xh, vex11 cpuid(F16C) p_66),
+ [0x14] = X86_OP_ENTRY2(BLENDVPS, V,x, W,x, vex4 cpuid(SSE41) p_66),
+ [0x15] = X86_OP_ENTRY2(BLENDVPD, V,x, W,x, vex4 cpuid(SSE41) p_66),
+ /* Listed incorrectly as type 4 */
+@@ -564,7 +565,7 @@ static const X86OpEntry opcodes_0F3A[256] = {
+ [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) zext0 p_66),
+ [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66),
+ [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66),
+- [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,ph, V,x, I,b, vex11 cpuid(F16C) p_66),
++ [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,xh, V,x, I,b, vex11 cpuid(F16C) p_66),
+
+ [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) zext2 p_66),
+ [0x21] = X86_OP_GROUP0(VINSERTPS),
+@@ -638,15 +639,15 @@ static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ static const X86OpEntry opcodes_0F10_reg[4] = {
+ X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */
+ X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */
+- X86_OP_ENTRY3(VMOVSS, V,x, H,x, W,x, vex4),
+- X86_OP_ENTRY3(VMOVLPx, V,x, H,x, W,x, vex4), /* MOVSD */
++ X86_OP_ENTRY3(VMOVSS, V,x, H,x, W,x, vex5),
++ X86_OP_ENTRY3(VMOVLPx, V,x, H,x, W,x, vex5), /* MOVSD */
+ };
+
+ static const X86OpEntry opcodes_0F10_mem[4] = {
+ X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */
+ X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */
+- X86_OP_ENTRY3(VMOVSS_ld, V,x, H,x, M,ss, vex4),
+- X86_OP_ENTRY3(VMOVSD_ld, V,x, H,x, M,sd, vex4),
++ X86_OP_ENTRY3(VMOVSS_ld, V,x, H,x, M,ss, vex5),
++ X86_OP_ENTRY3(VMOVSD_ld, V,x, H,x, M,sd, vex5),
+ };
+
+ if ((get_modrm(s, env) >> 6) == 3) {
+@@ -659,17 +660,17 @@ static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+ {
+ static const X86OpEntry opcodes_0F11_reg[4] = {
+- X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPS */
+- X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPD */
+- X86_OP_ENTRY3(VMOVSS, W,x, H,x, V,x, vex4),
+- X86_OP_ENTRY3(VMOVLPx, W,x, H,x, V,q, vex4), /* MOVSD */
++ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */
++ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */
++ X86_OP_ENTRY3(VMOVSS, W,x, H,x, V,x, vex5),
++ X86_OP_ENTRY3(VMOVLPx, W,x, H,x, V,q, vex5), /* MOVSD */
+ };
+
+ static const X86OpEntry opcodes_0F11_mem[4] = {
+- X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPS */
+- X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVPD */
+- X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4),
+- X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4), /* MOVSD */
++ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */
++ X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */
++ X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex5),
++ X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
+ };
+
+ if ((get_modrm(s, env) >> 6) == 3) {
+@@ -686,16 +687,16 @@ static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ * Use dq for operand for compatibility with gen_MOVSD and
+ * to allow VEX128 only.
+ */
+- X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex4), /* MOVLPS */
+- X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex4), /* MOVLPD */
++ X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPS */
++ X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPD */
+ X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)),
+- X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, WM,q, vex4 cpuid(SSE3)), /* qq if VEX.256 */
++ X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
+ };
+ static const X86OpEntry opcodes_0F12_reg[4] = {
+- X86_OP_ENTRY3(VMOVHLPS, V,dq, H,dq, U,dq, vex4),
+- X86_OP_ENTRY3(VMOVLPx, W,x, H,x, U,q, vex4), /* MOVLPD */
++ X86_OP_ENTRY3(VMOVHLPS, V,dq, H,dq, U,dq, vex7),
++ X86_OP_ENTRY3(VMOVLPx, W,x, H,x, U,q, vex5), /* MOVLPD */
+ X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)),
+- X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)),
++ X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, U,x, vex5 cpuid(SSE3)),
+ };
+
+ if ((get_modrm(s, env) >> 6) == 3) {
+@@ -715,15 +716,15 @@ static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ * Operand 1 technically only reads the low 64 bits, but uses dq so that
+ * it is easier to check for op0 == op1 in an endianness-neutral manner.
+ */
+- X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex4), /* MOVHPS */
+- X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex4), /* MOVHPD */
++ X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPS */
++ X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPD */
+ X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)),
+ {},
+ };
+ static const X86OpEntry opcodes_0F16_reg[4] = {
+ /* Same as above, operand 1 could be Hq if it wasn't for big-endian. */
+- X86_OP_ENTRY3(VMOVLHPS, V,dq, H,dq, U,q, vex4),
+- X86_OP_ENTRY3(VMOVHPx, V,x, H,x, U,x, vex4), /* MOVHPD */
++ X86_OP_ENTRY3(VMOVLHPS, V,dq, H,dq, U,q, vex7),
++ X86_OP_ENTRY3(VMOVHPx, V,x, H,x, U,x, vex5), /* MOVHPD */
+ X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)),
+ {},
+ };
+@@ -749,8 +750,9 @@ static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+ {
+ static const X86OpEntry opcodes_0F2B[4] = {
+- X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex4), /* MOVNTPS */
+- X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex4), /* MOVNTPD */
++ X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPS */
++ X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPD */
++ /* AMD extensions */
+ X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
+ X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
+ };
+@@ -803,10 +805,20 @@ static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entr
+ case 0x51: entry->gen = gen_VSQRT; break;
+ case 0x52: entry->gen = gen_VRSQRT; break;
+ case 0x53: entry->gen = gen_VRCP; break;
+- case 0x5A: entry->gen = gen_VCVTfp2fp; break;
+ }
+ }
+
++static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
++{
++ static const X86OpEntry opcodes_0F5A[4] = {
++ X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */
++ X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */
++ X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */
++ X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */
++ };
++ *entry = *decode_by_prefix(s, opcodes_0F5A);
++}
++
+ static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+ {
+ static const X86OpEntry opcodes_0F5B[4] = {
+@@ -823,7 +835,7 @@ static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
+ static const X86OpEntry opcodes_0FE6[4] = {
+ {},
+ X86_OP_ENTRY2(VCVTTPD2DQ, V,x, W,x, vex2),
+- X86_OP_ENTRY2(VCVTDQ2PD, V,x, W,x, vex2),
++ X86_OP_ENTRY2(VCVTDQ2PD, V,x, W,x, vex5),
+ X86_OP_ENTRY2(VCVTPD2DQ, V,x, W,x, vex2),
+ };
+ *entry = *decode_by_prefix(s, opcodes_0FE6);
+@@ -841,17 +853,17 @@ static const X86OpEntry opcodes_0F[256] = {
+ [0x10] = X86_OP_GROUP0(0F10),
+ [0x11] = X86_OP_GROUP0(0F11),
+ [0x12] = X86_OP_GROUP0(0F12),
+- [0x13] = X86_OP_ENTRY3(VMOVLPx_st, M,q, None,None, V,q, vex4 p_00_66),
++ [0x13] = X86_OP_ENTRY3(VMOVLPx_st, M,q, None,None, V,q, vex5 p_00_66),
+ [0x14] = X86_OP_ENTRY3(VUNPCKLPx, V,x, H,x, W,x, vex4 p_00_66),
+ [0x15] = X86_OP_ENTRY3(VUNPCKHPx, V,x, H,x, W,x, vex4 p_00_66),
+ [0x16] = X86_OP_GROUP0(0F16),
+ /* Incorrectly listed as Mq,Vq in the manual */
+- [0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex4 p_00_66),
++ [0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66),
+
+ [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66),
+- [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3),
+- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3),
++ [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
++ [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
++ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
+ [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */
+ [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */
+ [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */
+@@ -889,7 +901,7 @@ static const X86OpEntry opcodes_0F[256] = {
+
+ [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+ [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2),
++ [0x5a] = X86_OP_GROUP0(0F5A),
+ [0x5b] = X86_OP_GROUP0(0F5B),
+ [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+ [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+@@ -1102,7 +1114,7 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp
+ *ot = s->vex_l ? MO_256 : MO_128;
+ return true;
+
+- case X86_SIZE_ph: /* SSE/AVX packed half precision */
++ case X86_SIZE_xh: /* SSE/AVX packed half register */
+ *ot = s->vex_l ? MO_128 : MO_64;
+ return true;
+
+@@ -1458,9 +1470,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
+ * Instructions which differ between 00/66 and F2/F3 in the
+ * exception classification and the size of the memory operand.
+ */
+- assert(e->vex_class == 1 || e->vex_class == 2);
++ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
+ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+- e->vex_class = 3;
++ e->vex_class = e->vex_class < 4 ? 3 : 5;
+ if (s->vex_l) {
+ goto illegal;
+ }
+diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
+index cb6b8bcf67..a542ec1681 100644
+--- a/target/i386/tcg/decode-new.h
++++ b/target/i386/tcg/decode-new.h
+@@ -92,7 +92,7 @@ typedef enum X86OpSize {
+ /* Custom */
+ X86_SIZE_d64,
+ X86_SIZE_f64,
+- X86_SIZE_ph, /* SSE/AVX packed half precision */
++ X86_SIZE_xh, /* SSE/AVX packed half register */
+ } X86OpSize;
+
+ typedef enum X86CPUIDFeature {
+diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
+index 5d31fce65d..d6a9de8b3d 100644
+--- a/target/i386/tcg/emit.c.inc
++++ b/target/i386/tcg/emit.c.inc
+@@ -1917,12 +1917,22 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ set_cc_op(s, CC_OP_EFLAGS);
+ }
+
+-static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
++static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ {
+- gen_unary_fp_sse(s, env, decode,
+- gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm,
+- gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm,
+- gen_helper_cvtsd2ss, gen_helper_cvtss2sd);
++ if (s->vex_l) {
++ gen_helper_cvtpd2ps_ymm(cpu_env, OP_PTR0, OP_PTR2);
++ } else {
++ gen_helper_cvtpd2ps_xmm(cpu_env, OP_PTR0, OP_PTR2);
++ }
++}
++
++static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
++{
++ if (s->vex_l) {
++ gen_helper_cvtps2pd_ymm(cpu_env, OP_PTR0, OP_PTR2);
++ } else {
++ gen_helper_cvtps2pd_xmm(cpu_env, OP_PTR0, OP_PTR2);
++ }
+ }
+
+ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+@@ -1939,6 +1949,16 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
+ }
+ }
+
++static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
++{
++ gen_helper_cvtsd2ss(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
++}
++
++static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
++{
++ gen_helper_cvtss2sd(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
++}
++
+ static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+ {
+ int vec_len = vector_len(s, decode);
+diff --git a/target/mips/tcg/msa.decode b/target/mips/tcg/msa.decode
+index 9575289195..4410e2a02e 100644
+--- a/target/mips/tcg/msa.decode
++++ b/target/mips/tcg/msa.decode
+@@ -31,8 +31,8 @@
+
+ @lsa ...... rs:5 rt:5 rd:5 ... sa:2 ...... &r
+ @ldst ...... sa:s10 ws:5 wd:5 .... df:2 &msa_i
+-@bz_v ...... ... .. wt:5 sa:16 &msa_bz df=3
+-@bz ...... ... df:2 wt:5 sa:16 &msa_bz
++@bz_v ...... ... .. wt:5 sa:s16 &msa_bz df=3
++@bz ...... ... df:2 wt:5 sa:s16 &msa_bz
+ @elm_df ...... .... ...... ws:5 wd:5 ...... &msa_elm_df df=%elm_df n=%elm_n
+ @elm ...... .......... ws:5 wd:5 ...... &msa_elm
+ @vec ...... ..... wt:5 ws:5 wd:5 ...... &msa_r df=0
+diff --git a/target/mips/tcg/tx79.decode b/target/mips/tcg/tx79.decode
+index 57d87a2076..578b8c54c0 100644
+--- a/target/mips/tcg/tx79.decode
++++ b/target/mips/tcg/tx79.decode
+@@ -24,7 +24,7 @@
+ @rs ...... rs:5 ..... .......... ...... &r sa=0 rt=0 rd=0
+ @rd ...... .......... rd:5 ..... ...... &r sa=0 rs=0 rt=0
+
+-@ldst ...... base:5 rt:5 offset:16 &i
++@ldst ...... base:5 rt:5 offset:s16 &i
+
+ ###########################################################################
+
+diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
+index 0e328ea0fd..7c3362d2e7 100644
+--- a/target/s390x/tcg/insn-data.h.inc
++++ b/target/s390x/tcg/insn-data.h.inc
+@@ -442,7 +442,7 @@
+ D(0xebe8, LAAG, RSY_a, ILA, r3, a2, new, in2_r1, laa, adds64, MO_TEUQ)
+ /* LOAD AND ADD LOGICAL */
+ D(0xebfa, LAAL, RSY_a, ILA, r3_32u, a2, new, in2_r1_32, laa, addu32, MO_TEUL)
+- D(0xebea, LAALG, RSY_a, ILA, r3, a2, new, in2_r1, laa, addu64, MO_TEUQ)
++ D(0xebea, LAALG, RSY_a, ILA, r3, a2, new, in2_r1, laa_addu64, addu64, MO_TEUQ)
+ /* LOAD AND AND */
+ D(0xebf4, LAN, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lan, nz32, MO_TESL)
+ D(0xebe4, LANG, RSY_a, ILA, r3, a2, new, in2_r1, lan, nz64, MO_TEUQ)
+diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
+index ff64d6c28f..b0173e968e 100644
+--- a/target/s390x/tcg/translate.c
++++ b/target/s390x/tcg/translate.c
+@@ -2809,17 +2809,32 @@ static DisasJumpType op_kxb(DisasContext *s, DisasOps *o)
+ return DISAS_NEXT;
+ }
+
+-static DisasJumpType op_laa(DisasContext *s, DisasOps *o)
++static DisasJumpType help_laa(DisasContext *s, DisasOps *o, bool addu64)
+ {
+ /* The real output is indeed the original value in memory;
+ recompute the addition for the computation of CC. */
+ tcg_gen_atomic_fetch_add_i64(o->in2, o->in2, o->in1, get_mem_index(s),
+ s->insn->data | MO_ALIGN);
+ /* However, we need to recompute the addition for setting CC. */
+- tcg_gen_add_i64(o->out, o->in1, o->in2);
++ if (addu64) {
++ tcg_gen_movi_i64(cc_src, 0);
++ tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
++ } else {
++ tcg_gen_add_i64(o->out, o->in1, o->in2);
++ }
+ return DISAS_NEXT;
+ }
+
++static DisasJumpType op_laa(DisasContext *s, DisasOps *o)
++{
++ return help_laa(s, o, false);
++}
++
++static DisasJumpType op_laa_addu64(DisasContext *s, DisasOps *o)
++{
++ return help_laa(s, o, true);
++}
++
+ static DisasJumpType op_lan(DisasContext *s, DisasOps *o)
+ {
+ /* The real output is indeed the original value in memory;
+diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
+index 2c54a2825f..0594d3843b 100644
+--- a/target/tricore/cpu.c
++++ b/target/tricore/cpu.c
+@@ -100,14 +100,14 @@ static void tricore_cpu_realizefn(DeviceState *dev, Error **errp)
+ }
+
+ /* Some features automatically imply others */
+- if (tricore_feature(env, TRICORE_FEATURE_161)) {
++ if (tricore_has_feature(env, TRICORE_FEATURE_161)) {
+ set_feature(env, TRICORE_FEATURE_16);
+ }
+
+- if (tricore_feature(env, TRICORE_FEATURE_16)) {
++ if (tricore_has_feature(env, TRICORE_FEATURE_16)) {
+ set_feature(env, TRICORE_FEATURE_131);
+ }
+- if (tricore_feature(env, TRICORE_FEATURE_131)) {
++ if (tricore_has_feature(env, TRICORE_FEATURE_131)) {
+ set_feature(env, TRICORE_FEATURE_13);
+ }
+ cpu_reset(cs);
+diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
+index 3b9c533a7c..2e122b44a7 100644
+--- a/target/tricore/cpu.h
++++ b/target/tricore/cpu.h
+@@ -269,7 +269,7 @@ enum tricore_features {
+ TRICORE_FEATURE_161,
+ };
+
+-static inline int tricore_feature(CPUTriCoreState *env, int feature)
++static inline int tricore_has_feature(CPUTriCoreState *env, int feature)
+ {
+ return (env->features & (1ULL << feature)) != 0;
+ }
+diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c
+index 532ae6b74c..676529f754 100644
+--- a/target/tricore/op_helper.c
++++ b/target/tricore/op_helper.c
+@@ -2528,7 +2528,7 @@ void helper_ret(CPUTriCoreState *env)
+ /* PCXI = new_PCXI; */
+ env->PCXI = new_PCXI;
+
+- if (tricore_feature(env, TRICORE_FEATURE_13)) {
++ if (tricore_has_feature(env, TRICORE_FEATURE_13)) {
+ /* PSW = new_PSW */
+ psw_write(env, new_PSW);
+ } else {
+@@ -2639,7 +2639,7 @@ void helper_rfm(CPUTriCoreState *env)
+ env->gpr_a[10] = cpu_ldl_data(env, env->DCX+8);
+ env->gpr_a[11] = cpu_ldl_data(env, env->DCX+12);
+
+- if (tricore_feature(env, TRICORE_FEATURE_131)) {
++ if (tricore_has_feature(env, TRICORE_FEATURE_131)) {
+ env->DBGTCR = 0;
+ }
+ }
+diff --git a/tests/migration/s390x/Makefile b/tests/migration/s390x/Makefile
+index 6393c3e5b9..6671de2efc 100644
+--- a/tests/migration/s390x/Makefile
++++ b/tests/migration/s390x/Makefile
+@@ -6,8 +6,8 @@ all: a-b-bios.h
+ fwdir=../../../pc-bios/s390-ccw
+
+ CFLAGS+=-ffreestanding -fno-delete-null-pointer-checks -fPIE -Os \
+- -msoft-float -march=z900 -fno-asynchronous-unwind-tables -Wl,-pie \
+- -Wl,--build-id=none -nostdlib
++ -msoft-float -march=z900 -fno-asynchronous-unwind-tables \
++ -fno-stack-protector -Wl,-pie -Wl,--build-id=none -nostdlib
+
+ a-b-bios.h: s390x.elf
+ echo "$$__note" > header.tmp
+diff --git a/tests/qemu-iotests/024 b/tests/qemu-iotests/024
+index 25a564a150..98a7c8fd65 100755
+--- a/tests/qemu-iotests/024
++++ b/tests/qemu-iotests/024
+@@ -199,6 +199,63 @@ echo
+ # $BASE_OLD and $BASE_NEW)
+ $QEMU_IMG map "$OVERLAY" | _filter_qemu_img_map
+
++# Check that rebase within the chain is working when
++# overlay_size > old_backing_size
++#
++# base_new <-- base_old <-- overlay
++#
++# Backing (new): 11 11 11 11 11
++# Backing (old): 22 22 22 22
++# Overlay: -- -- -- -- --
++#
++# As a result, overlay should contain data identical to base_old, with the
++# last cluster remaining unallocated.
++
++echo
++echo "=== Test rebase within one backing chain ==="
++echo
++
++echo "Creating backing chain"
++echo
++
++TEST_IMG=$BASE_NEW _make_test_img $(( CLUSTER_SIZE * 5 ))
++TEST_IMG=$BASE_OLD _make_test_img -b "$BASE_NEW" -F $IMGFMT \
++ $(( CLUSTER_SIZE * 4 ))
++TEST_IMG=$OVERLAY _make_test_img -b "$BASE_OLD" -F $IMGFMT \
++ $(( CLUSTER_SIZE * 5 ))
++
++echo
++echo "Fill backing files with data"
++echo
++
++$QEMU_IO "$BASE_NEW" -c "write -P 0x11 0 $(( CLUSTER_SIZE * 5 ))" \
++ | _filter_qemu_io
++$QEMU_IO "$BASE_OLD" -c "write -P 0x22 0 $(( CLUSTER_SIZE * 4 ))" \
++ | _filter_qemu_io
++
++echo
++echo "Check the last cluster is zeroed in overlay before the rebase"
++echo
++$QEMU_IO "$OVERLAY" -c "read -P 0x00 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
++ | _filter_qemu_io
++
++echo
++echo "Rebase onto another image in the same chain"
++echo
++
++$QEMU_IMG rebase -b "$BASE_NEW" -F $IMGFMT "$OVERLAY"
++
++echo "Verify that data is read the same before and after rebase"
++echo
++
++# Verify the first 4 clusters are still read the same as in the old base
++$QEMU_IO "$OVERLAY" -c "read -P 0x22 0 $(( CLUSTER_SIZE * 4 ))" \
++ | _filter_qemu_io
++# Verify the last cluster still reads as zeroes
++$QEMU_IO "$OVERLAY" -c "read -P 0x00 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
++ | _filter_qemu_io
++
++echo
+
+ # success, all done
+ echo "*** done"
+diff --git a/tests/qemu-iotests/024.out b/tests/qemu-iotests/024.out
+index 973a5a3711..245fe8b1d1 100644
+--- a/tests/qemu-iotests/024.out
++++ b/tests/qemu-iotests/024.out
+@@ -171,4 +171,34 @@ read 65536/65536 bytes at offset 196608
+ Offset Length File
+ 0 0x30000 TEST_DIR/subdir/t.IMGFMT
+ 0x30000 0x10000 TEST_DIR/subdir/t.IMGFMT.base_new
++
++=== Test rebase within one backing chain ===
++
++Creating backing chain
++
++Formatting 'TEST_DIR/subdir/t.IMGFMT.base_new', fmt=IMGFMT size=327680
++Formatting 'TEST_DIR/subdir/t.IMGFMT.base_old', fmt=IMGFMT size=262144 backing_file=TEST_DIR/subdir/t.IMGFMT.base_new backing_fmt=IMGFMT
++Formatting 'TEST_DIR/subdir/t.IMGFMT', fmt=IMGFMT size=327680 backing_file=TEST_DIR/subdir/t.IMGFMT.base_old backing_fmt=IMGFMT
++
++Fill backing files with data
++
++wrote 327680/327680 bytes at offset 0
++320 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++wrote 262144/262144 bytes at offset 0
++256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
++Check the last cluster is zeroed in overlay before the rebase
++
++read 65536/65536 bytes at offset 262144
++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
++Rebase onto another image in the same chain
++
++Verify that data is read the same before and after rebase
++
++read 262144/262144 bytes at offset 0
++256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 65536/65536 bytes at offset 262144
++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
+ *** done
+diff --git a/tests/qtest/ahci-test.c b/tests/qtest/ahci-test.c
+index 66652fed04..388223291f 100644
+--- a/tests/qtest/ahci-test.c
++++ b/tests/qtest/ahci-test.c
+@@ -1424,6 +1424,89 @@ static void test_reset(void)
+ ahci_shutdown(ahci);
+ }
+
++static void test_reset_pending_callback(void)
++{
++ AHCIQState *ahci;
++ AHCICommand *cmd;
++ uint8_t port;
++ uint64_t ptr1;
++ uint64_t ptr2;
++
++ int bufsize = 4 * 1024;
++ int speed = bufsize + (bufsize / 2);
++ int offset1 = 0;
++ int offset2 = bufsize / AHCI_SECTOR_SIZE;
++
++ g_autofree unsigned char *tx1 = g_malloc(bufsize);
++ g_autofree unsigned char *tx2 = g_malloc(bufsize);
++ g_autofree unsigned char *rx1 = g_malloc0(bufsize);
++ g_autofree unsigned char *rx2 = g_malloc0(bufsize);
++
++ /* Uses throttling to make test independent of specific environment. */
++ ahci = ahci_boot_and_enable("-drive if=none,id=drive0,file=%s,"
++ "cache=writeback,format=%s,"
++ "throttling.bps-write=%d "
++ "-M q35 "
++ "-device ide-hd,drive=drive0 ",
++ tmp_path, imgfmt, speed);
++
++ port = ahci_port_select(ahci);
++ ahci_port_clear(ahci, port);
++
++ ptr1 = ahci_alloc(ahci, bufsize);
++ ptr2 = ahci_alloc(ahci, bufsize);
++
++ g_assert(ptr1 && ptr2);
++
++ /* Need two different patterns. */
++ do {
++ generate_pattern(tx1, bufsize, AHCI_SECTOR_SIZE);
++ generate_pattern(tx2, bufsize, AHCI_SECTOR_SIZE);
++ } while (memcmp(tx1, tx2, bufsize) == 0);
++
++ qtest_bufwrite(ahci->parent->qts, ptr1, tx1, bufsize);
++ qtest_bufwrite(ahci->parent->qts, ptr2, tx2, bufsize);
++
++ /* Write to beginning of disk to check it wasn't overwritten later. */
++ ahci_guest_io(ahci, port, CMD_WRITE_DMA_EXT, ptr1, bufsize, offset1);
++
++ /* Issue asynchronously to get a pending callback during reset. */
++ cmd = ahci_command_create(CMD_WRITE_DMA_EXT);
++ ahci_command_adjust(cmd, offset2, ptr2, bufsize, 0);
++ ahci_command_commit(ahci, cmd, port);
++ ahci_command_issue_async(ahci, cmd);
++
++ ahci_set(ahci, AHCI_GHC, AHCI_GHC_HR);
++
++ ahci_command_free(cmd);
++
++ /* Wait for throttled write to finish. */
++ sleep(1);
++
++ /* Start again. */
++ ahci_clean_mem(ahci);
++ ahci_pci_enable(ahci);
++ ahci_hba_enable(ahci);
++ port = ahci_port_select(ahci);
++ ahci_port_clear(ahci, port);
++
++ /* Read and verify. */
++ ahci_guest_io(ahci, port, CMD_READ_DMA_EXT, ptr1, bufsize, offset1);
++ qtest_bufread(ahci->parent->qts, ptr1, rx1, bufsize);
++ g_assert_cmphex(memcmp(tx1, rx1, bufsize), ==, 0);
++
++ ahci_guest_io(ahci, port, CMD_READ_DMA_EXT, ptr2, bufsize, offset2);
++ qtest_bufread(ahci->parent->qts, ptr2, rx2, bufsize);
++ g_assert_cmphex(memcmp(tx2, rx2, bufsize), ==, 0);
++
++ ahci_free(ahci, ptr1);
++ ahci_free(ahci, ptr2);
++
++ ahci_clean_mem(ahci);
++
++ ahci_shutdown(ahci);
++}
++
+ static void test_ncq_simple(void)
+ {
+ AHCIQState *ahci;
+@@ -1943,7 +2026,8 @@ int main(int argc, char **argv)
+ qtest_add_func("/ahci/migrate/dma/halted", test_migrate_halted_dma);
+
+ qtest_add_func("/ahci/max", test_max);
+- qtest_add_func("/ahci/reset", test_reset);
++ qtest_add_func("/ahci/reset/simple", test_reset);
++ qtest_add_func("/ahci/reset/pending_callback", test_reset_pending_callback);
+
+ qtest_add_func("/ahci/io/ncq/simple", test_ncq_simple);
+ qtest_add_func("/ahci/migrate/ncq/simple", test_migrate_ncq);
+diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target
+index 14bc013181..368a053392 100644
+--- a/tests/tcg/Makefile.target
++++ b/tests/tcg/Makefile.target
+@@ -123,7 +123,7 @@ else
+ # For softmmu targets we include a different Makefile fragement as the
+ # build options for bare programs are usually pretty different. They
+ # are expected to provide their own build recipes.
+-EXTRA_CFLAGS += -ffreestanding
++EXTRA_CFLAGS += -ffreestanding -fno-stack-protector
+ -include $(SRC_PATH)/tests/tcg/minilib/Makefile.target
+ -include $(SRC_PATH)/tests/tcg/multiarch/system/Makefile.softmmu-target
+ -include $(SRC_PATH)/tests/tcg/$(TARGET_NAME)/Makefile.softmmu-target
+diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
+index fc8d90ed69..a72578fccb 100644
+--- a/tests/tcg/aarch64/Makefile.target
++++ b/tests/tcg/aarch64/Makefile.target
+@@ -38,7 +38,7 @@ endif
+ # bti-1 tests the elf notes, so we require special compiler support.
+ ifneq ($(CROSS_CC_HAS_ARMV8_BTI),)
+ AARCH64_TESTS += bti-1 bti-3
+-bti-1 bti-3: CFLAGS += -mbranch-protection=standard
++bti-1 bti-3: CFLAGS += -fno-stack-protector -mbranch-protection=standard
+ bti-1 bti-3: LDFLAGS += -nostdlib
+ endif
+ # bti-2 tests PROT_BTI, so no special compiler support required.
+diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target
+index b3b1504a1c..6b69672fcf 100644
+--- a/tests/tcg/arm/Makefile.target
++++ b/tests/tcg/arm/Makefile.target
+@@ -12,7 +12,7 @@ float_madds: CFLAGS+=-mfpu=neon-vfpv4
+
+ # Basic Hello World
+ ARM_TESTS = hello-arm
+-hello-arm: CFLAGS+=-marm -ffreestanding
++hello-arm: CFLAGS+=-marm -ffreestanding -fno-stack-protector
+ hello-arm: LDFLAGS+=-nostdlib
+
+ # IWMXT floating point extensions
+diff --git a/tests/tcg/cris/Makefile.target b/tests/tcg/cris/Makefile.target
+index 372287bd03..ea1053236f 100644
+--- a/tests/tcg/cris/Makefile.target
++++ b/tests/tcg/cris/Makefile.target
+@@ -30,7 +30,7 @@ AS = $(CC) -x assembler-with-cpp
+ LD = $(CC)
+
+ # we rely on GCC inline:ing the stuff we tell it to in many places here.
+-CFLAGS = -Winline -Wall -g -O2 -static
++CFLAGS = -Winline -Wall -g -O2 -static -fno-stack-protector
+ NOSTDFLAGS = -nostartfiles -nostdlib
+ ASFLAGS += -mcpu=v10 -g -Wa,-I,$(SRC_PATH)/tests/tcg/cris/bare
+ CRT_FILES = crt.o sys.o
+diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
+index 96a4d7a614..1b2b26e843 100644
+--- a/tests/tcg/hexagon/Makefile.target
++++ b/tests/tcg/hexagon/Makefile.target
+@@ -19,7 +19,7 @@
+ EXTRA_RUNS =
+
+ CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal
+-CFLAGS += -fno-unroll-loops
++CFLAGS += -fno-unroll-loops -fno-stack-protector
+
+ HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon
+ VPATH += $(HEX_SRC)
+diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
+index bafd8c2180..3aec3bba77 100644
+--- a/tests/tcg/i386/Makefile.target
++++ b/tests/tcg/i386/Makefile.target
+@@ -35,7 +35,7 @@ run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
+ #
+ # hello-i386 is a barebones app
+ #
+-hello-i386: CFLAGS+=-ffreestanding
++hello-i386: CFLAGS+=-ffreestanding -fno-stack-protector
+ hello-i386: LDFLAGS+=-nostdlib
+
+ # test-386 includes a couple of additional objects that need to be
+diff --git a/tests/tcg/i386/test-avx.py b/tests/tcg/i386/test-avx.py
+index d9ca00a49e..641a2ef69e 100755
+--- a/tests/tcg/i386/test-avx.py
++++ b/tests/tcg/i386/test-avx.py
+@@ -49,7 +49,7 @@
+ 'VEXTRACT[FI]128': 0x01,
+ 'VINSERT[FI]128': 0x01,
+ 'VPBLENDD': 0xff,
+- 'VPERM2[FI]128': 0x33,
++ 'VPERM2[FI]128': 0xbb,
+ 'VPERMPD': 0xff,
+ 'VPERMQ': 0xff,
+ 'VPERMILPS': 0xff,
+diff --git a/tests/tcg/minilib/Makefile.target b/tests/tcg/minilib/Makefile.target
+index c821d2806a..af0bf54be9 100644
+--- a/tests/tcg/minilib/Makefile.target
++++ b/tests/tcg/minilib/Makefile.target
+@@ -12,7 +12,7 @@ SYSTEM_MINILIB_SRC=$(SRC_PATH)/tests/tcg/minilib
+ MINILIB_SRCS=$(wildcard $(SYSTEM_MINILIB_SRC)/*.c)
+ MINILIB_OBJS=$(patsubst $(SYSTEM_MINILIB_SRC)/%.c, %.o, $(MINILIB_SRCS))
+
+-MINILIB_CFLAGS+=-nostdlib -ggdb -O0
++MINILIB_CFLAGS+=-nostdlib -fno-stack-protector -ggdb -O0
+ MINILIB_INC=-isystem $(SYSTEM_MINILIB_SRC)
+
+ .PRECIOUS: $(MINILIB_OBJS)
+diff --git a/tests/tcg/mips/Makefile.target b/tests/tcg/mips/Makefile.target
+index 1a994d5525..5d17c1706e 100644
+--- a/tests/tcg/mips/Makefile.target
++++ b/tests/tcg/mips/Makefile.target
+@@ -14,6 +14,6 @@ MIPS_TESTS=hello-mips
+
+ TESTS += $(MIPS_TESTS)
+
+-hello-mips: CFLAGS+=-mno-abicalls -fno-PIC -mabi=32
++hello-mips: CFLAGS+=-mno-abicalls -fno-PIC -fno-stack-protector -mabi=32
+ hello-mips: LDFLAGS+=-nostdlib
+ endif
+diff --git a/tests/tcg/mips/hello-mips.c b/tests/tcg/mips/hello-mips.c
+index 4e1cf501af..38e22d00e3 100644
+--- a/tests/tcg/mips/hello-mips.c
++++ b/tests/tcg/mips/hello-mips.c
+@@ -5,8 +5,8 @@
+ * http://www.linux-mips.org/wiki/MIPSABIHistory
+ * http://www.linux.com/howtos/Assembly-HOWTO/mips.shtml
+ *
+-* mipsel-linux-gcc -nostdlib -mno-abicalls -fno-PIC -mabi=32 \
+-* -O2 -static -o hello-mips hello-mips.c
++* mipsel-linux-gcc -nostdlib -mno-abicalls -fno-PIC -fno-stack-protector \
++* -mabi=32 -O2 -static -o hello-mips hello-mips.c
+ *
+ */
+ #define __NR_SYSCALL_BASE 4000
+diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
+index cb90d4183d..ea9fa67152 100644
+--- a/tests/tcg/s390x/Makefile.target
++++ b/tests/tcg/s390x/Makefile.target
+@@ -24,6 +24,7 @@ TESTS+=trap
+ TESTS+=signals-s390x
+ TESTS+=branch-relative-long
+ TESTS+=noexec
++TESTS+=laalg
+
+ Z13_TESTS=vistr
+ Z13_TESTS+=lcbb
+diff --git a/tests/tcg/s390x/laalg.c b/tests/tcg/s390x/laalg.c
+new file mode 100644
+index 0000000000..797d168bb1
+--- /dev/null
++++ b/tests/tcg/s390x/laalg.c
+@@ -0,0 +1,27 @@
++/*
++ * Test the LAALG instruction.
++ *
++ * SPDX-License-Identifier: GPL-2.0-or-later
++ */
++#include <assert.h>
++#include <stdlib.h>
++
++int main(void)
++{
++ unsigned long cc = 0, op1, op2 = 40, op3 = 2;
++
++ asm("slgfi %[cc],1\n" /* Set cc_src = -1. */
++ "laalg %[op1],%[op3],%[op2]\n"
++ "ipm %[cc]"
++ : [cc] "+r" (cc)
++ , [op1] "=r" (op1)
++ , [op2] "+T" (op2)
++ : [op3] "r" (op3)
++ : "cc");
++
++ assert(cc == 0xffffffff10ffffff);
++ assert(op1 == 40);
++ assert(op2 == 42);
++
++ return EXIT_SUCCESS;
++}
+diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
+index e99e3b0d8c..52c6246a33 100644
+--- a/ui/gtk-egl.c
++++ b/ui/gtk-egl.c
+@@ -66,15 +66,16 @@ void gd_egl_draw(VirtualConsole *vc)
+ #ifdef CONFIG_GBM
+ QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf;
+ #endif
+- int ww, wh;
++ int ww, wh, ws;
+
+ if (!vc->gfx.gls) {
+ return;
+ }
+
+ window = gtk_widget_get_window(vc->gfx.drawing_area);
+- ww = gdk_window_get_width(window);
+- wh = gdk_window_get_height(window);
++ ws = gdk_window_get_scale_factor(window);
++ ww = gdk_window_get_width(window) * ws;
++ wh = gdk_window_get_height(window) * ws;
+
+ if (vc->gfx.scanout_mode) {
+ #ifdef CONFIG_GBM
+@@ -300,7 +301,7 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
+ {
+ VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+ GdkWindow *window;
+- int ww, wh;
++ int ww, wh, ws;
+
+ if (!vc->gfx.scanout_mode) {
+ return;
+@@ -313,8 +314,9 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl,
+ vc->gfx.esurface, vc->gfx.ectx);
+
+ window = gtk_widget_get_window(vc->gfx.drawing_area);
+- ww = gdk_window_get_width(window);
+- wh = gdk_window_get_height(window);
++ ws = gdk_window_get_scale_factor(window);
++ ww = gdk_window_get_width(window) * ws;
++ wh = gdk_window_get_height(window) * ws;
+ egl_fb_setup_default(&vc->gfx.win_fb, ww, wh);
+ if (vc->gfx.cursor_fb.texture) {
+ egl_texture_blit(vc->gfx.gls, &vc->gfx.win_fb, &vc->gfx.guest_fb,
+diff --git a/ui/gtk.c b/ui/gtk.c
+index e681e8c319..283c41a1a1 100644
+--- a/ui/gtk.c
++++ b/ui/gtk.c
+@@ -2317,6 +2317,7 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions *opts)
+ GdkDisplay *window_display;
+ GtkIconTheme *theme;
+ char *dir;
++ int idx;
+
+ if (!gtkinit) {
+ fprintf(stderr, "gtk initialization failed\n");
+@@ -2379,6 +2380,15 @@ static void gtk_display_init(DisplayState *ds, DisplayOptions *opts)
+ gtk_container_add(GTK_CONTAINER(s->window), s->vbox);
+
+ gtk_widget_show_all(s->window);
++
++ for (idx = 0;; idx++) {
++ QemuConsole *con = qemu_console_lookup_by_index(idx);
++ if (!con) {
++ break;
++ }
++ gtk_widget_realize(s->vc[idx].gfx.drawing_area);
++ }
++
+ if (opts->u.gtk.has_show_menubar &&
+ !opts->u.gtk.show_menubar) {
+ gtk_widget_hide(s->menu_bar);
+diff --git a/ui/vnc.c b/ui/vnc.c
+index 1856d57380..1ca16c0ff6 100644
+--- a/ui/vnc.c
++++ b/ui/vnc.c
+@@ -2219,7 +2219,7 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings)
+ break;
+ case VNC_ENCODING_XVP:
+ if (vs->vd->power_control) {
+- vs->features |= VNC_FEATURE_XVP;
++ vs->features |= VNC_FEATURE_XVP_MASK;
+ send_xvp_message(vs, VNC_XVP_CODE_INIT);
+ }
+ break;
+@@ -2468,7 +2468,7 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len)
+ vnc_client_cut_text(vs, read_u32(data, 4), data + 8);
+ break;
+ case VNC_MSG_CLIENT_XVP:
+- if (!(vs->features & VNC_FEATURE_XVP)) {
++ if (!vnc_has_feature(vs, VNC_FEATURE_XVP)) {
+ error_report("vnc: xvp client message while disabled");
+ vnc_client_error(vs);
+ break;
+@@ -2565,7 +2565,7 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len)
+ vs, vs->ioc, vs->as.fmt, vs->as.nchannels, vs->as.freq);
+ break;
+ default:
+- VNC_DEBUG("Invalid audio message %d\n", read_u8(data, 4));
++ VNC_DEBUG("Invalid audio message %d\n", read_u8(data, 2));
+ vnc_client_error(vs);
+ break;
+ }