56 files changed, 26429 insertions, 0 deletions
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
new file mode 100644
index 00000000..cbfd8c71
--- /dev/null
+++ b/hw/virtio/Kconfig
@@ -0,0 +1,87 @@
+config VIRTIO
+    bool
+
+config VIRTIO_RNG
+    bool
+    default y
+    depends on VIRTIO
+
+config VIRTIO_IOMMU
+    bool
+    default y
+    depends on PCI && VIRTIO
+
+config VIRTIO_PCI
+    bool
+    default y if PCI_DEVICES
+    depends on PCI
+    select VIRTIO
+
+config VIRTIO_MMIO
+    bool
+    select VIRTIO
+
+config VIRTIO_CCW
+    bool
+    select VIRTIO
+
+config VIRTIO_BALLOON
+    bool
+    default y
+    depends on VIRTIO
+
+config VIRTIO_CRYPTO
+    bool
+    default y
+    depends on VIRTIO
+
+config VIRTIO_PMEM_SUPPORTED
+    bool
+
+config VIRTIO_PMEM
+    bool
+    default y
+    depends on VIRTIO
+    depends on VIRTIO_PMEM_SUPPORTED
+    select MEM_DEVICE
+
+config VIRTIO_MEM_SUPPORTED
+    bool
+
+config VIRTIO_MEM
+    bool
+    default y
+    depends on VIRTIO
+    depends on LINUX
+    depends on VIRTIO_MEM_SUPPORTED
+    select MEM_DEVICE
+
+config VHOST_VSOCK
+    bool
+    default y
+    depends on VIRTIO && VHOST_KERNEL
+
+config VHOST_USER_VSOCK
+    bool
+    default y
+    depends on VIRTIO && VHOST_USER
+
+config VHOST_USER_I2C
+    bool
+    default y
+    depends on VIRTIO && VHOST_USER
+
+config VHOST_USER_RNG
+    bool
+    default y
+    depends on VIRTIO && VHOST_USER
+
+config VHOST_USER_FS
+    bool
+    default y
+    depends on VIRTIO && VHOST_USER
+
+config VHOST_USER_GPIO
+    bool
+    default y
+    depends on VIRTIO && VHOST_USER
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
new file mode 100644
index 00000000..dfed1e7a
--- /dev/null
+++ b/hw/virtio/meson.build
@@ -0,0 +1,67 @@
+softmmu_virtio_ss = ss.source_set()
+softmmu_virtio_ss.add(files('virtio-bus.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
+
+virtio_ss = ss.source_set()
+virtio_ss.add(files('virtio.c'))
+
+if have_vhost
+  virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c'))
+  if have_vhost_user
+    virtio_ss.add(files('vhost-user.c'))
+  endif
+  if have_vhost_vdpa
+    virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c'))
+  endif
+else
+  softmmu_virtio_ss.add(files('vhost-stub.c'))
+endif
+
+virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
+virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c'))
+virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c', 'vhost-vsock-common.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c', 'vhost-vsock-common.c'))
+virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c'))
+virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c'))
+virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c'))
+virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c'))
+
+virtio_pci_ss = ss.source_set()
+virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c'))
+
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SCSI', if_true: files('virtio-scsi-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c'))
+
+virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
+
+specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss)
+softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss)
+softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c'))
+softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('virtio-stub.c'))
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('virtio-stub.c'))
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
new file mode 100644
index 00000000..14fc5b9b
--- /dev/null
+++ b/hw/virtio/trace-events
@@ -0,0 +1,151 @@
+# See docs/devel/tracing.rst for syntax documentation.
+
+# vhost.c
+vhost_commit(bool started, bool changed) "Started: %d Changed: %d"
+vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64
+vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64
+vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64
+vhost_section(const char *name) "%s"
+vhost_reject_section(const char *name, int d) "%s:%d"
+vhost_iotlb_miss(void *dev, int step) "%p step %d"
+vhost_dev_cleanup(void *dev) "%p"
+vhost_dev_start(void *dev, const char *name, bool vrings) "%p:%s vrings:%d"
+vhost_dev_stop(void *dev, const char *name, bool vrings) "%p:%s vrings:%d"
+
+
+# vhost-user.c
+vhost_user_postcopy_end_entry(void) ""
+vhost_user_postcopy_end_exit(void) ""
+vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d"
+vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64
+vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64
+vhost_user_postcopy_listen(void) ""
+vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d"
+vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64
+vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
+vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64
+vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
+vhost_user_read(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32""
+vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32""
+vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p"
+
+# vhost-vdpa.c
+vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8
+vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8
+vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type)  "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8
+vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type)  "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8
+vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d"
+vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64
+vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8
+vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p"
+vhost_vdpa_cleanup(void *dev, void *vdpa) "dev: %p vdpa: %p"
+vhost_vdpa_memslots_limit(void *dev, int ret) "dev: %p = 0x%x"
+vhost_vdpa_set_mem_table(void *dev, uint32_t nregions, uint32_t padding) "dev: %p nregions: %"PRIu32" padding: 0x%"PRIx32
+vhost_vdpa_dump_regions(void *dev, int i, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint64_t flags_padding) "dev: %p %d: guest_phys_addr: 0x%"PRIx64" memory_size: 0x%"PRIx64" userspace_addr: 0x%"PRIx64" flags_padding: 0x%"PRIx64
+vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64
+vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32
+vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8
+vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d"
+vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
+vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
+vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
+vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
+vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
+vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
+vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
+vhost_vdpa_set_vring_num(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
+vhost_vdpa_set_vring_base(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
+vhost_vdpa_get_vring_base(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
+vhost_vdpa_set_vring_kick(void *dev, unsigned int index, int fd) "dev: %p index: %u fd: %d"
+vhost_vdpa_set_vring_call(void *dev, unsigned int index, int fd) "dev: %p index: %u fd: %d"
+vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64
+vhost_vdpa_set_owner(void *dev) "dev: %p"
+vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
+vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64
+
+# virtio.c
+virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zu in_num %u out_num %u"
+virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
+virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
+virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
+virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
+virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
+virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
+virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
+
+# virtio-rng.c
+virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready"
+virtio_rng_cpu_is_stopped(void *rng, int size) "rng %p: cpu is stopped, dropping %d bytes"
+virtio_rng_popped(void *rng) "rng %p: elem popped"
+virtio_rng_pushed(void *rng, size_t len) "rng %p: %zu bytes pushed"
+virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zu bytes requested, %u bytes quota left"
+virtio_rng_vm_state_change(void *rng, int running, int state) "rng %p: state change to running %d state %d"
+
+# virtio-balloon.c
+#
+virtio_balloon_bad_addr(uint64_t gpa) "0x%"PRIx64
+virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s gpa: 0x%"PRIx64
+virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d"
+virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d"
+virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: 0x%"PRIx64" num_pages: %d"
+
+# virtio-mmio.c
+virtio_mmio_read(uint64_t offset) "virtio_mmio_read offset 0x%" PRIx64
+virtio_mmio_write_offset(uint64_t offset, uint64_t value) "virtio_mmio_write offset 0x%" PRIx64 " value 0x%" PRIx64
+virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " shift %d"
+virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d"
+virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
+
+# virtio-pci.c
+virtio_pci_notify(uint16_t vector) "virtio_pci_notify vec 0x%x"
+virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
+virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
+
+# virtio-iommu.c
+virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_system_reset(void) "system reset!"
+virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
+virtio_iommu_device_status(uint8_t status) "driver status = %d"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_start, uint32_t domain_end, uint32_t probe_size, uint8_t bypass) "page_size_mask=0x%"PRIx64" input range start=0x%"PRIx64" input range end=0x%"PRIx64" domain range start=%d domain range end=%d probe_size=0x%x bypass=0x%x"
+virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x"
+virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
+virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
+virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
+virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
+virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
+virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
+virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s"
+virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
+virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
+virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
+virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
+virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
+virtio_iommu_report_fault(uint8_t reason, uint32_t flags, uint32_t endpoint, uint64_t addr) "FAULT reason=%d flags=%d endpoint=%d address =0x%"PRIx64
+virtio_iommu_fill_resv_property(uint32_t devid, uint8_t subtype, uint64_t start, uint64_t end) "dev= %d, type=%d start=0x%"PRIx64" end=0x%"PRIx64
+virtio_iommu_notify_map(const char *name, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64" phys_start=0x%"PRIx64" flags=%d"
+virtio_iommu_notify_unmap(const char *name, uint64_t virt_start, uint64_t virt_end) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
+virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64" phys_start=0x%"PRIx64
+virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64
+virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s"
+virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"
+virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
+
+# virtio-mem.c
+virtio_mem_send_response(uint16_t type) "type=%" PRIu16
+virtio_mem_plug_request(uint64_t addr, uint16_t nb_blocks) "addr=0x%" PRIx64 " nb_blocks=%" PRIu16
+virtio_mem_unplug_request(uint64_t addr, uint16_t nb_blocks) "addr=0x%" PRIx64 " nb_blocks=%" PRIu16
+virtio_mem_unplugged_all(void) ""
+virtio_mem_unplug_all_request(void) ""
+virtio_mem_resized_usable_region(uint64_t old_size, uint64_t new_size) "old_size=0x%" PRIx64 " new_size=0x%" PRIx64
+virtio_mem_state_request(uint64_t addr, uint16_t nb_blocks) "addr=0x%" PRIx64 " nb_blocks=%" PRIu16
+virtio_mem_state_response(uint16_t state) "state=%" PRIu16
+
+# virtio-pmem.c
+virtio_pmem_flush_request(void) "flush request"
+virtio_pmem_response(void) "flush response"
+virtio_pmem_flush_done(int type) "fsync return=%d"
+
+# virtio-gpio.c
+virtio_gpio_start(void) "start"
+virtio_gpio_stop(void) "stop"
+virtio_gpio_set_status(uint8_t status) "0x%x"
diff --git a/hw/virtio/trace.h b/hw/virtio/trace.h
new file mode 100644
index 00000000..5d709706
--- /dev/null
+++ b/hw/virtio/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_virtio.h"
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
new file mode 100644
index 00000000..8e581575
--- /dev/null
+++ b/hw/virtio/vhost-backend.c
@@ -0,0 +1,410 @@
+/*
+ * vhost-backend
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "standard-headers/linux/vhost_types.h"
+
+#include "hw/virtio/vhost-vdpa.h"
+#ifdef CONFIG_VHOST_KERNEL
+#include <linux/vhost.h>
+#include <sys/ioctl.h>
+
+static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request,
+                             void *arg)
+{
+    int fd = (uintptr_t) dev->opaque;
+    int ret;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+    ret = ioctl(fd, request, arg);
+    return ret < 0 ? -errno : ret;
+}
+
+static int vhost_kernel_init(struct vhost_dev *dev, void *opaque, Error **errp)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+    /* The other kernel-backend callbacks read dev->opaque back as the fd. */
+    dev->opaque = opaque;
+    /* Nothing here can fail, so @errp is left untouched. */
+    return 0;
+}
+
+/* Release the kernel backend by closing the vhost fd kept in dev->opaque.
+ * Returns 0 on success or -errno from close(). */
+static int vhost_kernel_cleanup(struct vhost_dev *dev)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+    return close((uintptr_t) dev->opaque) < 0 ? -errno : 0;
+}
+
+/* Memory-region limit from the vhost module's max_mem_regions, else 64. */
+static int vhost_kernel_memslots_limit(struct vhost_dev *dev)
+{
+    const char *path = "/sys/module/vhost/parameters/max_mem_regions";
+    int limit = 64;
+    char *s;
+    if (g_file_get_contents(path, &s, NULL, NULL)) {
+        uint64_t val = g_ascii_strtoull(s, NULL, 10);
+        if (val > 0 && val < INT_MAX) {
+            limit = val;
+        } else {
+            error_report("ignoring invalid max_mem_regions value in vhost "
+                         "module: %s", s);
+        }
+    }
+    g_free(s);
+    return limit;
+}
+
+static int vhost_kernel_net_set_backend(struct vhost_dev *dev,
+                                        struct vhost_vring_file *file)
+{
+    return vhost_kernel_call(dev, VHOST_NET_SET_BACKEND, file);
+}
+
+static int vhost_kernel_scsi_set_endpoint(struct vhost_dev *dev,
+                                          struct vhost_scsi_target *target)
+{
+    return vhost_kernel_call(dev, VHOST_SCSI_SET_ENDPOINT, target);
+}
+
+static int vhost_kernel_scsi_clear_endpoint(struct vhost_dev *dev,
+                                            struct vhost_scsi_target *target)
+{
+    return vhost_kernel_call(dev, VHOST_SCSI_CLEAR_ENDPOINT, target);
+}
+
+static int vhost_kernel_scsi_get_abi_version(struct vhost_dev *dev, int *version)
+{
+    return vhost_kernel_call(dev, VHOST_SCSI_GET_ABI_VERSION, version);
+}
+
+static int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                     struct vhost_log *log)
+{
+    return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base);
+}
+
+static int vhost_kernel_set_mem_table(struct vhost_dev *dev,
+                                      struct vhost_memory *mem)
+{
+    return vhost_kernel_call(dev, VHOST_SET_MEM_TABLE, mem);
+}
+
+static int vhost_kernel_set_vring_addr(struct vhost_dev *dev,
+                                       struct vhost_vring_addr *addr)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_ADDR, addr);
+}
+
+static int vhost_kernel_set_vring_endian(struct vhost_dev *dev,
+                                         struct vhost_vring_state *ring)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_ENDIAN, ring);
+}
+
+static int vhost_kernel_set_vring_num(struct vhost_dev *dev,
+                                      struct vhost_vring_state *ring)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_NUM, ring);
+}
+
+static int vhost_kernel_set_vring_base(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_BASE, ring);
+}
+
+static int vhost_kernel_get_vring_base(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring)
+{
+    return vhost_kernel_call(dev, VHOST_GET_VRING_BASE, ring);
+}
+
+static int vhost_kernel_set_vring_kick(struct vhost_dev *dev,
+                                       struct vhost_vring_file *file)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+static int vhost_kernel_set_vring_call(struct vhost_dev *dev,
+                                       struct vhost_vring_file *file)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
+static int vhost_kernel_set_vring_err(struct vhost_dev *dev,
+                                      struct vhost_vring_file *file)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_ERR, file);
+}
+
+static int vhost_kernel_set_vring_busyloop_timeout(struct vhost_dev *dev,
+                                                   struct vhost_vring_state *s)
+{
+    return vhost_kernel_call(dev, VHOST_SET_VRING_BUSYLOOP_TIMEOUT, s);
+}
+
+static int vhost_kernel_set_features(struct vhost_dev *dev,
+                                     uint64_t features)
+{
+    return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features);
+}
+
+/*
+ * Negotiate backend features with the kernel.  Only IOTLB_MSG_V2 is ever
+ * acked, and dev->backend_cap is written only when both ioctls succeed.
+ * Failures are not reported: the function always returns 0.
+ */
+static int vhost_kernel_set_backend_cap(struct vhost_dev *dev)
+{
+    const uint64_t supported = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2;
+    uint64_t features;
+
+    if (vhost_kernel_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
+        return 0;
+    }
+
+    features &= supported;
+    if (!vhost_kernel_call(dev, VHOST_SET_BACKEND_FEATURES, &features)) {
+        dev->backend_cap = features;
+    }
+
+    return 0;
+}
+
+static int vhost_kernel_get_features(struct vhost_dev *dev,
+                                     uint64_t *features)
+{
+    return vhost_kernel_call(dev, VHOST_GET_FEATURES, features);
+}
+
+static int vhost_kernel_set_owner(struct vhost_dev *dev)
+{
+    return vhost_kernel_call(dev, VHOST_SET_OWNER, NULL);
+}
+
+static int vhost_kernel_reset_device(struct vhost_dev *dev)
+{
+    return vhost_kernel_call(dev, VHOST_RESET_OWNER, NULL);
+}
+
+static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx)
+{
+    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+    /* Rebase @idx onto this device's first queue (dev->vq_index). */
+    return idx - dev->vq_index;
+}
+
+static int vhost_kernel_vsock_set_guest_cid(struct vhost_dev *dev,
+                                            uint64_t guest_cid)
+{
+    return vhost_kernel_call(dev, VHOST_VSOCK_SET_GUEST_CID, &guest_cid);
+}
+
+static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start)
+{
+    return vhost_kernel_call(dev, VHOST_VSOCK_SET_RUNNING, &start);
+}
+
+/* Read callback installed by vhost_kernel_set_iotlb_callback(): drains IOTLB
+ * messages from the vhost fd, framed as vhost_msg_v2 or vhost_msg depending
+ * on dev->backend_cap.  A short or mistyped message ends the drain. */
+static void vhost_kernel_iotlb_read(void *opaque)
+{
+    struct vhost_dev *dev = opaque;
+    ssize_t got;
+
+    if (dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)) {
+        struct vhost_msg_v2 msg;
+
+        while ((got = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) {
+            if (got < sizeof msg) {
+                error_report("Wrong vhost message len: %d", (int)got);
+                return;
+            }
+            if (msg.type != VHOST_IOTLB_MSG_V2) {
+                error_report("Unknown vhost iotlb message type");
+                return;
+            }
+            vhost_backend_handle_iotlb_msg(dev, &msg.iotlb);
+        }
+    } else {
+        struct vhost_msg msg;
+
+        while ((got = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) {
+            if (got < sizeof msg) {
+                error_report("Wrong vhost message len: %d", (int)got);
+                return;
+            }
+            if (msg.type != VHOST_IOTLB_MSG) {
+                error_report("Unknown vhost iotlb message type");
+                return;
+            }
+            vhost_backend_handle_iotlb_msg(dev, &msg.iotlb);
+        }
+    }
+}
+
+/* Write @imsg to the vhost fd, framed as vhost_msg_v2 if IOTLB_MSG_V2 was
+ * negotiated, else as vhost_msg.  Returns 0, or -EFAULT on a partial write. */
+static int vhost_kernel_send_device_iotlb_msg(struct vhost_dev *dev,
+                                              struct vhost_iotlb_msg *imsg)
+{
+    int fd = (uintptr_t)dev->opaque;
+    bool sent;
+
+    if (dev->backend_cap & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)) {
+        struct vhost_msg_v2 msg = {};
+
+        msg.type = VHOST_IOTLB_MSG_V2;
+        msg.iotlb = *imsg;
+        sent = write(fd, &msg, sizeof msg) == sizeof msg;
+    } else {
+        struct vhost_msg msg = {};
+
+        msg.type = VHOST_IOTLB_MSG;
+        msg.iotlb = *imsg;
+        sent = write(fd, &msg, sizeof msg) == sizeof msg;
+    }
+    if (!sent) {
+        error_report("Fail to update device iotlb");
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/* Install (@enabled != 0) or remove the IOTLB read handler on the vhost fd. */
+static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev,
+                                            int enabled)
+{
+    void (*on_read)(void *) = enabled ? vhost_kernel_iotlb_read : NULL;
+
+    qemu_set_fd_handler((uintptr_t)dev->opaque, on_read, NULL,
+                        enabled ? dev : NULL);
+}
+
+/* VhostOps callback table for the in-kernel vhost backend. */
+const VhostOps kernel_ops = {
+    .backend_type = VHOST_BACKEND_TYPE_KERNEL,
+    .vhost_backend_init = vhost_kernel_init,
+    .vhost_backend_cleanup = vhost_kernel_cleanup,
+    .vhost_backend_memslots_limit = vhost_kernel_memslots_limit,
+    .vhost_net_set_backend = vhost_kernel_net_set_backend,
+    .vhost_scsi_set_endpoint = vhost_kernel_scsi_set_endpoint,
+    .vhost_scsi_clear_endpoint = vhost_kernel_scsi_clear_endpoint,
+    .vhost_scsi_get_abi_version = vhost_kernel_scsi_get_abi_version,
+    .vhost_set_log_base = vhost_kernel_set_log_base,
+    .vhost_set_mem_table = vhost_kernel_set_mem_table,
+    .vhost_set_vring_addr = vhost_kernel_set_vring_addr,
+    .vhost_set_vring_endian = vhost_kernel_set_vring_endian,
+    .vhost_set_vring_num = vhost_kernel_set_vring_num,
+    .vhost_set_vring_base = vhost_kernel_set_vring_base,
+    .vhost_get_vring_base = vhost_kernel_get_vring_base,
+    .vhost_set_vring_kick = vhost_kernel_set_vring_kick,
+    .vhost_set_vring_call = vhost_kernel_set_vring_call,
+    .vhost_set_vring_err = vhost_kernel_set_vring_err,
+    .vhost_set_vring_busyloop_timeout = vhost_kernel_set_vring_busyloop_timeout,
+    .vhost_set_features = vhost_kernel_set_features,
+    .vhost_get_features = vhost_kernel_get_features,
+    .vhost_set_backend_cap = vhost_kernel_set_backend_cap,
+    .vhost_set_owner = vhost_kernel_set_owner,
+    .vhost_reset_device = vhost_kernel_reset_device,
+    .vhost_get_vq_index = vhost_kernel_get_vq_index,
+    .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid,
+    .vhost_vsock_set_running = vhost_kernel_vsock_set_running,
+    .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
+    .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
+};
+#endif
+
+/* Send a VHOST_IOTLB_UPDATE for @iova/@len -> @uaddr via the backend hook.
+ * Returns its result, -EINVAL on a bad @perm, -ENODEV without a hook. */
+int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
+                                      uint64_t iova, uint64_t uaddr,
+                                      uint64_t len, IOMMUAccessFlags perm)
+{
+    struct vhost_iotlb_msg imsg = {}; /* nothing uninitialised is sent */
+
+    imsg.iova = iova;
+    imsg.uaddr = uaddr;
+    imsg.size = len;
+    imsg.type = VHOST_IOTLB_UPDATE;
+
+    switch (perm) {
+    case IOMMU_RO:
+        imsg.perm = VHOST_ACCESS_RO;
+        break;
+    case IOMMU_WO:
+        imsg.perm = VHOST_ACCESS_WO;
+        break;
+    case IOMMU_RW:
+        imsg.perm = VHOST_ACCESS_RW;
+        break;
+    default:
+        return -EINVAL;
+    }
+    if (dev->vhost_ops && dev->vhost_ops->vhost_send_device_iotlb_msg) {
+        return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+    }
+    return -ENODEV;
+}
+
+/* Send VHOST_IOTLB_INVALIDATE for @iova/@len; -ENODEV without a send hook. */
+int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
+                                          uint64_t iova, uint64_t len)
+{
+    struct vhost_iotlb_msg imsg = {}; /* uaddr/perm are sent too: keep them 0 */
+
+    imsg.iova = iova;
+    imsg.size = len;
+    imsg.type = VHOST_IOTLB_INVALIDATE;
+    if (dev->vhost_ops && dev->vhost_ops->vhost_send_device_iotlb_msg) {
+        return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+    }
+    return -ENODEV;
+}
+
+/*
+ * Dispatch an IOTLB message received from the backend.
+ *
+ * Only VHOST_IOTLB_MISS is acted on, via vhost_device_iotlb_miss(), whose
+ * result is returned.  VHOST_IOTLB_ACCESS_FAIL yields -ENOTSUP; any other
+ * type, or a message arriving while dev->vdev is unset, yields -EINVAL.
+ */
+int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
+                                   struct vhost_iotlb_msg *imsg)
+{
+    if (unlikely(!dev->vdev)) {
+        error_report("Unexpected IOTLB message when virtio device is stopped");
+        return -EINVAL;
+    }
+
+    switch (imsg->type) {
+    case VHOST_IOTLB_MISS:
+        return vhost_device_iotlb_miss(dev, imsg->iova,
+                                       imsg->perm != VHOST_ACCESS_RO);
+    case VHOST_IOTLB_ACCESS_FAIL:
+        /* FIXME: report device iotlb error */
+        error_report("Access failure IOTLB message type not supported");
+        return -ENOTSUP;
+    case VHOST_IOTLB_UPDATE:
+    case VHOST_IOTLB_INVALIDATE:
+    default:
+        error_report("Unexpected IOTLB message type");
+        return -EINVAL;
+    }
+}
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
new file mode 100644
index 00000000..3d03395a
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.c
@@ -0,0 +1,110 @@
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iova-tree.h"
+#include "vhost-iova-tree.h"
+
+#define iova_min_addr qemu_real_host_page_size() /* keeps address 0 unused */
+
+/**
+ * VhostIOVATree: IOVA <-> translated address bookkeeping, able to:
+ * - Translate an IOVA address
+ * - Reverse translate an address (from translated to IOVA)
+ * - Allocate IOVA regions for a translated range (linear operation)
+ */
+struct VhostIOVATree {
+    /* First addressable iova address in the device */
+    uint64_t iova_first;
+
+    /* Last addressable iova address in the device */
+    uint64_t iova_last;
+
+    /* IOVA address to qemu memory maps. */
+    IOVATree *iova_taddr_map;
+};
+
+/**
+ * Create a new IOVA tree for [iova_first, iova_last]. iova_first is raised
+ * to at least iova_min_addr, as some devices do not like address 0.
+ * Returns the new IOVA tree.
+ */
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
+{
+    VhostIOVATree *tree = g_new(VhostIOVATree, 1);
+
+    /* Some devices do not like 0 addresses */
+    tree->iova_first = MAX(iova_first, iova_min_addr);
+    tree->iova_last = iova_last;
+
+    tree->iova_taddr_map = iova_tree_new();
+    return tree;
+}
+
+/**
+ * Delete an iova tree: destroys the inner IOVATree, then frees the wrapper
+ */
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
+{
+    iova_tree_destroy(iova_tree->iova_taddr_map);
+    g_free(iova_tree);
+}
+
+/**
+ * Find the IOVA mapping stored for a translated (memory) address
+ *
+ * @tree: The iova tree
+ * @map: The map with the memory address
+ *
+ * Return the stored mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
+                                        const DMAMap *map)
+{
+    return iova_tree_find_iova(tree->iova_taddr_map, map);
+}
+
+/**
+ * Allocate a new mapping
+ *
+ * @tree: The iova tree
+ * @map: The iova map
+ *
+ * Returns:
+ * - IOVA_OK if the map fits in the container
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
+ *
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
+{
+    /* Some vhost devices do not like addr 0. Skip first page */
+    hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
+
+    if (map->translated_addr + map->size < map->translated_addr ||
+        map->perm == IOMMU_NONE) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /* Allocate a node in IOVA address */
+    return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
+                               tree->iova_last);
+}
+
+/**
+ * Remove an existing mapping from the iova tree
+ *
+ * @iova_tree: The vhost iova tree
+ * @map: The map to remove, passed by value
+ */
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
+{
+    iova_tree_remove(iova_tree->iova_taddr_map, map);
+}
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
new file mode 100644
index 00000000..4adfd79f
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.h
@@ -0,0 +1,27 @@
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
+
+#include "qemu/iova-tree.h"
+#include "exec/memory.h"
+
+typedef struct VhostIOVATree VhostIOVATree;
+
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
+
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
+                                        const DMAMap *map);
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
+
+#endif
diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c
new file mode 100644
index 00000000..08980bc2
--- /dev/null
+++ b/hw/virtio/vhost-scsi-pci.c
@@ -0,0 +1,104 @@
+/*
+ * Vhost scsi PCI bindings
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
+ *
+ * Changes for QEMU mainline + tcm_vhost kernel upstream:
+ *  Nicholas Bellinger <nab@risingtidesystems.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "standard-headers/linux/virtio_pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-scsi.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VHostSCSIPCI VHostSCSIPCI;
+
+/*
+ * vhost-scsi-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base"
+DECLARE_INSTANCE_CHECKER(VHostSCSIPCI, VHOST_SCSI_PCI,
+                         TYPE_VHOST_SCSI_PCI)
+
+struct VHostSCSIPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostSCSI vdev;
+};
+/* "vectors" maps to VirtIOPCIProxy.nvectors, unspecified by default */
+static Property vhost_scsi_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostSCSIPCI *dev = VHOST_SCSI_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    VirtIOSCSIConf *conf = &dev->vdev.parent_obj.parent_obj.conf;
+
+    if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) {
+        conf->num_queues =
+            virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED);
+    }
+    /* Default vector count: request queues + fixed queues + one more */
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = conf->num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1;
+    }
+    /* Realize the embedded vhost-scsi device on the proxy's bus */
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+
+static void vhost_scsi_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_SCSI;
+    pci_class->class_id = PCI_CLASS_STORAGE_SCSI;
+    pci_class->revision = 0x00;
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    device_class_set_props(dev_class, vhost_scsi_pci_properties);
+    vpci_class->realize = vhost_scsi_pci_realize;
+}
+
+static void vhost_scsi_pci_instance_init(Object *obj)
+{
+    VHostSCSIPCI *proxy = VHOST_SCSI_PCI(obj);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VHOST_SCSI);
+    object_property_add_alias(obj, "bootindex", OBJECT(&proxy->vdev),
+                              "bootindex");
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = {
+    .base_name             = TYPE_VHOST_SCSI_PCI,
+    .generic_name          = "vhost-scsi-pci",
+    .transitional_name     = "vhost-scsi-pci-transitional",
+    .non_transitional_name = "vhost-scsi-pci-non-transitional",
+    .instance_size = sizeof(VHostSCSIPCI),
+    .instance_init = vhost_scsi_pci_instance_init,
+    .class_init    = vhost_scsi_pci_class_init,
+};
+/* Hands the type description above to virtio_pci_types_register() */
+static void vhost_scsi_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_scsi_pci_info);
+}
+
+type_init(vhost_scsi_pci_register)
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
new file mode 100644
index 00000000..5bd14cad
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -0,0 +1,769 @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
+
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
+#include "linux-headers/linux/vhost.h"
+
+/**
+ * Validate the transport device features that both guests can use with the SVQ
+ * and SVQs can use with the device. Returns true if they are all valid.
+ *
+ * @features: The features offered
+ * @errp: Error pointer, set with offered and acceptable features on failure
+ */
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
+{
+    bool ok = true;
+    uint64_t svq_features = features;
+
+    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
+         ++b) {
+        switch (b) {
+        case VIRTIO_F_ANY_LAYOUT:
+        case VIRTIO_RING_F_EVENT_IDX:
+            continue;
+
+        case VIRTIO_F_ACCESS_PLATFORM:
+            /* SVQ trusts the host's IOMMU to translate addresses */
+        case VIRTIO_F_VERSION_1:
+            /* SVQ trusts that the guest vring is little endian */
+            if (!(svq_features & BIT_ULL(b))) {
+                svq_features |= BIT_ULL(b);
+                ok = false;
+            }
+            continue;
+
+        default:
+            if (svq_features & BIT_ULL(b)) {
+                svq_features &= ~BIT_ULL(b);
+                ok = false;
+            }
+        }
+    }
+
+    if (!ok) {
+        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
+                         ", ok: 0x%"PRIx64, features, svq_features);
+    }
+    return ok;
+}
+
+/**
+ * Number of descriptors that the SVQ can still make available from the
+ * guest: the vring size minus (shadow_avail_idx - shadow_used_idx).
+ * @svq: The svq
+ */
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
+{
+    return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
+}
+
+/**
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
+ * Returns false (logging a guest error) if a buffer is not fully mapped.
+ * @svq: Shadow VirtQueue
+ * @addrs: Translated IOVA addresses (output)
+ * @iovec: Source qemu's VA addresses
+ * @num: Length of iovec and minimum length of addrs
+ */
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
+                                     hwaddr *addrs, const struct iovec *iovec,
+                                     size_t num)
+{
+    if (num == 0) {
+        return true;
+    }
+
+    for (size_t i = 0; i < num; ++i) {
+        DMAMap needle = {
+            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+            .size = iovec[i].iov_len,
+        };
+        Int128 needle_last, map_last;
+        size_t off;
+
+        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+        /*
+         * Map cannot be NULL since iova map contains all guest space and
+         * qemu already has a physical address mapped
+         */
+        if (unlikely(!map)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
+                          needle.translated_addr);
+            return false;
+        }
+
+        off = needle.translated_addr - map->translated_addr;
+        addrs[i] = map->iova + off;
+
+        needle_last = int128_add(int128_make64(needle.translated_addr),
+                                 int128_make64(iovec[i].iov_len));
+        map_last = int128_make64(map->translated_addr + map->size);
+        if (unlikely(int128_gt(needle_last, map_last))) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Guest buffer expands over iova range");
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Scratch array for the translated addresses, at least num entries
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+                                        const struct iovec *iovec, size_t num,
+                                        bool more_descs, bool write)
+{
+    uint16_t i = svq->free_head, last = svq->free_head;
+    unsigned n;
+    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
+    vring_desc_t *descs = svq->vring.desc;
+    bool ok;
+
+    if (num == 0) {
+        return true;
+    }
+
+    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    for (n = 0; n < num; n++) {
+        if (more_descs || (n + 1 < num)) {
+            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+            descs[i].next = cpu_to_le16(svq->desc_next[i]);
+        } else {
+            descs[i].flags = flags;
+        }
+        descs[i].addr = cpu_to_le64(sg[n]);
+        descs[i].len = cpu_to_le32(iovec[n].iov_len);
+
+        last = i;
+        /* desc_next entries are read with le16_to_cpu, as done below */
+        i = le16_to_cpu(svq->desc_next[i]);
+    }
+    svq->free_head = le16_to_cpu(svq->desc_next[last]);
+    return true;
+}
+
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+                                const struct iovec *out_sg, size_t out_num,
+                                const struct iovec *in_sg, size_t in_num,
+                                unsigned *head)
+{
+    unsigned avail_idx;
+    vring_avail_t *avail = svq->vring.avail;
+    bool ok;
+    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
+
+    *head = svq->free_head;
+
+    /* An element without any descriptor cannot be made available */
+    if (unlikely(!out_num && !in_num)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Guest provided element with no descriptors");
+        return false;
+    }
+
+    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+                                     false);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    /*
+     * Put the chain head in the available array; avail->idx is only updated
+     * after the write barrier below.
+     */
+    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
+    avail->ring[avail_idx] = cpu_to_le16(*head);
+    svq->shadow_avail_idx++;
+
+    /* Update the avail index only after the ring entry has been written */
+    smp_wmb();
+    avail->idx = cpu_to_le16(svq->shadow_avail_idx);
+
+    return true;
+}
+
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+    const vring_used_t *used = svq->vring.used;
+    bool needs_kick;
+
+    /*
+     * Expose the available array entries before checking the used ring,
+     * whose fields are little endian as they are shared with the device.
+     */
+    smp_mb();
+
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        uint16_t avail_event = le16_to_cpu(
+                *(const uint16_t *)&used->ring[svq->vring.num]);
+        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
+                                      svq->shadow_avail_idx - 1);
+    } else {
+        needs_kick = !(used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
+    }
+    if (needs_kick) {
+        event_notifier_set(&svq->hdev_kick);
+    }
+}
+
+/**
+ * Add an element to a SVQ, store elem in desc_state and kick if needed.
+ *
+ * Return 0 on success, -EINVAL if element is invalid, -ENOSPC if queue is full
+ */
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+                  size_t out_num, const struct iovec *in_sg, size_t in_num,
+                  VirtQueueElement *elem)
+{
+    unsigned qemu_head;
+    unsigned ndescs = in_num + out_num;
+    bool ok;
+
+    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
+        return -ENOSPC;
+    }
+
+    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+    if (unlikely(!ok)) {
+        return -EINVAL;
+    }
+
+    svq->desc_state[qemu_head].elem = elem;
+    svq->desc_state[qemu_head].ndescs = ndescs;
+    vhost_svq_kick(svq);
+    return 0;
+}
+
+/* Convenience wrapper: add a guest's element using its own in/out sg lists */
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
+                                 VirtQueueElement *elem)
+{
+    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+                         elem->in_num, elem);
+}
+
+/**
+ * Forward available buffers from the guest's virtqueue to the SVQ.
+ *
+ * @svq: Shadow VirtQueue
+ *
+ * Note that this function does not guarantee that all guest's available
+ * buffers are available to the device in SVQ avail ring. The guest may have
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
+ * qemu vaddr.
+ *
+ * If that happens, guest's kick notifications will be disabled until the
+ * device uses some buffers.
+ */
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
+{
+    /* Clear event notifier */
+    event_notifier_test_and_clear(&svq->svq_kick);
+
+    /* Forward to the device as many available buffers as possible */
+    do {
+        virtio_queue_set_notification(svq->vq, false);
+
+        while (true) {
+            g_autofree VirtQueueElement *elem = NULL;
+            int r;
+
+            if (svq->next_guest_avail_elem) {
+                elem = g_steal_pointer(&svq->next_guest_avail_elem);
+            } else {
+                elem = virtqueue_pop(svq->vq, sizeof(*elem));
+            }
+
+            if (!elem) {
+                break;
+            }
+
+            if (svq->ops) {
+                r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
+            } else {
+                r = vhost_svq_add_element(svq, elem);
+            }
+            if (unlikely(r != 0)) {
+                if (r == -ENOSPC) {
+                    /*
+                     * This condition is possible since a contiguous buffer in
+                     * GPA does not imply a contiguous buffer in qemu's VA
+                     * scatter-gather segments. If that happens, the buffer
+                     * exposed to the device needs to be a chain of descriptors
+                     * at this moment.
+                     *
+                     * SVQ cannot hold more available buffers if we are here:
+                     * queue the current guest descriptor and ignore kicks
+                     * until some elements are used.
+                     */
+                    svq->next_guest_avail_elem = g_steal_pointer(&elem);
+                }
+
+                /* VQ is full or broken: return with guest kicks disabled */
+                return;
+            }
+            /* elem belongs to SVQ or external caller now */
+            elem = NULL;
+        }
+
+        virtio_queue_set_notification(svq->vq, true);
+    } while (!virtio_queue_empty(svq->vq));
+}
+
+/**
+ * Handle guest's kick: clear the notifier and forward available buffers.
+ *
+ * @n: guest kick event notifier, the one that guest set to notify svq.
+ */
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
+    event_notifier_test_and_clear(n);
+    vhost_handle_guest_kick(svq);
+}
+
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
+{
+    uint16_t *used_idx = &svq->vring.used->idx;
+    if (svq->last_used_idx != svq->shadow_used_idx) {
+        return true;
+    }
+    /* Cached index exhausted: re-read the little endian used index */
+    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);
+
+    return svq->last_used_idx != svq->shadow_used_idx;
+}
+
+/**
+ * Enable vhost device calls after disabling them.
+ * Both used_event and the avail flags are written in little endian.
+ * @svq: The svq
+ *
+ * It returns false if there are pending used buffers from the vhost device,
+ * avoiding the possible races between SVQ checking for more work and enabling
+ * callbacks. True if SVQ used vring has no more pending buffers.
+ */
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
+{
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
+        *used_event = cpu_to_le16(svq->shadow_used_idx);
+    } else {
+        svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+    }
+
+    /* Make sure the event is enabled before the read of used_idx */
+    smp_mb();
+    return !vhost_svq_more_used(svq);
+}
+
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
+{
+    /*
+     * Nothing is written in the event idx case, since the used event index
+     * is already an index too far away. Otherwise set the NO_INTERRUPT flag.
+     */
+    if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+    }
+}
+
+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
+                                             uint16_t num, uint16_t i)
+{
+    for (uint16_t j = 0; j < (num - 1); ++j) {
+        i = le16_to_cpu(svq->desc_next[i]);
+    }
+    /* num - 1 desc_next links were followed from the initial index */
+    return i;
+}
+
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
+                                           uint32_t *len)
+{
+    const vring_used_t *used = svq->vring.used;
+    vring_used_elem_t used_elem;
+    uint16_t last_used, last_used_chain, num;
+
+    if (!vhost_svq_more_used(svq)) {
+        return NULL;
+    }
+
+    /* Only get used array entries after they have been exposed by dev */
+    smp_rmb();
+    last_used = svq->last_used_idx & (svq->vring.num - 1);
+    used_elem.id = le32_to_cpu(used->ring[last_used].id);
+    used_elem.len = le32_to_cpu(used->ring[last_used].len);
+
+    svq->last_used_idx++;
+    if (unlikely(used_elem.id >= svq->vring.num)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
+                      svq->vdev->name, used_elem.id);
+        return NULL;
+    }
+
+    if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+            "Device %s says index %u is used, but it was not available",
+            svq->vdev->name, used_elem.id);
+        return NULL;
+    }
+    /* Link the whole used chain in front of the free list; *len set below */
+    num = svq->desc_state[used_elem.id].ndescs;
+    svq->desc_state[used_elem.id].ndescs = 0;
+    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
+    svq->desc_next[last_used_chain] = svq->free_head;
+    svq->free_head = used_elem.id;
+
+    *len = used_elem.len;
+    return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
+}
+
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+                         const VirtQueueElement *elem, uint32_t len)
+{
+    virtqueue_push(svq->vq, elem, len);
+    if (svq->next_guest_avail_elem) {
+        /*
+         * A guest element is still pending because the avail ring was full,
+         * so it's a good moment to make more descriptors available.
+         */
+        vhost_handle_guest_kick(svq);
+    }
+}
+
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
+                            bool check_for_avail_queue)
+{
+    VirtQueue *vq = svq->vq;
+
+    /* Forward as many used buffers as possible. */
+    do {
+        unsigned i = 0;
+
+        vhost_svq_disable_notification(svq);
+        while (true) {
+            uint32_t len;
+            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
+            if (!elem) {
+                break;
+            }
+
+            if (unlikely(i >= svq->vring.num)) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                         "More than %u used buffers obtained in a %u size SVQ",
+                         i, svq->vring.num);
+                virtqueue_fill(vq, elem, len, i);
+                virtqueue_flush(vq, i);
+                return;
+            }
+            virtqueue_fill(vq, elem, len, i++);
+        }
+        /* Publish the i filled elements, then set the svq_call notifier */
+        virtqueue_flush(vq, i);
+        event_notifier_set(&svq->svq_call);
+
+        if (check_for_avail_queue && svq->next_guest_avail_elem) {
+            /*
+             * Avail ring was full when vhost_svq_flush was called, so it's a
+             * good moment to make more descriptors available if possible.
+             */
+            vhost_handle_guest_kick(svq);
+        }
+    } while (!vhost_svq_enable_notification(svq));
+}
+
+/**
+ * Poll the SVQ for one device used buffer, waiting up to 10 seconds.
+ *
+ * This function race with main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the length written by the device, 0 on timeout or invalid used entry
+ */
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+    int64_t start_us = g_get_monotonic_time();
+    uint32_t len = 0; /* vhost_svq_get_buf leaves it untouched on failure */
+
+    do {
+        if (vhost_svq_more_used(svq)) {
+            break;
+        }
+
+        if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
+            return 0;
+        }
+    } while (true);
+
+    vhost_svq_get_buf(svq, &len);
+    return len;
+}
+
+/**
+ * Forward used buffers: clear the notifier and flush the SVQ.
+ *
+ * @n: hdev call event notifier, the one that device set to notify svq.
+ *
+ * Note that we are not making any buffers available in the loop, there is no
+ * way that it runs more than virtqueue size times.
+ */
+static void vhost_svq_handle_call(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
+                                             hdev_call);
+    event_notifier_test_and_clear(n);
+    vhost_svq_flush(svq, true);
+}
+
+/**
+ * Set the call notifier for the SVQ to call the guest
+ *
+ * @svq: Shadow virtqueue
+ * @call_fd: call notifier fd, or VHOST_FILE_UNBIND to clear the notifier
+ *
+ * Called on BQL context.
+ */
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
+{
+    if (call_fd == VHOST_FILE_UNBIND) {
+        /*
+         * Make event_notifier_set fail if called handling device call.
+         *
+         * SVQ still needs device notifications, since it needs to keep
+         * forwarding used buffers even with the unbind.
+         */
+        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
+    } else {
+        event_notifier_init_fd(&svq->svq_call, call_fd);
+    }
+}
+
+/**
+ * Get the shadow vq vring addresses (desc, avail and used pointers as u64).
+ * @svq: Shadow virtqueue
+ * @addr: Destination to store address
+ */
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr)
+{
+    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
+    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
+    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
+}
+
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
+    size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
+                                                              sizeof(uint16_t);
+    /* Descriptor table plus avail ring, rounded up to whole host pages */
+    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
+}
+
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t ring_sz = offsetof(vring_used_t, ring[svq->vring.num]);
+    /* One more uint16_t follows the ring; result is whole host pages */
+    return ROUND_UP(ring_sz + sizeof(uint16_t), qemu_real_host_page_size());
+}
+
+/**
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
+ *
+ * @svq: The svq
+ * @svq_kick_fd: The svq kick fd, or VHOST_FILE_UNBIND to only stop polling
+ *
+ * Note that the SVQ will never close the old file descriptor.
+ */
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
+{
+    EventNotifier *svq_kick = &svq->svq_kick;
+    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
+    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
+
+    if (poll_stop) {
+        event_notifier_set_handler(svq_kick, NULL);
+    }
+
+    event_notifier_init_fd(svq_kick, svq_kick_fd);
+    /*
+     * event_notifier_set_handler already checks for guest's notifications if
+     * they arrive at the new file descriptor in the switch, so there is no
+     * need to explicitly check for them.
+     */
+    if (poll_start) {
+        event_notifier_set(svq_kick);
+        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
+    }
+}
+
+/**
+ * Start the shadow virtqueue operation.
+ *
+ * @svq: Shadow Virtqueue
+ * @vdev: VirtIO device
+ * @vq: Virtqueue to shadow
+ */
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+                     VirtQueue *vq)
+{
+    size_t desc_size, driver_size, device_size;
+
+    svq->next_guest_avail_elem = NULL;
+    svq->shadow_avail_idx = 0;
+    svq->shadow_used_idx = 0;
+    svq->last_used_idx = 0;
+    svq->vdev = vdev;
+    svq->vq = vq;
+    /* Zeroed vrings: the avail ring directly follows the descriptor table */
+    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
+    driver_size = vhost_svq_driver_area_size(svq);
+    device_size = vhost_svq_device_area_size(svq);
+    svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size);
+    desc_size = sizeof(vring_desc_t) * svq->vring.num;
+    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
+    memset(svq->vring.desc, 0, driver_size);
+    svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
+    memset(svq->vring.used, 0, device_size);
+    svq->desc_state = g_new0(SVQDescState, svq->vring.num);
+    svq->desc_next = g_new0(uint16_t, svq->vring.num);
+    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
+        svq->desc_next[i] = cpu_to_le16(i + 1);
+    }
+}
+
+/**
+ * Stop the shadow virtqueue operation, freeing what vhost_svq_start set up.
+ * @svq: Shadow Virtqueue
+ */
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
+{
+    g_autofree VirtQueueElement *next_avail_elem = NULL;
+
+    vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
+    if (!svq->vq) {
+        return;
+    }
+
+    /* Send all pending used descriptors to guest */
+    vhost_svq_flush(svq, false);
+
+    for (unsigned i = 0; i < svq->vring.num; ++i) {
+        g_autofree VirtQueueElement *elem = NULL;
+        elem = g_steal_pointer(&svq->desc_state[i].elem);
+        if (elem) {
+            virtqueue_detach_element(svq->vq, elem, 0);
+        }
+    }
+
+    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
+    if (next_avail_elem) {
+        virtqueue_detach_element(svq->vq, next_avail_elem, 0);
+    }
+    svq->vq = NULL;
+    g_free(svq->desc_next);
+    g_free(svq->desc_state);
+    qemu_vfree(svq->vring.desc);
+    qemu_vfree(svq->vring.used);
+}
+
+/**
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
+ * shadow methods and file descriptors.
+ *
+ * @iova_tree: Tree to perform descriptors translations
+ * @ops: SVQ owner callbacks
+ * @ops_opaque: ops opaque pointer
+ *
+ * Returns the new virtqueue (release it with vhost_svq_free) or NULL.
+ *
+ * In case of error, reason is reported through error_report.
+ */
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+                                    const VhostShadowVirtqueueOps *ops,
+                                    void *ops_opaque)
+{
+    g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
+    int r;
+
+    r = event_notifier_init(&svq->hdev_kick, 0);
+    if (r != 0) {
+        error_report("Couldn't create kick event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_kick;
+    }
+
+    r = event_notifier_init(&svq->hdev_call, 0);
+    if (r != 0) {
+        error_report("Couldn't create call event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_call;
+    }
+
+    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
+    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
+    svq->iova_tree = iova_tree;
+    svq->ops = ops;
+    svq->ops_opaque = ops_opaque;
+    return g_steal_pointer(&svq);
+
+err_init_hdev_call:
+    event_notifier_cleanup(&svq->hdev_kick);
+
+err_init_hdev_kick:
+    return NULL;
+}
+
+/**
+ * Free the resources of the shadow virtqueue, stopping it first.
+ *
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
+ */
+void vhost_svq_free(gpointer pvq)
+{
+    VhostShadowVirtqueue *vq = pvq;
+    vhost_svq_stop(vq);
+    event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_set_handler(&vq->hdev_call, NULL);
+    event_notifier_cleanup(&vq->hdev_call);
+    g_free(vq);
+}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
new file mode 100644
index 00000000..d04c34a5
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -0,0 +1,139 @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
+#define VHOST_SHADOW_VIRTQUEUE_H
+
+#include "qemu/event_notifier.h"
+#include "hw/virtio/virtio.h"
+#include "standard-headers/linux/vhost_types.h"
+#include "hw/virtio/vhost-iova-tree.h"
+
+typedef struct SVQDescState {
+    VirtQueueElement *elem; /* guest element for this entry (TODO confirm) */
+
+    /*
+     * Number of descriptors exposed to the device for this entry. It may or
+     * may not match the number of descriptors the guest used.
+     */
+    unsigned int ndescs;
+} SVQDescState;
+
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+
+/**
+ * Callback invoked to handle a buffer the guest made available.
+ *
+ * @svq: Shadow virtqueue
+ * @elem: Element placed in the queue by the guest
+ * @vq_callback_opaque: Opaque pointer (presumably the SVQ's ops_opaque)
+ *
+ * Returns 0 if the vq is running as expected.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+                                      VirtQueueElement *elem,
+                                      void *vq_callback_opaque);
+
+typedef struct VhostShadowVirtqueueOps {
+    VirtQueueAvailCallback avail_handler; /* handles an avail buffer */
+} VhostShadowVirtqueueOps;
+
+/* Shadow virtqueue to relay notifications */
+typedef struct VhostShadowVirtqueue {
+    /* Shadow vring */
+    struct vring vring;
+
+    /* Shadow kick notifier, sent to vhost */
+    EventNotifier hdev_kick;
+    /* Shadow call notifier, sent to vhost */
+    EventNotifier hdev_call;
+
+    /*
+     * Guest-to-host notifier borrowed from the VirtQueue. Keeping it in this
+     * embedded EventNotifier lets the event loop recover the owning
+     * VhostShadowVirtqueue easily; the VirtQueue's own notifier offers no
+     * easy way to retrieve it.
+     *
+     * The SVQ must not clean it up, or the VirtQueue's one would be lost.
+     */
+    EventNotifier svq_kick;
+
+    /* Guest's call notifier, where the SVQ calls guest. */
+    EventNotifier svq_call;
+
+    /* VirtQueue being shadowed */
+    VirtQueue *vq;
+
+    /* Virtio device */
+    VirtIODevice *vdev;
+
+    /* IOVA mapping */
+    VhostIOVATree *iova_tree;
+
+    /* SVQ vring descriptors state */
+    SVQDescState *desc_state;
+
+    /* Next VirtQueue element that guest made available */
+    VirtQueueElement *next_guest_avail_elem;
+
+    /*
+     * Backup of each descriptor's next field, so it can be recovered
+     * securely without trusting memory the device can access.
+     */
+    uint16_t *desc_next;
+
+    /* Caller callbacks */
+    const VhostShadowVirtqueueOps *ops;
+
+    /* Caller callbacks opaque */
+    void *ops_opaque;
+
+    /* Next head to expose to the device */
+    uint16_t shadow_avail_idx;
+
+    /* Next free descriptor */
+    uint16_t free_head;
+
+    /* Last seen used idx */
+    uint16_t shadow_used_idx;
+
+    /* Next head to consume from the device */
+    uint16_t last_used_idx;
+} VhostShadowVirtqueue;
+
+bool vhost_svq_valid_features(uint64_t features, Error **errp);
+
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+                         const VirtQueueElement *elem, uint32_t len);
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+                  size_t out_num, const struct iovec *in_sg, size_t in_num,
+                  VirtQueueElement *elem);
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
+
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr);
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
+
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+                     VirtQueue *vq);
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
+/* Returns a new SVQ, or NULL after reporting the failure. */
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+                                    const VhostShadowVirtqueueOps *ops,
+                                    void *ops_opaque);
+/* Takes a gpointer so the autoptr cleanup declared below can use it. */
+void vhost_svq_free(gpointer vq);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
+
+#endif
diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c
new file mode 100644
index 00000000..c175148f
--- /dev/null
+++ b/hw/virtio/vhost-stub.c
@@ -0,0 +1,17 @@
+#include "qemu/osdep.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+
+bool vhost_has_free_slot(void)
+{
+    return true; /* stub: unconditionally reports a free slot */
+}
+
+bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
+{
+    return false; /* stub: always fails; note that *errp is left unset */
+}
+
+void vhost_user_cleanup(VhostUserState *user)
+{ /* stub: the stub init never sets anything up, so nothing to release */
+}
diff --git a/hw/virtio/vhost-user-blk-pci.c b/hw/virtio/vhost-user-blk-pci.c
new file mode 100644
index 00000000..eef8641a
--- /dev/null
+++ b/hw/virtio/vhost-user-blk-pci.c
@@ -0,0 +1,109 @@
+/*
+ * Vhost user blk PCI Bindings
+ *
+ * Copyright(C) 2017 Intel Corporation.
+ *
+ * Authors:
+ *  Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by:
+ * Felipe Franciosi <felipe@nutanix.com>
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Nicholas Bellinger <nab@risingtidesystems.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "standard-headers/linux/virtio_pci.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost-user-blk.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VHostUserBlkPCI VHostUserBlkPCI;
+
+/*
+ * vhost-user-blk-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base"
+DECLARE_INSTANCE_CHECKER(VHostUserBlkPCI, VHOST_USER_BLK_PCI,
+                         TYPE_VHOST_USER_BLK_PCI)
+
+struct VHostUserBlkPCI {
+    VirtIOPCIProxy parent_obj; /* the VirtIOPCIProxy this type extends */
+    VHostUserBlk vdev;         /* vhost-user-blk device embedded by value */
+};
+
+static Property vhost_user_blk_pci_properties[] = { /* proxy-level props */
+    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED), /* resolved in realize */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserBlkPCI *blk_pci = VHOST_USER_BLK_PCI(vpci_dev);
+    VHostUserBlk *blk = &blk_pci->vdev;
+
+    /* Resolve the "auto" queue count before deriving the vector count. */
+    if (blk->num_queues == VHOST_USER_BLK_AUTO_NUM_QUEUES) {
+        blk->num_queues = virtio_pci_optimal_num_queues(0);
+    }
+    /* Unspecified vectors default to one per queue plus one more. */
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = blk->num_queues + 1;
+    }
+    qdev_realize(DEVICE(blk), BUS(&vpci_dev->bus), errp);
+}
+
+static void vhost_user_blk_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; /* PCI identity */
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = PCI_CLASS_STORAGE_SCSI;
+    vpci_class->realize = vhost_user_blk_pci_realize; /* proxy realize hook */
+    device_class_set_props(dev_class, vhost_user_blk_pci_properties);
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+}
+
+static void vhost_user_blk_pci_instance_init(Object *obj)
+{
+    VHostUserBlk *blk = &VHOST_USER_BLK_PCI(obj)->vdev;
+    /* Initialize the embedded device, then alias its "bootindex" here. */
+    virtio_instance_init_common(obj, blk, sizeof(*blk),
+                                TYPE_VHOST_USER_BLK);
+    object_property_add_alias(obj, "bootindex", OBJECT(blk),
+                              "bootindex");
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = {
+    .base_name               = TYPE_VHOST_USER_BLK_PCI,
+    .generic_name            = "vhost-user-blk-pci",
+    .transitional_name       = "vhost-user-blk-pci-transitional",
+    .non_transitional_name   = "vhost-user-blk-pci-non-transitional",
+    .instance_size  = sizeof(VHostUserBlkPCI), /* proxy + embedded device */
+    .instance_init  = vhost_user_blk_pci_instance_init,
+    .class_init     = vhost_user_blk_pci_class_init,
+};
+
+static void vhost_user_blk_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_blk_pci_info); /* via type_init */
+}
+
+type_init(vhost_user_blk_pci_register)
diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c
new file mode 100644
index 00000000..6829b8b7
--- /dev/null
+++ b/hw/virtio/vhost-user-fs-pci.c
@@ -0,0 +1,88 @@
+/*
+ * Vhost-user filesystem virtio device PCI glue
+ *
+ * Copyright 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Dr. David Alan Gilbert <dgilbert@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-user-fs.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+struct VHostUserFSPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy part of the object */
+    VHostUserFS vdev;          /* vhost-user-fs device embedded by value */
+};
+
+typedef struct VHostUserFSPCI VHostUserFSPCI;
+
+#define TYPE_VHOST_USER_FS_PCI "vhost-user-fs-pci-base"
+
+DECLARE_INSTANCE_CHECKER(VHostUserFSPCI, VHOST_USER_FS_PCI,
+                         TYPE_VHOST_USER_FS_PCI)
+
+static Property vhost_user_fs_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED), /* resolved in realize */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserFS *fs = &VHOST_USER_FS_PCI(vpci_dev)->vdev;
+    DeviceState *fs_dev = DEVICE(fs);
+
+    /* Default: a vector per request queue, plus config change and hiprio. */
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = fs->conf.num_request_queues + 2;
+    }
+
+    qdev_realize(fs_dev, BUS(&vpci_dev->bus), errp);
+}
+
+static void vhost_user_fs_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = 0; /* Set by virtio-pci based on virtio id */
+    pci_class->revision = 0x00;
+    pci_class->class_id = PCI_CLASS_STORAGE_OTHER;
+    vpci_class->realize = vhost_user_fs_pci_realize;
+    device_class_set_props(dev_class, vhost_user_fs_pci_properties);
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+}
+
+static void vhost_user_fs_pci_instance_init(Object *obj)
+{
+    VHostUserFS *fs = &VHOST_USER_FS_PCI(obj)->vdev;
+    /* Set up the embedded device and forward its "bootindex" property. */
+    virtio_instance_init_common(obj, fs, sizeof(*fs),
+                                TYPE_VHOST_USER_FS);
+    object_property_add_alias(obj, "bootindex", OBJECT(fs),
+                              "bootindex");
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_fs_pci_info = {
+    .base_name             = TYPE_VHOST_USER_FS_PCI,
+    .non_transitional_name = "vhost-user-fs-pci", /* only variant named */
+    .instance_size = sizeof(VHostUserFSPCI), /* proxy + embedded device */
+    .instance_init = vhost_user_fs_pci_instance_init,
+    .class_init    = vhost_user_fs_pci_class_init,
+};
+
+static void vhost_user_fs_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_fs_pci_info); /* via type_init */
+}
+
+type_init(vhost_user_fs_pci_register);
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
new file mode 100644
index 00000000..d97b179e
--- /dev/null
+++ b/hw/virtio/vhost-user-fs.c
@@ -0,0 +1,336 @@
+/*
+ * Vhost-user filesystem virtio device
+ *
+ * Copyright 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include "standard-headers/linux/virtio_fs.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "qemu/error-report.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user-fs.h"
+#include "monitor/monitor.h"
+#include "sysemu/sysemu.h"
+
+static const int user_feature_bits[] = { /* fed to vhost_get_features() */
+    VIRTIO_F_VERSION_1,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_F_RING_PACKED,
+    VIRTIO_F_IOMMU_PLATFORM,
+    VIRTIO_F_RING_RESET,
+    /* presumably the end-of-list marker; confirm in vhost.h */
+    VHOST_INVALID_FEATURE_BIT
+};
+
+static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    struct virtio_fs_config cfg = {};
+    /* Copy the tag and its NUL when it fits; a full-width tag has no NUL. */
+    size_t tag_bytes = MIN(strlen(fs->conf.tag) + 1, sizeof(cfg.tag));
+
+    memcpy((char *)cfg.tag, fs->conf.tag, tag_bytes);
+    virtio_stl_p(vdev, &cfg.num_request_queues, fs->conf.num_request_queues);
+    /* Hand the assembled structure to the caller's buffer. */
+    memcpy(config, &cfg, sizeof(cfg));
+}
+
+static void vuf_start(VirtIODevice *vdev)
+{
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    struct vhost_dev *hdev = &fs->vhost_dev;
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(qbus);
+    int rc, n;
+
+    if (!bus_class->set_guest_notifiers) {
+        error_report("binding does not support guest notifiers");
+        return;
+    }
+
+    rc = vhost_dev_enable_notifiers(hdev, vdev);
+    if (rc < 0) {
+        error_report("Error enabling host notifiers: %d", -rc);
+        return;
+    }
+
+    rc = bus_class->set_guest_notifiers(qbus->parent, hdev->nvqs, true);
+    if (rc < 0) {
+        error_report("Error binding guest notifier: %d", -rc);
+        goto undo_host_notifiers;
+    }
+
+    /* Pass the guest's feature set on to vhost as the acked features. */
+    hdev->acked_features = vdev->guest_features;
+    rc = vhost_dev_start(hdev, vdev, true);
+    if (rc < 0) {
+        error_report("Error starting vhost: %d", -rc);
+        goto undo_guest_notifiers;
+    }
+
+    /*
+     * guest_notifier_mask/pending are not used yet, so unmask every
+     * virtqueue here; virtio-pci will do the right thing by
+     * enabling/disabling irqfd.
+     */
+    for (n = 0; n < hdev->nvqs; n++) {
+        vhost_virtqueue_mask(hdev, vdev, n, false);
+    }
+    return;
+
+undo_guest_notifiers:
+    bus_class->set_guest_notifiers(qbus->parent, hdev->nvqs, false);
+undo_host_notifiers:
+    vhost_dev_disable_notifiers(hdev, vdev);
+}
+
+static void vuf_stop(VirtIODevice *vdev)
+{
+    struct vhost_dev *hdev = &VHOST_USER_FS(vdev)->vhost_dev;
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(qbus);
+    int rc;
+
+    /* vuf_start() bails out on the same condition, so nothing to undo. */
+    if (!bus_class->set_guest_notifiers) {
+        return;
+    }
+    vhost_dev_stop(hdev, vdev, true);
+
+    rc = bus_class->set_guest_notifiers(qbus->parent, hdev->nvqs, false);
+    if (rc >= 0) {
+        /* Host notifiers are only released once the guest ones are gone. */
+        vhost_dev_disable_notifiers(hdev, vdev);
+    } else {
+        error_report("vhost guest notifier cleanup failed: %d", rc);
+    }
+}
+
+static void vuf_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    bool want_running = virtio_device_should_start(vdev, status);
+
+    /* Only act on a transition between stopped and started. */
+    if (vhost_dev_is_started(&fs->vhost_dev) == want_running) {
+        return;
+    }
+    if (!want_running) {
+        vuf_stop(vdev);
+    } else {
+        vuf_start(vdev);
+    }
+}
+
+static uint64_t vuf_get_features(VirtIODevice *vdev,
+                                 uint64_t features,
+                                 Error **errp)
+{
+    struct vhost_dev *hdev = &VHOST_USER_FS(vdev)->vhost_dev;
+    /* The result comes straight from vhost; errp is never set here. */
+    return vhost_get_features(hdev, user_feature_bits, features);
+}
+
+static void vuf_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /*
+     * Intentionally empty. Not normally called, since the daemon handles
+     * the queue; however virtio's cleanup path can call this.
+     */
+}
+
+static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                    bool mask)
+{
+    struct vhost_dev *hdev = &VHOST_USER_FS(vdev)->vhost_dev;
+    /* Forward the (un)mask request for queue @idx to vhost. */
+    vhost_virtqueue_mask(hdev, vdev, idx, mask);
+}
+
+static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    struct vhost_dev *hdev = &VHOST_USER_FS(vdev)->vhost_dev;
+    /* The pending state of queue @idx is whatever vhost reports. */
+    return vhost_virtqueue_pending(hdev, idx);
+}
+
+static void vuf_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserFS *fs = VHOST_USER_FS(dev);
+    const size_t tag_max = sizeof_field(struct virtio_fs_config, tag);
+    size_t tag_len;
+    unsigned int q;
+
+    /* Validate the user-supplied properties before creating anything. */
+    if (!fs->conf.chardev.chr) {
+        error_setg(errp, "missing chardev");
+        return;
+    }
+
+    if (!fs->conf.tag) {
+        error_setg(errp, "missing tag property");
+        return;
+    }
+
+    tag_len = strlen(fs->conf.tag);
+    if (tag_len == 0) {
+        error_setg(errp, "tag property cannot be empty");
+        return;
+    }
+
+    /* A tag exactly as wide as the config field is still accepted. */
+    if (tag_len > tag_max) {
+        error_setg(errp, "tag property must be %zu bytes or less", tag_max);
+        return;
+    }
+
+    if (fs->conf.num_request_queues == 0) {
+        error_setg(errp, "num-request-queues property must be larger than 0");
+        return;
+    }
+
+    if (!is_power_of_2(fs->conf.queue_size)) {
+        error_setg(errp, "queue-size property must be a power of 2");
+        return;
+    }
+
+    if (fs->conf.queue_size > VIRTQUEUE_MAX_SIZE) {
+        error_setg(errp, "queue-size property must be %u or smaller",
+                   VIRTQUEUE_MAX_SIZE);
+        return;
+    }
+
+    if (!vhost_user_init(&fs->vhost_user, &fs->conf.chardev, errp)) {
+        return;
+    }
+
+    virtio_init(vdev, VIRTIO_ID_FS, sizeof(struct virtio_fs_config));
+
+    /* The hiprio queue is added first, then the request queues. */
+    fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size,
+                                     vuf_handle_output);
+    fs->req_vqs = g_new(VirtQueue *, fs->conf.num_request_queues);
+    for (q = 0; q < fs->conf.num_request_queues; q++) {
+        fs->req_vqs[q] = virtio_add_queue(vdev, fs->conf.queue_size,
+                                          vuf_handle_output);
+    }
+
+    /* vhost gets one virtqueue for hiprio plus one per request queue. */
+    fs->vhost_dev.nvqs = 1 + fs->conf.num_request_queues;
+    fs->vhost_dev.vqs = g_new0(struct vhost_virtqueue, fs->vhost_dev.nvqs);
+    if (vhost_dev_init(&fs->vhost_dev, &fs->vhost_user,
+                       VHOST_BACKEND_TYPE_USER, 0, errp) >= 0) {
+        return;
+    }
+
+    /* vhost_dev_init() failed: unwind everything set up above. */
+    vhost_user_cleanup(&fs->vhost_user);
+    virtio_delete_queue(fs->hiprio_vq);
+    for (q = 0; q < fs->conf.num_request_queues; q++) {
+        virtio_delete_queue(fs->req_vqs[q]);
+    }
+
+    g_free(fs->req_vqs);
+    virtio_cleanup(vdev);
+    g_free(fs->vhost_dev.vqs);
+}
+
+static void vuf_device_unrealize(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserFS *fs = VHOST_USER_FS(dev);
+    int q = 0;
+
+    /* Setting status 0 stops the vhost backend if appropriate. */
+    vuf_set_status(vdev, 0);
+    vhost_dev_cleanup(&fs->vhost_dev);
+    vhost_user_cleanup(&fs->vhost_user);
+
+    /* Drop the virtqueues in creation order: hiprio, then the requests. */
+    virtio_delete_queue(fs->hiprio_vq);
+    while (q < fs->conf.num_request_queues) {
+        virtio_delete_queue(fs->req_vqs[q++]);
+    }
+    g_free(fs->req_vqs);
+    virtio_cleanup(vdev);
+    /* Leave no dangling vqs pointer behind. */
+    g_free(fs->vhost_dev.vqs);
+    fs->vhost_dev.vqs = NULL;
+}
+
+static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
+{
+    /* The vhost_dev is embedded in the device state. */
+    return &VHOST_USER_FS(vdev)->vhost_dev;
+}
+
+static const VMStateDescription vuf_vmstate = {
+    .name = "vhost-user-fs",
+    .unmigratable = 1, /* flags the device as not migratable */
+};
+
+static Property vuf_properties[] = { /* all validated in realize */
+    DEFINE_PROP_CHR("chardev", VHostUserFS, conf.chardev),
+    DEFINE_PROP_STRING("tag", VHostUserFS, conf.tag),
+    DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
+                       conf.num_request_queues, 1), /* default: 1 */
+    DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vuf_instance_init(Object *obj)
+{
+    VHostUserFS *fs = VHOST_USER_FS(obj);
+    /* Add a "bootindex" property that is stored in fs->bootindex. */
+    device_add_bootindex_property(obj, &fs->bootindex, "bootindex",
+                                  "/filesystem@0", DEVICE(obj));
+}
+
+static void vuf_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    dev_class->vmsd = &vuf_vmstate; /* carries the unmigratable flag */
+    device_class_set_props(dev_class, vuf_properties);
+    virtio_class->realize = vuf_device_realize; /* virtio callbacks */
+    virtio_class->unrealize = vuf_device_unrealize;
+    virtio_class->get_features = vuf_get_features;
+    virtio_class->get_config = vuf_get_config;
+    virtio_class->set_status = vuf_set_status;
+    virtio_class->guest_notifier_mask = vuf_guest_notifier_mask;
+    virtio_class->guest_notifier_pending = vuf_guest_notifier_pending;
+    virtio_class->get_vhost = vuf_get_vhost;
+}
+
+static const TypeInfo vuf_info = {
+    .name = TYPE_VHOST_USER_FS,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VHostUserFS),
+    .instance_init = vuf_instance_init, /* adds "bootindex" */
+    .class_init = vuf_class_init,       /* installs the vuf_* callbacks */
+};
+
+static void vuf_register_types(void)
+{
+    type_register_static(&vuf_info); /* invoked through type_init() */
+}
+
+type_init(vuf_register_types)
diff --git a/hw/virtio/vhost-user-gpio-pci.c b/hw/virtio/vhost-user-gpio-pci.c
new file mode 100644
index 00000000..b3028a24
--- /dev/null
+++ b/hw/virtio/vhost-user-gpio-pci.c
@@ -0,0 +1,69 @@
+/*
+ * Vhost-user gpio virtio device PCI glue
+ *
+ * Copyright (c) 2022 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-user-gpio.h"
+#include "hw/virtio/virtio-pci.h"
+
+struct VHostUserGPIOPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy part of the object */
+    VHostUserGPIO vdev;        /* vhost-user-gpio device embedded by value */
+};
+
+typedef struct VHostUserGPIOPCI VHostUserGPIOPCI;
+
+#define TYPE_VHOST_USER_GPIO_PCI "vhost-user-gpio-pci-base"
+
+DECLARE_INSTANCE_CHECKER(VHostUserGPIOPCI, VHOST_USER_GPIO_PCI,
+                         TYPE_VHOST_USER_GPIO_PCI)
+
+static void vhost_user_gpio_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserGPIO *gpio = &VHOST_USER_GPIO_PCI(vpci_dev)->vdev;
+
+    /* The vector count is always forced to one here. */
+    vpci_dev->nvectors = 1;
+    qdev_realize(DEVICE(gpio), BUS(&vpci_dev->bus), errp);
+}
+
+static void vhost_user_gpio_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = 0; /* Set by virtio-pci based on virtio id */
+    pci_class->revision = 0x00;
+    pci_class->class_id = PCI_CLASS_COMMUNICATION_OTHER;
+    vpci_class->realize = vhost_user_gpio_pci_realize;
+    set_bit(DEVICE_CATEGORY_INPUT, dev_class->categories);
+}
+
+static void vhost_user_gpio_pci_instance_init(Object *obj)
+{
+    VHostUserGPIO *gpio = &VHOST_USER_GPIO_PCI(obj)->vdev;
+
+    /* Initialize the device embedded in the PCI proxy object. */
+    virtio_instance_init_common(obj, gpio, sizeof(*gpio), TYPE_VHOST_USER_GPIO);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_gpio_pci_info = {
+    .base_name = TYPE_VHOST_USER_GPIO_PCI,
+    .non_transitional_name = "vhost-user-gpio-pci", /* only variant named */
+    .instance_size = sizeof(VHostUserGPIOPCI), /* proxy + embedded device */
+    .instance_init = vhost_user_gpio_pci_instance_init,
+    .class_init = vhost_user_gpio_pci_class_init,
+};
+
+static void vhost_user_gpio_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_gpio_pci_info); /* via type_init */
+}
+
+type_init(vhost_user_gpio_pci_register);
diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
new file mode 100644
index 00000000..b7b82a10
--- /dev/null
+++ b/hw/virtio/vhost-user-gpio.c
@@ -0,0 +1,418 @@
+/*
+ * Vhost-user GPIO virtio device
+ *
+ * Copyright (c) 2022 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/vhost-user-gpio.h"
+#include "qemu/error-report.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "trace.h"
+
+#define REALIZE_CONNECTION_RETRIES 3
+
+/* Features required from VirtIO; passed to vhost_{get,ack}_features() */
+static const int feature_bits[] = {
+    VIRTIO_F_VERSION_1,
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_GPIO_F_IRQ,
+    VIRTIO_F_RING_RESET,
+    VHOST_INVALID_FEATURE_BIT /* presumably the list terminator; confirm */
+};
+
+static void vu_gpio_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    const VHostUserGPIO *vgpio = VHOST_USER_GPIO(vdev);
+    /* Serve the copy cached in the device state. */
+    memcpy(config, &vgpio->config, sizeof(vgpio->config));
+}
+
+static int vu_gpio_config_notifier(struct vhost_dev *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+    /* Refresh the device's config space from the cached copy, then notify. */
+    memcpy(vdev->config, &gpio->config, sizeof(gpio->config));
+    virtio_notify_config(vdev);
+    return 0;
+}
+
+const VhostDevConfigOps gpio_ops = { /* installed by vu_gpio_connect() */
+    .vhost_dev_config_notifier = vu_gpio_config_notifier,
+}; /* NOTE(review): non-static; confirm whether other files use it */
+
+static int vu_gpio_start(VirtIODevice *vdev)
+{
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+    struct vhost_dev *vhost_dev = &gpio->vhost_dev;
+    int ret, i;
+    /* Returns 0 on success, else -ENOSYS or the failing call's result. */
+    if (!k->set_guest_notifiers) {
+        error_report("binding does not support guest notifiers");
+        return -ENOSYS;
+    }
+
+    ret = vhost_dev_enable_notifiers(vhost_dev, vdev);
+    if (ret < 0) {
+        error_report("Error enabling host notifiers: %d", ret);
+        return ret;
+    }
+
+    ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, true);
+    if (ret < 0) {
+        error_report("Error binding guest notifier: %d", ret);
+        goto err_host_notifiers;
+    }
+
+    /*
+     * Before we start up we need to ensure we have the final feature
+     * set needed for the vhost configuration. The backend may also
+     * apply backend_features when the feature set is sent.
+     */
+    vhost_ack_features(&gpio->vhost_dev, feature_bits, vdev->guest_features);
+
+    ret = vhost_dev_start(&gpio->vhost_dev, vdev, false);
+    if (ret < 0) {
+        error_report("Error starting vhost-user-gpio: %d", ret);
+        goto err_guest_notifiers;
+    }
+    gpio->started_vu = true;
+
+    /*
+     * guest_notifier_mask/pending not used yet, so just unmask
+     * everything here. virtio-pci will do the right thing by
+     * enabling/disabling irqfd.
+     */
+    for (i = 0; i < gpio->vhost_dev.nvqs; i++) {
+        vhost_virtqueue_mask(&gpio->vhost_dev, vdev, i, false);
+    }
+
+    /*
+     * As we must have VHOST_USER_F_PROTOCOL_FEATURES (because
+     * VHOST_USER_GET_CONFIG requires it) we need to explicitly enable
+     * the vrings.
+     */
+    g_assert(vhost_dev->vhost_ops &&
+             vhost_dev->vhost_ops->vhost_set_vring_enable);
+    ret = vhost_dev->vhost_ops->vhost_set_vring_enable(vhost_dev, true);
+    if (ret == 0) {
+        return 0;
+    }
+    /* Enabling the vrings failed: report and fall into the unwind below. */
+    error_report("Failed to start vrings for vhost-user-gpio: %d", ret);
+    /* NOTE(review): no vhost_dev_stop(); started_vu stays true - confirm */
+err_guest_notifiers:
+    k->set_guest_notifiers(qbus->parent, gpio->vhost_dev.nvqs, false);
+err_host_notifiers:
+    vhost_dev_disable_notifiers(&gpio->vhost_dev, vdev);
+
+    return ret;
+}
+
+static void vu_gpio_stop(VirtIODevice *vdev)
+{
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+    struct vhost_dev *hdev = &gpio->vhost_dev;
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(qbus);
+    int rc;
+
+    /* started_vu is set by vu_gpio_start(); without it there is no work. */
+    if (!gpio->started_vu) {
+        return;
+    }
+    /* The flag is cleared even if the teardown below bails out early. */
+    gpio->started_vu = false;
+
+    if (!bus_class->set_guest_notifiers) {
+        return;
+    }
+    vhost_dev_stop(hdev, vdev, false);
+
+    rc = bus_class->set_guest_notifiers(qbus->parent, hdev->nvqs, false);
+    if (rc >= 0) {
+        vhost_dev_disable_notifiers(hdev, vdev);
+    } else {
+        error_report("vhost guest notifier cleanup failed: %d", rc);
+    }
+}
+
+static void vu_gpio_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+    bool want_running = virtio_device_should_start(vdev, status);
+
+    trace_virtio_gpio_set_status(status);
+
+    /* Ignore requests while disconnected or already in the wanted state. */
+    if (!gpio->connected) {
+        return;
+    }
+    if (vhost_dev_is_started(&gpio->vhost_dev) == want_running) {
+        return;
+    }
+    if (!want_running) {
+        vu_gpio_stop(vdev);
+        return;
+    }
+    /* A failed start drops the chardev connection. */
+    if (vu_gpio_start(vdev)) {
+        qemu_chr_fe_disconnect(&gpio->chardev);
+    }
+}
+
+static uint64_t vu_gpio_get_features(VirtIODevice *vdev, uint64_t features,
+                                     Error **errp)
+{
+    struct vhost_dev *hdev = &VHOST_USER_GPIO(vdev)->vhost_dev;
+    /* The result comes straight from vhost; errp is never set here. */
+    return vhost_get_features(hdev, feature_bits, features);
+}
+
+static void vu_gpio_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /*
+     * Intentionally empty. Not normally called, since the daemon handles
+     * the queue; however virtio's cleanup path can call this.
+     */
+}
+
+static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+{
+    struct vhost_dev *hdev = &VHOST_USER_GPIO(vdev)->vhost_dev;
+    /* Forward the (un)mask request for queue @idx to vhost. */
+    vhost_virtqueue_mask(hdev, vdev, idx, mask);
+}
+
+static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio)
+{
+    virtio_delete_queue(gpio->command_vq); /* both virtqueues go first */
+    virtio_delete_queue(gpio->interrupt_vq);
+    g_free(gpio->vhost_dev.vqs);
+    gpio->vhost_dev.vqs = NULL; /* leave no dangling pointer behind */
+    virtio_cleanup(vdev);
+    vhost_user_cleanup(&gpio->vhost_user); /* undoes vhost_user_init() */
+}
+
+static int vu_gpio_connect(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+    struct vhost_dev *hdev = &gpio->vhost_dev;
+    int rc;
+
+    /* A repeated connect is reported as success without redoing the work. */
+    if (gpio->connected) {
+        return 0;
+    }
+    /* NOTE(review): stays true even if vhost_dev_init() fails below. */
+    gpio->connected = true;
+
+    gpio->vhost_user.supports_config = true;
+    vhost_dev_set_config_notifier(hdev, &gpio_ops);
+    rc = vhost_dev_init(hdev, &gpio->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
+                        errp);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Restore the vhost state if the device had already been started. */
+    if (virtio_device_started(vdev, vdev->status)) {
+        vu_gpio_start(vdev); /* its return value is ignored */
+    }
+    return 0;
+}
+
+static void vu_gpio_event(void *opaque, QEMUChrEvent event);
+
+static void vu_gpio_disconnect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+
+    if (gpio->connected) {
+        gpio->connected = false;
+
+        /* Stop the device first, then release the vhost state. */
+        vu_gpio_stop(vdev);
+        vhost_dev_cleanup(&gpio->vhost_dev);
+
+        /* Install the event handler again so a new connection is noticed. */
+        qemu_chr_fe_set_handlers(&gpio->chardev,
+                                 NULL, NULL, vu_gpio_event,
+                                 NULL, dev, NULL, true);
+    }
+}
+
+static void vu_gpio_event(void *opaque, QEMUChrEvent event)
+{
+    DeviceState *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+    Error *local_err = NULL;
+    /* Chardev event handler: connect on open, defer teardown on close. */
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        if (vu_gpio_connect(dev, &local_err) < 0) {
+            error_report_err(local_err); /* reports and frees; was leaked */
+            qemu_chr_fe_disconnect(&gpio->chardev);
+        }
+        break;
+    case CHR_EVENT_CLOSED:
+        /* defer close until later to avoid circular close */
+        vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
+                               vu_gpio_disconnect);
+        break;
+    case CHR_EVENT_BREAK:
+    case CHR_EVENT_MUX_IN:
+    case CHR_EVENT_MUX_OUT:
+        /* Ignore */
+        break;
+    }
+}
+
+/*
+ * One connection attempt made on behalf of vu_gpio_device_realize().
+ *
+ * Waits for the chardev to be connected, makes sure the vhost device is
+ * connected and then reads the device config into gpio->config.
+ *
+ * Returns 0 on success.  On failure returns the negative value of the
+ * step that failed; @errp is passed to each of those steps.
+ */
+static int vu_gpio_realize_connect(VHostUserGPIO *gpio, Error **errp)
+{
+    VirtIODevice *vdev = &gpio->parent_obj;
+    DeviceState *dev = &vdev->parent_obj;
+    struct vhost_dev *vhost_dev = &gpio->vhost_dev;
+    int ret;
+
+    ret = qemu_chr_fe_wait_connected(&gpio->chardev, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * vu_gpio_connect() may have already connected (via the event
+     * callback) in which case it will just report success.
+     */
+    ret = vu_gpio_connect(dev, errp);
+    if (ret < 0) {
+        qemu_chr_fe_disconnect(&gpio->chardev);
+        return ret;
+    }
+    g_assert(gpio->connected);
+
+    ret = vhost_dev_get_config(vhost_dev, (uint8_t *)&gpio->config,
+                               sizeof(gpio->config), errp);
+
+    if (ret < 0) {
+        /* Logged here in addition to whatever the call stored in @errp */
+        error_report("vhost-user-gpio: get config failed");
+
+        /* Drop the connection and the vhost state set up just above */
+        qemu_chr_fe_disconnect(&gpio->chardev);
+        vhost_dev_cleanup(vhost_dev);
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Realize callback of the vhost-user GPIO device.
+ *
+ * Requires the "chardev" property, initialises the vhost-user state and
+ * the virtio device, creates the two virtqueues, registers the chardev
+ * event callback and then tries to connect to the backend, retrying a
+ * bounded number of times.  When every attempt fails the resources set up
+ * here are released with do_vhost_user_cleanup() and the last error stays
+ * in @errp.
+ */
+static void vu_gpio_device_realize(DeviceState *dev, Error **errp)
+{
+    ERRP_GUARD();
+
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserGPIO *gpio = VHOST_USER_GPIO(dev);
+    int retries, ret;
+
+    if (!gpio->chardev.chr) {
+        error_setg(errp, "vhost-user-gpio: chardev is mandatory");
+        return;
+    }
+
+    if (!vhost_user_init(&gpio->vhost_user, &gpio->chardev, errp)) {
+        return;
+    }
+
+    virtio_init(vdev, VIRTIO_ID_GPIO, sizeof(gpio->config));
+
+    /* One vhost_virtqueue slot per virtqueue added below */
+    gpio->vhost_dev.nvqs = 2;
+    gpio->command_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output);
+    gpio->interrupt_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output);
+    gpio->vhost_dev.vqs = g_new0(struct vhost_virtqueue, gpio->vhost_dev.nvqs);
+
+    gpio->connected = false;
+
+    qemu_chr_fe_set_handlers(&gpio->chardev, NULL, NULL, vu_gpio_event, NULL,
+                             dev, NULL, true);
+
+    /*
+     * The first attempt always runs; after a failure the loop repeats
+     * while the retry counter is still non-zero.
+     */
+    retries = REALIZE_CONNECTION_RETRIES;
+    g_assert(!*errp);
+    do {
+        if (*errp) {
+            /* Error left by the previous attempt: log it and start clean */
+            error_prepend(errp, "Reconnecting after error: ");
+            error_report_err(*errp);
+            *errp = NULL;
+        }
+        ret = vu_gpio_realize_connect(gpio, errp);
+    } while (ret < 0 && retries--);
+
+    if (ret < 0) {
+        do_vhost_user_cleanup(vdev, gpio);
+    }
+
+    return;
+}
+
+/*
+ * Unrealize callback: set status 0, unregister the chardev callbacks and
+ * release the vhost and virtio state.
+ */
+static void vu_gpio_device_unrealize(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserGPIO *s = VHOST_USER_GPIO(dev);
+
+    /* Status 0 goes through the regular set_status path first */
+    vu_gpio_set_status(vdev, 0);
+
+    /* No chardev callback may fire once the state below is gone */
+    qemu_chr_fe_set_handlers(&s->chardev,
+                             NULL, NULL, NULL,
+                             NULL, NULL, NULL, false);
+
+    vhost_dev_cleanup(&s->vhost_dev);
+    do_vhost_user_cleanup(vdev, s);
+}
+
+/* Migration description: names the device and flags it as unmigratable. */
+static const VMStateDescription vu_gpio_vmstate = {
+    .name = "vhost-user-gpio",
+    .unmigratable = 1,
+};
+
+/* qdev properties: "chardev" is stored in VHostUserGPIO.chardev. */
+static Property vu_gpio_properties[] = {
+    DEFINE_PROP_CHR("chardev", VHostUserGPIO, chardev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class initialiser: wires up the qdev and virtio callbacks of the device. */
+static void vu_gpio_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* virtio device callbacks */
+    vdc->realize = vu_gpio_device_realize;
+    vdc->unrealize = vu_gpio_device_unrealize;
+    vdc->get_features = vu_gpio_get_features;
+    vdc->get_config = vu_gpio_get_config;
+    vdc->set_status = vu_gpio_set_status;
+    vdc->guest_notifier_mask = vu_gpio_guest_notifier_mask;
+
+    /* generic qdev setup */
+    dc->vmsd = &vu_gpio_vmstate;
+    device_class_set_props(dc, vu_gpio_properties);
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+}
+
+/* QOM type description: a TYPE_VIRTIO_DEVICE child backed by VHostUserGPIO. */
+static const TypeInfo vu_gpio_info = {
+    .name = TYPE_VHOST_USER_GPIO,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VHostUserGPIO),
+    .class_init = vu_gpio_class_init,
+};
+
+/* Registers the type described by vu_gpio_info. */
+static void vu_gpio_register_types(void)
+{
+    type_register_static(&vu_gpio_info);
+}
+
+/* Hooks the registration function into type_init(). */
+type_init(vu_gpio_register_types)
diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c
new file mode 100644
index 00000000..00ac1094
--- /dev/null
+++ b/hw/virtio/vhost-user-i2c-pci.c
@@ -0,0 +1,69 @@
+/*
+ * Vhost-user i2c virtio device PCI glue
+ *
+ * Copyright (c) 2021 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-user-i2c.h"
+#include "hw/virtio/virtio-pci.h"
+
+/* PCI proxy object that embeds the vhost-user I2C virtio device. */
+struct VHostUserI2CPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostUserI2C vdev;
+};
+
+typedef struct VHostUserI2CPCI VHostUserI2CPCI;
+
+/* Base type name; the user-visible name is set in vhost_user_i2c_pci_info */
+#define TYPE_VHOST_USER_I2C_PCI "vhost-user-i2c-pci-base"
+
+/* Declares the VHOST_USER_I2C_PCI() instance cast for the type above */
+DECLARE_INSTANCE_CHECKER(VHostUserI2CPCI, VHOST_USER_I2C_PCI,
+                         TYPE_VHOST_USER_I2C_PCI)
+
+/*
+ * PCI realize hook: fixes the vector count at 1 and realizes the embedded
+ * virtio device on the proxy's bus.
+ */
+static void vhost_user_i2c_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserI2CPCI *i2c_pci = VHOST_USER_I2C_PCI(vpci_dev);
+    DeviceState *child = DEVICE(&i2c_pci->vdev);
+    BusState *bus = BUS(&vpci_dev->bus);
+
+    vpci_dev->nvectors = 1;
+
+    qdev_realize(child, bus, errp);
+}
+
+/* Class initialiser for the PCI proxy: realize hook, category and PCI IDs. */
+static void vhost_user_i2c_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* PCI identification */
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */
+    pcidev_k->revision = 0x00;
+    pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER;
+
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+    k->realize = vhost_user_i2c_pci_realize;
+}
+
+/* Instance initialiser: sets up the embedded TYPE_VHOST_USER_I2C child. */
+static void vhost_user_i2c_pci_instance_init(Object *obj)
+{
+    VHostUserI2CPCI *i2c_pci = VHOST_USER_I2C_PCI(obj);
+
+    virtio_instance_init_common(obj,
+                                &i2c_pci->vdev,
+                                sizeof(i2c_pci->vdev),
+                                TYPE_VHOST_USER_I2C);
+}
+
+/*
+ * virtio-pci type description: only a base name and a non-transitional
+ * name are given.
+ */
+static const VirtioPCIDeviceTypeInfo vhost_user_i2c_pci_info = {
+    .base_name = TYPE_VHOST_USER_I2C_PCI,
+    .non_transitional_name = "vhost-user-i2c-pci",
+    .instance_size = sizeof(VHostUserI2CPCI),
+    .instance_init = vhost_user_i2c_pci_instance_init,
+    .class_init = vhost_user_i2c_pci_class_init,
+};
+
+/* Registers the PCI types described by vhost_user_i2c_pci_info. */
+static void vhost_user_i2c_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_i2c_pci_info);
+}
+
+/*
+ * No trailing semicolon, matching the other type_init() uses in this
+ * patch.
+ */
+type_init(vhost_user_i2c_pci_register)
diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c
new file mode 100644
index 00000000..dc5c828b
--- /dev/null
+++ b/hw/virtio/vhost-user-i2c.c
@@ -0,0 +1,287 @@
+/*
+ * Vhost-user i2c virtio device
+ *
+ * Copyright (c) 2021 Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/vhost-user-i2c.h"
+#include "qemu/error-report.h"
+#include "standard-headers/linux/virtio_ids.h"
+
+/*
+ * Feature bits handed to vhost_get_features() in vu_i2c_get_features();
+ * the list is terminated by VHOST_INVALID_FEATURE_BIT.
+ */
+static const int feature_bits[] = {
+    VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
+    VIRTIO_F_RING_RESET,
+    VHOST_INVALID_FEATURE_BIT
+};
+
+/*
+ * Start the vhost backend for this device.
+ *
+ * Enables the host notifiers, binds the guest notifiers through the parent
+ * bus class, copies the guest features into the vhost device, starts it
+ * and finally unmasks every virtqueue.  Each failure is logged with
+ * error_report() and the steps already taken are undone in reverse order;
+ * nothing is returned to the caller.
+ */
+static void vu_i2c_start(VirtIODevice *vdev)
+{
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+    int ret, i;
+
+    if (!k->set_guest_notifiers) {
+        error_report("binding does not support guest notifiers");
+        return;
+    }
+
+    ret = vhost_dev_enable_notifiers(&i2c->vhost_dev, vdev);
+    if (ret < 0) {
+        error_report("Error enabling host notifiers: %d", -ret);
+        return;
+    }
+
+    ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, true);
+    if (ret < 0) {
+        error_report("Error binding guest notifier: %d", -ret);
+        goto err_host_notifiers;
+    }
+
+    i2c->vhost_dev.acked_features = vdev->guest_features;
+
+    ret = vhost_dev_start(&i2c->vhost_dev, vdev, true);
+    if (ret < 0) {
+        error_report("Error starting vhost-user-i2c: %d", -ret);
+        goto err_guest_notifiers;
+    }
+
+    /*
+     * guest_notifier_mask/pending not used yet, so just unmask
+     * everything here. virtio-pci will do the right thing by
+     * enabling/disabling irqfd.
+     */
+    for (i = 0; i < i2c->vhost_dev.nvqs; i++) {
+        vhost_virtqueue_mask(&i2c->vhost_dev, vdev, i, false);
+    }
+
+    return;
+
+    /* Unwind: each label undoes the step taken before the failing one */
+err_guest_notifiers:
+    k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false);
+err_host_notifiers:
+    vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev);
+}
+
+/*
+ * Stop the vhost backend: stop the vhost device, unbind the guest
+ * notifiers and, if that worked, disable the host notifiers.
+ */
+static void vu_i2c_stop(VirtIODevice *vdev)
+{
+    VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+    struct vhost_dev *hdev = &i2c->vhost_dev;
+    BusState *bus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
+    int rc;
+
+    if (!bus_class->set_guest_notifiers) {
+        return;
+    }
+
+    vhost_dev_stop(hdev, vdev, true);
+
+    rc = bus_class->set_guest_notifiers(bus->parent, hdev->nvqs, false);
+    if (rc >= 0) {
+        vhost_dev_disable_notifiers(hdev, vdev);
+    } else {
+        error_report("vhost guest notifier cleanup failed: %d", rc);
+    }
+}
+
+/*
+ * set_status callback: starts or stops the vhost backend so that it
+ * matches what @status asks for; no-op when it already matches.
+ */
+static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+    bool want_running = virtio_device_should_start(vdev, status);
+    bool is_running = vhost_dev_is_started(&i2c->vhost_dev);
+
+    if (want_running == is_running) {
+        return;
+    }
+
+    if (!want_running) {
+        vu_i2c_stop(vdev);
+    } else {
+        vu_i2c_start(vdev);
+    }
+}
+
+/*
+ * get_features callback: adds VIRTIO_I2C_F_ZERO_LENGTH_REQUEST to the
+ * requested set and returns what vhost_get_features() makes of it.
+ * @errp is not used.
+ */
+static uint64_t vu_i2c_get_features(VirtIODevice *vdev,
+                                    uint64_t requested_features, Error **errp)
+{
+    VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
+    uint64_t features = requested_features;
+
+    virtio_add_feature(&features, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST);
+
+    return vhost_get_features(&i2c->vhost_dev, feature_bits, features);
+}
+
+/* Virtqueue output handler passed to virtio_add_queue(); does nothing. */
+static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /*
+     * Not normally called; it's the daemon that handles the queue;
+     * however virtio's cleanup path can call this.
+     */
+}
+
+/* guest_notifier_mask callback: forwards to vhost_virtqueue_mask(). */
+static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+{
+    struct vhost_dev *hdev = &VHOST_USER_I2C(vdev)->vhost_dev;
+
+    vhost_virtqueue_mask(hdev, vdev, idx, mask);
+}
+
+/* guest_notifier_pending callback: forwards to vhost_virtqueue_pending(). */
+static bool vu_i2c_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    struct vhost_dev *hdev = &VHOST_USER_I2C(vdev)->vhost_dev;
+
+    return vhost_virtqueue_pending(hdev, idx);
+}
+
+/*
+ * Release what vu_i2c_device_realize() set up: the vhost-user state, the
+ * virtqueue, the virtio device state and the vhost_virtqueue array.
+ */
+static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c)
+{
+    vhost_user_cleanup(&i2c->vhost_user);
+    virtio_delete_queue(i2c->vq);
+    virtio_cleanup(vdev);
+    /*
+     * NOTE(review): the array is found through i2c->vhost_dev.vqs; callers
+     * that run vhost_dev_cleanup() or a failed vhost_dev_init() first
+     * should confirm the field still holds the pointer at this point.
+     */
+    g_free(i2c->vhost_dev.vqs);
+    i2c->vhost_dev.vqs = NULL;
+}
+
+/*
+ * Mark the device connected and, when the virtio device is already
+ * started according to vdev->status, start vhost.  Always returns 0.
+ */
+static int vu_i2c_connect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserI2C *s = VHOST_USER_I2C(vdev);
+
+    if (!s->connected) {
+        s->connected = true;
+
+        /* bring vhost back in line with the virtio device state */
+        if (virtio_device_started(vdev, vdev->status)) {
+            vu_i2c_start(vdev);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Mark the device disconnected and stop vhost if it is running.  Does
+ * nothing when the device is not marked connected.
+ */
+static void vu_i2c_disconnect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserI2C *s = VHOST_USER_I2C(vdev);
+
+    if (s->connected) {
+        s->connected = false;
+
+        if (vhost_dev_is_started(&s->vhost_dev)) {
+            vu_i2c_stop(vdev);
+        }
+    }
+}
+
+/*
+ * Chardev event callback; @opaque is the DeviceState registered with it.
+ * OPENED connects, CLOSED disconnects, everything else is ignored.
+ */
+static void vu_i2c_event(void *opaque, QEMUChrEvent event)
+{
+    DeviceState *dev = opaque;
+    VHostUserI2C *s = VHOST_USER_I2C(VIRTIO_DEVICE(dev));
+
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        if (vu_i2c_connect(dev) < 0) {
+            qemu_chr_fe_disconnect(&s->chardev);
+        }
+        break;
+    case CHR_EVENT_CLOSED:
+        vu_i2c_disconnect(dev);
+        break;
+    case CHR_EVENT_BREAK:
+    case CHR_EVENT_MUX_IN:
+    case CHR_EVENT_MUX_OUT:
+        /* Nothing to do for these */
+        break;
+    }
+}
+
+/*
+ * Realize callback of the vhost-user I2C device.
+ *
+ * Requires the "chardev" property, initialises the vhost-user state and
+ * the virtio device, creates the single virtqueue, initialises the vhost
+ * device and, only once all of that succeeded, registers the chardev
+ * event callback.  Errors are reported through @errp.
+ */
+static void vu_i2c_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserI2C *i2c = VHOST_USER_I2C(dev);
+    int ret;
+
+    if (!i2c->chardev.chr) {
+        error_setg(errp, "vhost-user-i2c: missing chardev");
+        return;
+    }
+
+    if (!vhost_user_init(&i2c->vhost_user, &i2c->chardev, errp)) {
+        return;
+    }
+
+    virtio_init(vdev, VIRTIO_ID_I2C_ADAPTER, 0);
+
+    i2c->vhost_dev.nvqs = 1;
+    i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output);
+    i2c->vhost_dev.vqs = g_new0(struct vhost_virtqueue, i2c->vhost_dev.nvqs);
+
+    ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user,
+                         VHOST_BACKEND_TYPE_USER, 0, errp);
+    if (ret < 0) {
+        do_vhost_user_cleanup(vdev, i2c);
+        /*
+         * Stop here: the device has just been torn down, so it must not
+         * be handed to the chardev as the target of event callbacks.
+         */
+        return;
+    }
+
+    qemu_chr_fe_set_handlers(&i2c->chardev, NULL, NULL, vu_i2c_event, NULL,
+                             dev, NULL, true);
+}
+
+/* Unrealize callback: set status 0, then release vhost and virtio state. */
+static void vu_i2c_device_unrealize(DeviceState *dev)
+{
+    VHostUserI2C *s = VHOST_USER_I2C(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    /* Status 0 stops the vhost backend when it is running */
+    vu_i2c_set_status(vdev, 0);
+
+    vhost_dev_cleanup(&s->vhost_dev);
+    do_vhost_user_cleanup(vdev, s);
+}
+
+/* Migration description: names the device and flags it as unmigratable. */
+static const VMStateDescription vu_i2c_vmstate = {
+    .name = "vhost-user-i2c",
+    .unmigratable = 1,
+};
+
+/* qdev properties: "chardev" is stored in VHostUserI2C.chardev. */
+static Property vu_i2c_properties[] = {
+    DEFINE_PROP_CHR("chardev", VHostUserI2C, chardev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class initialiser: wires up the qdev and virtio callbacks of the device. */
+static void vu_i2c_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* virtio device callbacks */
+    vdc->realize = vu_i2c_device_realize;
+    vdc->unrealize = vu_i2c_device_unrealize;
+    vdc->get_features = vu_i2c_get_features;
+    vdc->set_status = vu_i2c_set_status;
+    vdc->guest_notifier_mask = vu_i2c_guest_notifier_mask;
+    vdc->guest_notifier_pending = vu_i2c_guest_notifier_pending;
+
+    /* generic qdev setup */
+    dc->vmsd = &vu_i2c_vmstate;
+    device_class_set_props(dc, vu_i2c_properties);
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+}
+
+/* QOM type description: a TYPE_VIRTIO_DEVICE child backed by VHostUserI2C. */
+static const TypeInfo vu_i2c_info = {
+    .name = TYPE_VHOST_USER_I2C,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VHostUserI2C),
+    .class_init = vu_i2c_class_init,
+};
+
+/* Registers the type described by vu_i2c_info. */
+static void vu_i2c_register_types(void)
+{
+    type_register_static(&vu_i2c_info);
+}
+
+/* Hooks the registration function into type_init(). */
+type_init(vu_i2c_register_types)
diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c
new file mode 100644
index 00000000..b858898a
--- /dev/null
+++ b/hw/virtio/vhost-user-input-pci.c
@@ -0,0 +1,50 @@
+/*
+ * This work is licensed under the terms of the GNU LGPL, version 2 or
+ * later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-input.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VHostUserInputPCI VHostUserInputPCI;
+
+#define TYPE_VHOST_USER_INPUT_PCI "vhost-user-input-pci"
+
+/* Declares the VHOST_USER_INPUT_PCI() instance cast for the type above */
+DECLARE_INSTANCE_CHECKER(VHostUserInputPCI, VHOST_USER_INPUT_PCI,
+                         TYPE_VHOST_USER_INPUT_PCI)
+
+/* PCI proxy object that embeds the vhost-user input device. */
+struct VHostUserInputPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostUserInput vhi;
+};
+
+/*
+ * Instance initialiser: sets up the embedded TYPE_VHOST_USER_INPUT child
+ * and aliases its "chardev" property onto the proxy object.
+ */
+static void vhost_user_input_pci_instance_init(Object *obj)
+{
+    VHostUserInputPCI *input_pci = VHOST_USER_INPUT_PCI(obj);
+
+    virtio_instance_init_common(obj,
+                                &input_pci->vhi,
+                                sizeof(input_pci->vhi),
+                                TYPE_VHOST_USER_INPUT);
+
+    object_property_add_alias(obj, "chardev", OBJECT(&input_pci->vhi),
+                              "chardev");
+}
+
+/*
+ * virtio-pci type description: a single generic name whose parent is
+ * TYPE_VIRTIO_INPUT_PCI.
+ */
+static const VirtioPCIDeviceTypeInfo vhost_user_input_pci_info = {
+    .generic_name = TYPE_VHOST_USER_INPUT_PCI,
+    .parent = TYPE_VIRTIO_INPUT_PCI,
+    .instance_size = sizeof(VHostUserInputPCI),
+    .instance_init = vhost_user_input_pci_instance_init,
+};
+
+/* Registers the PCI types described by vhost_user_input_pci_info. */
+static void vhost_user_input_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_input_pci_info);
+}
+
+/* Hooks the registration function into type_init(). */
+type_init(vhost_user_input_pci_register)
diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c
new file mode 100644
index 00000000..f6493545
--- /dev/null
+++ b/hw/virtio/vhost-user-rng-pci.c
@@ -0,0 +1,79 @@
+/*
+ * Vhost-user RNG virtio device PCI glue
+ *
+ * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-user-rng.h"
+#include "hw/virtio/virtio-pci.h"
+
+/* PCI proxy object that embeds the vhost-user RNG virtio device. */
+struct VHostUserRNGPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostUserRNG vdev;
+};
+
+typedef struct VHostUserRNGPCI VHostUserRNGPCI;
+
+/* Base type name; the user-visible name is set in vhost_user_rng_pci_info */
+#define TYPE_VHOST_USER_RNG_PCI "vhost-user-rng-pci-base"
+
+/* Declares the VHOST_USER_RNG_PCI() instance cast for the type above */
+DECLARE_INSTANCE_CHECKER(VHostUserRNGPCI, VHOST_USER_RNG_PCI,
+                         TYPE_VHOST_USER_RNG_PCI)
+
+/*
+ * qdev properties: "vectors" is stored in VirtIOPCIProxy.nvectors and
+ * defaults to DEV_NVECTORS_UNSPECIFIED, which the realize hook replaces.
+ */
+static Property vhost_user_rng_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * PCI realize hook: picks 1 vector when none was specified and realizes
+ * the embedded virtio device on the proxy's bus.
+ */
+static void vhost_user_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserRNGPCI *rng_pci = VHOST_USER_RNG_PCI(vpci_dev);
+    DeviceState *child = DEVICE(&rng_pci->vdev);
+    bool vectors_unset = vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED;
+
+    if (vectors_unset) {
+        vpci_dev->nvectors = 1;
+    }
+
+    qdev_realize(child, BUS(&vpci_dev->bus), errp);
+}
+
+/* Class initialiser for the PCI proxy: hook, properties and PCI IDs. */
+static void vhost_user_rng_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* PCI identification */
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */
+    pcidev_k->revision = 0x00;
+    pcidev_k->class_id = PCI_CLASS_OTHERS;
+
+    /* qdev setup */
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+    device_class_set_props(dc, vhost_user_rng_pci_properties);
+
+    k->realize = vhost_user_rng_pci_realize;
+}
+
+/* Instance initialiser: sets up the embedded TYPE_VHOST_USER_RNG child. */
+static void vhost_user_rng_pci_instance_init(Object *obj)
+{
+    VHostUserRNGPCI *rng_pci = VHOST_USER_RNG_PCI(obj);
+
+    virtio_instance_init_common(obj,
+                                &rng_pci->vdev,
+                                sizeof(rng_pci->vdev),
+                                TYPE_VHOST_USER_RNG);
+}
+
+/*
+ * virtio-pci type description: only a base name and a non-transitional
+ * name are given.
+ */
+static const VirtioPCIDeviceTypeInfo vhost_user_rng_pci_info = {
+    .base_name = TYPE_VHOST_USER_RNG_PCI,
+    .non_transitional_name = "vhost-user-rng-pci",
+    .instance_size = sizeof(VHostUserRNGPCI),
+    .instance_init = vhost_user_rng_pci_instance_init,
+    .class_init = vhost_user_rng_pci_class_init,
+};
+
+/* Registers the PCI types described by vhost_user_rng_pci_info. */
+static void vhost_user_rng_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_rng_pci_info);
+}
+
+/*
+ * No trailing semicolon, matching the other type_init() uses in this
+ * patch.
+ */
+type_init(vhost_user_rng_pci_register)
diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c
new file mode 100644
index 00000000..201a39e2
--- /dev/null
+++ b/hw/virtio/vhost-user-rng.c
@@ -0,0 +1,299 @@
+/*
+ * Vhost-user RNG virtio device
+ *
+ * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * Implementation closely modelled on vhost-user-i2c.c
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/vhost-user-rng.h"
+#include "qemu/error-report.h"
+#include "standard-headers/linux/virtio_ids.h"
+
+/*
+ * Feature bits handed to vhost_get_features() in vu_rng_get_features();
+ * the list is terminated by VHOST_INVALID_FEATURE_BIT.
+ */
+static const int feature_bits[] = {
+    VIRTIO_F_RING_RESET,
+    VHOST_INVALID_FEATURE_BIT
+};
+
+/*
+ * Start the vhost backend for this device.
+ *
+ * Enables the host notifiers, binds the guest notifiers through the parent
+ * bus class, copies the guest features into the vhost device, starts it
+ * and finally unmasks every virtqueue.  Each failure is logged with
+ * error_report() and the steps already taken are undone in reverse order;
+ * nothing is returned to the caller.
+ */
+static void vu_rng_start(VirtIODevice *vdev)
+{
+    VHostUserRNG *rng = VHOST_USER_RNG(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    int ret;
+    int i;
+
+    if (!k->set_guest_notifiers) {
+        error_report("binding does not support guest notifiers");
+        return;
+    }
+
+    ret = vhost_dev_enable_notifiers(&rng->vhost_dev, vdev);
+    if (ret < 0) {
+        error_report("Error enabling host notifiers: %d", -ret);
+        return;
+    }
+
+    ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, true);
+    if (ret < 0) {
+        error_report("Error binding guest notifier: %d", -ret);
+        goto err_host_notifiers;
+    }
+
+    rng->vhost_dev.acked_features = vdev->guest_features;
+    ret = vhost_dev_start(&rng->vhost_dev, vdev, true);
+    if (ret < 0) {
+        error_report("Error starting vhost-user-rng: %d", -ret);
+        goto err_guest_notifiers;
+    }
+
+    /*
+     * guest_notifier_mask/pending not used yet, so just unmask
+     * everything here. virtio-pci will do the right thing by
+     * enabling/disabling irqfd.
+     */
+    for (i = 0; i < rng->vhost_dev.nvqs; i++) {
+        vhost_virtqueue_mask(&rng->vhost_dev, vdev, i, false);
+    }
+
+    return;
+
+    /* Unwind: each label undoes the step taken before the failing one */
+err_guest_notifiers:
+    k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false);
+err_host_notifiers:
+    vhost_dev_disable_notifiers(&rng->vhost_dev, vdev);
+}
+
+/*
+ * Stop the vhost backend: stop the vhost device, unbind the guest
+ * notifiers and, if that worked, disable the host notifiers.
+ */
+static void vu_rng_stop(VirtIODevice *vdev)
+{
+    VHostUserRNG *rng = VHOST_USER_RNG(vdev);
+    struct vhost_dev *hdev = &rng->vhost_dev;
+    BusState *bus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
+    int rc;
+
+    if (!bus_class->set_guest_notifiers) {
+        return;
+    }
+
+    vhost_dev_stop(hdev, vdev, true);
+
+    rc = bus_class->set_guest_notifiers(bus->parent, hdev->nvqs, false);
+    if (rc >= 0) {
+        vhost_dev_disable_notifiers(hdev, vdev);
+    } else {
+        error_report("vhost guest notifier cleanup failed: %d", rc);
+    }
+}
+
+/*
+ * set_status callback: starts or stops the vhost backend so that it
+ * matches what @status asks for; no-op when it already matches.
+ */
+static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostUserRNG *rng = VHOST_USER_RNG(vdev);
+    bool want_running = virtio_device_should_start(vdev, status);
+    bool is_running = vhost_dev_is_started(&rng->vhost_dev);
+
+    if (want_running == is_running) {
+        return;
+    }
+
+    if (!want_running) {
+        vu_rng_stop(vdev);
+    } else {
+        vu_rng_start(vdev);
+    }
+}
+
+/*
+ * get_features callback: returns what vhost_get_features() makes of the
+ * requested set, using this file's feature_bits list.  @errp is not used.
+ */
+static uint64_t vu_rng_get_features(VirtIODevice *vdev,
+                                    uint64_t requested_features, Error **errp)
+{
+    struct vhost_dev *hdev = &VHOST_USER_RNG(vdev)->vhost_dev;
+
+    return vhost_get_features(hdev, feature_bits, requested_features);
+}
+
+/* Virtqueue output handler passed to virtio_add_queue(); does nothing. */
+static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /*
+     * Not normally called; it's the daemon that handles the queue;
+     * however virtio's cleanup path can call this.
+     */
+}
+
+/* guest_notifier_mask callback: forwards to vhost_virtqueue_mask(). */
+static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+{
+    struct vhost_dev *hdev = &VHOST_USER_RNG(vdev)->vhost_dev;
+
+    vhost_virtqueue_mask(hdev, vdev, idx, mask);
+}
+
+/* guest_notifier_pending callback: forwards to vhost_virtqueue_pending(). */
+static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    struct vhost_dev *hdev = &VHOST_USER_RNG(vdev)->vhost_dev;
+
+    return vhost_virtqueue_pending(hdev, idx);
+}
+
+/*
+ * Mark the device connected and, when the virtio device is already
+ * started according to vdev->status, start vhost.
+ */
+static void vu_rng_connect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserRNG *s = VHOST_USER_RNG(vdev);
+
+    if (!s->connected) {
+        s->connected = true;
+
+        /* bring vhost back in line with the virtio device state */
+        if (virtio_device_started(vdev, vdev->status)) {
+            vu_rng_start(vdev);
+        }
+    }
+}
+
+/*
+ * Mark the device disconnected and stop vhost if it is running.  Does
+ * nothing when the device is not marked connected.
+ */
+static void vu_rng_disconnect(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserRNG *s = VHOST_USER_RNG(vdev);
+
+    if (s->connected) {
+        s->connected = false;
+
+        if (vhost_dev_is_started(&s->vhost_dev)) {
+            vu_rng_stop(vdev);
+        }
+    }
+}
+
+/*
+ * Chardev event callback; @opaque is the DeviceState registered with it.
+ * OPENED connects, CLOSED disconnects, everything else is ignored.
+ */
+static void vu_rng_event(void *opaque, QEMUChrEvent event)
+{
+    DeviceState *device = opaque;
+
+    switch (event) {
+    case CHR_EVENT_CLOSED:
+        vu_rng_disconnect(device);
+        break;
+    case CHR_EVENT_OPENED:
+        vu_rng_connect(device);
+        break;
+    case CHR_EVENT_BREAK:
+    case CHR_EVENT_MUX_IN:
+    case CHR_EVENT_MUX_OUT:
+        /* Nothing to do for these */
+        break;
+    }
+}
+
+/*
+ * Realize callback of the vhost-user RNG device.
+ *
+ * Requires the "chardev" property, initialises the vhost-user state and
+ * the virtio device, creates the request virtqueue, initialises the vhost
+ * device and finally registers the chardev event callback.  On failure
+ * everything set up so far is released and the error is left in @errp.
+ */
+static void vu_rng_device_realize(DeviceState *dev, Error **errp)
+{
+    /* Needed so that *errp can be inspected safely further down */
+    ERRP_GUARD();
+
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostUserRNG *rng = VHOST_USER_RNG(dev);
+    struct vhost_virtqueue *vhost_vqs = NULL;
+    int ret;
+
+    if (!rng->chardev.chr) {
+        error_setg(errp, "missing chardev");
+        return;
+    }
+
+    if (!vhost_user_init(&rng->vhost_user, &rng->chardev, errp)) {
+        return;
+    }
+
+    virtio_init(vdev, VIRTIO_ID_RNG, 0);
+
+    rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output);
+    if (!rng->req_vq) {
+        /* There is no errno to report here, so use plain error_setg() */
+        error_setg(errp, "virtio_add_queue() failed");
+        goto virtio_add_queue_failed;
+    }
+
+    rng->vhost_dev.nvqs = 1;
+    vhost_vqs = g_new0(struct vhost_virtqueue, rng->vhost_dev.nvqs);
+    rng->vhost_dev.vqs = vhost_vqs;
+    ret = vhost_dev_init(&rng->vhost_dev, &rng->vhost_user,
+                         VHOST_BACKEND_TYPE_USER, 0, errp);
+    if (ret < 0) {
+        /*
+         * vhost_dev_init() was given @errp; setting an Error that is
+         * already set is not allowed, so only add one when it left none.
+         */
+        if (!*errp) {
+            error_setg_errno(errp, -ret, "vhost_dev_init() failed");
+        }
+        goto vhost_dev_init_failed;
+    }
+
+    qemu_chr_fe_set_handlers(&rng->chardev, NULL, NULL, vu_rng_event, NULL,
+                             dev, NULL, true);
+
+    return;
+
+vhost_dev_init_failed:
+    /*
+     * Free through the local pointer so the release does not depend on
+     * what the failed vhost_dev_init() left in rng->vhost_dev.
+     */
+    g_free(vhost_vqs);
+    rng->vhost_dev.vqs = NULL;
+    virtio_delete_queue(rng->req_vq);
+virtio_add_queue_failed:
+    virtio_cleanup(vdev);
+    vhost_user_cleanup(&rng->vhost_user);
+}
+
+/*
+ * Unrealize callback: set status 0, then release the vhost device, the
+ * vhost_virtqueue array, the virtqueue, the virtio state and the
+ * vhost-user state.
+ */
+static void vu_rng_device_unrealize(DeviceState *dev)
+{
+    VHostUserRNG *s = VHOST_USER_RNG(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    vu_rng_set_status(vdev, 0);
+
+    /* vhost side */
+    vhost_dev_cleanup(&s->vhost_dev);
+    g_free(s->vhost_dev.vqs);
+    s->vhost_dev.vqs = NULL;
+
+    /* virtio side, then the vhost-user state */
+    virtio_delete_queue(s->req_vq);
+    virtio_cleanup(vdev);
+    vhost_user_cleanup(&s->vhost_user);
+}
+
+/* get_vhost callback: returns the vhost_dev embedded in the device. */
+static struct vhost_dev *vu_rng_get_vhost(VirtIODevice *vdev)
+{
+    return &VHOST_USER_RNG(vdev)->vhost_dev;
+}
+
+/* Migration description: names the device and flags it as unmigratable. */
+static const VMStateDescription vu_rng_vmstate = {
+    .name = "vhost-user-rng",
+    .unmigratable = 1,
+};
+
+/* qdev properties: "chardev" is stored in VHostUserRNG.chardev. */
+static Property vu_rng_properties[] = {
+    DEFINE_PROP_CHR("chardev", VHostUserRNG, chardev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class initialiser: wires up the qdev and virtio callbacks of the device. */
+static void vu_rng_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* virtio device callbacks */
+    vdc->realize = vu_rng_device_realize;
+    vdc->unrealize = vu_rng_device_unrealize;
+    vdc->get_features = vu_rng_get_features;
+    vdc->set_status = vu_rng_set_status;
+    vdc->guest_notifier_mask = vu_rng_guest_notifier_mask;
+    vdc->guest_notifier_pending = vu_rng_guest_notifier_pending;
+    vdc->get_vhost = vu_rng_get_vhost;
+
+    /* generic qdev setup */
+    dc->vmsd = &vu_rng_vmstate;
+    device_class_set_props(dc, vu_rng_properties);
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+}
+
+/* QOM type description: a TYPE_VIRTIO_DEVICE child backed by VHostUserRNG. */
+static const TypeInfo vu_rng_info = {
+    .name = TYPE_VHOST_USER_RNG,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VHostUserRNG),
+    .class_init = vu_rng_class_init,
+};
+
+/* Registers the type described by vu_rng_info. */
+static void vu_rng_register_types(void)
+{
+    type_register_static(&vu_rng_info);
+}
+
+/* Hooks the registration function into type_init(). */
+type_init(vu_rng_register_types)
diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c
new file mode 100644
index 00000000..75882e3c
--- /dev/null
+++ b/hw/virtio/vhost-user-scsi-pci.c
@@ -0,0 +1,110 @@
+/*
+ * Vhost user scsi PCI Bindings
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is largely based on the "vhost-scsi" implementation by:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Nicholas Bellinger <nab@risingtidesystems.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "standard-headers/linux/virtio_pci.h"
+#include "hw/virtio/vhost-user-scsi.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/loader.h"
+#include "sysemu/kvm.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;
+
+/* Base type name; the user-visible names are in vhost_user_scsi_pci_info */
+#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base"
+/* Declares the VHOST_USER_SCSI_PCI() instance cast for the type above */
+DECLARE_INSTANCE_CHECKER(VHostUserSCSIPCI, VHOST_USER_SCSI_PCI,
+                         TYPE_VHOST_USER_SCSI_PCI)
+
+/* PCI proxy object that embeds the vhost-user SCSI virtio device. */
+struct VHostUserSCSIPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostUserSCSI vdev;
+};
+
+/*
+ * qdev properties: "vectors" is stored in VirtIOPCIProxy.nvectors and
+ * defaults to DEV_NVECTORS_UNSPECIFIED, which the realize hook replaces.
+ */
+static Property vhost_user_scsi_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * PCI realize hook: resolves an automatic queue count, derives the vector
+ * count from it when none was specified, and realizes the embedded virtio
+ * device on the proxy's bus.
+ */
+static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserSCSIPCI *scsi_pci = VHOST_USER_SCSI_PCI(vpci_dev);
+    DeviceState *child = DEVICE(&scsi_pci->vdev);
+    VirtIOSCSIConf *scsi_conf = &scsi_pci->vdev.parent_obj.parent_obj.conf;
+
+    if (scsi_conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) {
+        scsi_conf->num_queues =
+            virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED);
+    }
+
+    /* Request queues plus the fixed queues plus one more vector */
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors =
+            scsi_conf->num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1;
+    }
+
+    qdev_realize(child, BUS(&vpci_dev->bus), errp);
+}
+
+/* Class initialiser for the PCI proxy: hook, properties and PCI IDs. */
+static void vhost_user_scsi_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* PCI identification */
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_SCSI;
+    pcidev_k->revision = 0x00;
+    pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
+
+    /* qdev setup */
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+    device_class_set_props(dc, vhost_user_scsi_pci_properties);
+
+    k->realize = vhost_user_scsi_pci_realize;
+}
+
+/*
+ * Instance initialiser: sets up the embedded TYPE_VHOST_USER_SCSI child
+ * and aliases its "bootindex" property onto the proxy object.
+ */
+static void vhost_user_scsi_pci_instance_init(Object *obj)
+{
+    VHostUserSCSIPCI *scsi_pci = VHOST_USER_SCSI_PCI(obj);
+
+    virtio_instance_init_common(obj,
+                                &scsi_pci->vdev,
+                                sizeof(scsi_pci->vdev),
+                                TYPE_VHOST_USER_SCSI);
+
+    object_property_add_alias(obj, "bootindex",
+                              OBJECT(&scsi_pci->vdev), "bootindex");
+}
+
+/*
+ * virtio-pci type description: base, generic, transitional and
+ * non-transitional names are all given.
+ */
+static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = {
+    .base_name             = TYPE_VHOST_USER_SCSI_PCI,
+    .generic_name          = "vhost-user-scsi-pci",
+    .transitional_name     = "vhost-user-scsi-pci-transitional",
+    .non_transitional_name = "vhost-user-scsi-pci-non-transitional",
+    .instance_size = sizeof(VHostUserSCSIPCI),
+    .instance_init = vhost_user_scsi_pci_instance_init,
+    .class_init    = vhost_user_scsi_pci_class_init,
+};
+
+/* Registers the PCI types described by vhost_user_scsi_pci_info. */
+static void vhost_user_scsi_pci_register(void)
+{
+    virtio_pci_types_register(&vhost_user_scsi_pci_info);
+}
+
+/* Hooks the registration function into type_init(). */
+type_init(vhost_user_scsi_pci_register)
diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c
new file mode 100644
index 00000000..e5a86e80
--- /dev/null
+++ b/hw/virtio/vhost-user-vsock-pci.c
@@ -0,0 +1,86 @@
+/*
+ * Vhost-user vsock PCI Bindings
+ *
+ * Copyright 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-user-vsock.h"
+#include "qom/object.h"
+
+typedef struct VHostUserVSockPCI VHostUserVSockPCI;
+
+/*
+ * vhost-user-vsock-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VHOST_USER_VSOCK_PCI "vhost-user-vsock-pci-base"
+DECLARE_INSTANCE_CHECKER(VHostUserVSockPCI, VHOST_USER_VSOCK_PCI,
+                         TYPE_VHOST_USER_VSOCK_PCI)
+
+struct VHostUserVSockPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostUserVSock vdev;
+};
+
+/* vhost-user-vsock-pci */
+
+static Property vhost_user_vsock_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostUserVSockPCI *proxy = VHOST_USER_VSOCK_PCI(vpci_dev);
+    BusState *virtio_bus = BUS(&vpci_dev->bus);
+
+    /* No pre-5.1 compat to care about here, unlike vhost-vsock. */
+    virtio_pci_force_virtio_1(vpci_dev);
+
+    qdev_realize(DEVICE(&proxy->vdev), virtio_bus, errp);
+}
+
+static void vhost_user_vsock_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);   /* PCI IDs */
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);  /* realize hook */
+    DeviceClass *dev_class = DEVICE_CLASS(klass);          /* qdev props */
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK;
+    pci_class->class_id = PCI_CLASS_COMMUNICATION_OTHER;
+    pci_class->revision = 0x00;
+    vpci_class->realize = vhost_user_vsock_pci_realize;
+    device_class_set_props(dev_class, vhost_user_vsock_pci_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+}
+
+static void vhost_user_vsock_pci_instance_init(Object *obj)
+{
+    VHostUserVSockPCI *proxy = VHOST_USER_VSOCK_PCI(obj);
+    /* Initialise the embedded vdev member as TYPE_VHOST_USER_VSOCK. */
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VHOST_USER_VSOCK);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = {
+    .base_name             = TYPE_VHOST_USER_VSOCK_PCI,
+    .generic_name          = "vhost-user-vsock-pci",
+    .non_transitional_name = "vhost-user-vsock-pci-non-transitional",
+    .instance_size = sizeof(VHostUserVSockPCI),
+    .instance_init = vhost_user_vsock_pci_instance_init,
+    .class_init    = vhost_user_vsock_pci_class_init,
+};
+
+static void virtio_pci_vhost_register(void)
+{
+    virtio_pci_types_register(&vhost_user_vsock_pci_info);
+}
+
+type_init(virtio_pci_vhost_register)
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
new file mode 100644
index 00000000..9431b979
--- /dev/null
+++ b/hw/virtio/vhost-user-vsock.c
@@ -0,0 +1,180 @@
+/*
+ * Vhost-user vsock virtio device
+ *
+ * Copyright 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/virtio/vhost-user-vsock.h"
+
+static const int user_feature_bits[] = {
+    VIRTIO_F_VERSION_1,
+    VIRTIO_RING_F_INDIRECT_DESC,
+    VIRTIO_RING_F_EVENT_IDX,
+    VIRTIO_F_NOTIFY_ON_EMPTY,
+    VHOST_INVALID_FEATURE_BIT
+};
+
+static void vuv_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VHostUserVSock *self = VHOST_USER_VSOCK(vdev);
+    /* Served from the cached vsockcfg; no backend request is made here. */
+    memcpy(config, &self->vsockcfg, sizeof(struct virtio_vsock_config));
+}
+
+static int vuv_handle_config_change(struct vhost_dev *dev)
+{
+    /* Refresh the cached config from the backend, then signal the change. */
+    VHostUserVSock *self = VHOST_USER_VSOCK(dev->vdev);
+    uint8_t *cache = (uint8_t *)&self->vsockcfg;
+    Error *err = NULL;
+
+    if (vhost_dev_get_config(dev, cache, sizeof(struct virtio_vsock_config),
+                             &err) < 0) {
+        error_report_err(err);
+        return -1;
+    }
+
+    virtio_notify_config(dev->vdev);
+    return 0;
+}
+
+const VhostDevConfigOps vsock_ops = {
+    .vhost_dev_config_notifier = vuv_handle_config_change,
+};
+
+static void vuv_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostVSockCommon *common = VHOST_VSOCK_COMMON(vdev);
+    bool want_running = virtio_device_should_start(vdev, status);
+    bool is_running = vhost_dev_is_started(&common->vhost_dev);
+
+    /* Nothing to do when the backend is already in the requested state. */
+    if (is_running == want_running) {
+        return;
+    }
+
+    if (!want_running) {
+        vhost_vsock_common_stop(vdev);
+        return;
+    }
+    /* A start failure is not propagated: this callback returns void. */
+    vhost_vsock_common_start(vdev);
+}
+
+/* Run @features through vhost_get_features(), then the common vsock helper. */
+static uint64_t vuv_get_features(VirtIODevice *vdev, uint64_t features,
+                                 Error **errp)
+{
+    VHostVSockCommon *common = VHOST_VSOCK_COMMON(vdev);
+
+    features = vhost_get_features(&common->vhost_dev, user_feature_bits,
+                                  features);
+    return vhost_vsock_common_get_features(vdev, features, errp);
+}
+
+static const VMStateDescription vuv_vmstate = {
+    .name = "vhost-user-vsock",
+    .unmigratable = 1,
+};
+
+static void vuv_device_realize(DeviceState *dev, Error **errp)
+{
+    VHostUserVSock *self = VHOST_USER_VSOCK(dev);
+    VHostVSockCommon *common = VHOST_VSOCK_COMMON(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    struct vhost_dev *hdev = &common->vhost_dev;
+    uint8_t *cfg = (uint8_t *)&self->vsockcfg;
+
+    /* The "chardev" property is mandatory. */
+    if (!self->conf.chardev.chr) {
+        error_setg(errp, "missing chardev");
+        return;
+    }
+
+    if (!vhost_user_init(&self->vhost_user, &self->conf.chardev, errp)) {
+        return;
+    }
+
+    vhost_vsock_common_realize(vdev);
+    vhost_dev_set_config_notifier(hdev, &vsock_ops);
+
+    if (vhost_dev_init(hdev, &self->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
+                       errp) < 0) {
+        goto err_virtio;
+    }
+
+    /* Fill the cached config that vuv_get_config() copies out. */
+    if (vhost_dev_get_config(hdev, cfg, sizeof(struct virtio_vsock_config),
+                             errp) < 0) {
+        goto err_vhost_dev;
+    }
+
+    return;
+
+    /* Unwind in reverse order of setup. */
+err_vhost_dev:
+    vhost_dev_cleanup(hdev);
+err_virtio:
+    vhost_vsock_common_unrealize(vdev);
+    vhost_user_cleanup(&self->vhost_user);
+}
+
+/*
+ * Stop the backend if needed, then release what vuv_device_realize() set up.
+ */
+static void vuv_device_unrealize(DeviceState *dev)
+{
+    VHostUserVSock *self = VHOST_USER_VSOCK(dev);
+    VHostVSockCommon *common = VHOST_VSOCK_COMMON(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    /* With status 0 this stops the vhost backend if appropriate. */
+    vuv_set_status(vdev, 0);
+
+    vhost_dev_cleanup(&common->vhost_dev);
+    vhost_vsock_common_unrealize(vdev);
+    vhost_user_cleanup(&self->vhost_user);
+}
+
+static Property vuv_properties[] = {
+    DEFINE_PROP_CHR("chardev", VHostUserVSock, conf.chardev),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vuv_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    /* Virtio callbacks first, then the qdev vmstate and properties. */
+    virtio_class->realize = vuv_device_realize;
+    virtio_class->unrealize = vuv_device_unrealize;
+    virtio_class->set_status = vuv_set_status;
+    virtio_class->get_config = vuv_get_config;
+    virtio_class->get_features = vuv_get_features;
+    dev_class->vmsd = &vuv_vmstate;
+    device_class_set_props(dev_class, vuv_properties);
+}
+
+static const TypeInfo vuv_info = {
+    .name = TYPE_VHOST_USER_VSOCK,
+    .parent = TYPE_VHOST_VSOCK_COMMON,
+    .instance_size = sizeof(VHostUserVSock),
+    .class_init = vuv_class_init,
+};
+
+static void vuv_register_types(void)
+{
+    type_register_static(&vuv_info);
+}
+
+type_init(vuv_register_types)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
new file mode 100644
index 00000000..8f635844
--- /dev/null
+++ b/hw/virtio/vhost-user.c
@@ -0,0 +1,2800 @@
+/*
+ * vhost-user
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-net.h"
+#include "chardev/char-fe.h"
+#include "io/channel-socket.h"
+#include "sysemu/kvm.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/sockets.h"
+#include "sysemu/runstate.h"
+#include "sysemu/cryptodev.h"
+#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
+#include "trace.h"
+#include "exec/ramblock.h"
+
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "standard-headers/linux/vhost_types.h"
+
+#ifdef CONFIG_LINUX
+#include <linux/userfaultfd.h>
+#endif
+
+#define VHOST_MEMORY_BASELINE_NREGIONS    8
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_USER_SLAVE_MAX_FDS     8
+
+/*
+ * Set maximum number of RAM slots supported to
+ * the maximum number supported by the target
+ * hardware plaform.
+ */
+#if defined(TARGET_X86) || defined(TARGET_X86_64) || \
+    defined(TARGET_ARM) || defined(TARGET_ARM_64)
+#include "hw/acpi/acpi.h"
+#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS
+
+#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
+#include "hw/ppc/spapr.h"
+#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
+
+#else
+#define VHOST_USER_MAX_RAM_SLOTS 512
+#endif
+
+/*
+ * Maximum size of virtio device config space
+ */
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
+    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
+    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
+    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
+    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
+    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
+    VHOST_USER_PROTOCOL_F_CONFIG = 9,
+    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
+    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
+    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
+    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
+    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
+    VHOST_USER_PROTOCOL_F_STATUS = 16,
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
+    VHOST_USER_SET_VRING_ENABLE = 18,
+    VHOST_USER_SEND_RARP = 19,
+    VHOST_USER_NET_SET_MTU = 20,
+    VHOST_USER_SET_SLAVE_REQ_FD = 21,
+    VHOST_USER_IOTLB_MSG = 22,
+    VHOST_USER_SET_VRING_ENDIAN = 23,
+    VHOST_USER_GET_CONFIG = 24,
+    VHOST_USER_SET_CONFIG = 25,
+    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
+    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
+    VHOST_USER_POSTCOPY_ADVISE  = 28,
+    VHOST_USER_POSTCOPY_LISTEN  = 29,
+    VHOST_USER_POSTCOPY_END     = 30,
+    VHOST_USER_GET_INFLIGHT_FD = 31,
+    VHOST_USER_SET_INFLIGHT_FD = 32,
+    VHOST_USER_GPU_SET_SOCKET = 33,
+    VHOST_USER_RESET_DEVICE = 34,
+    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
+    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
+    VHOST_USER_ADD_MEM_REG = 37,
+    VHOST_USER_REM_MEM_REG = 38,
+    VHOST_USER_SET_STATUS = 39,
+    VHOST_USER_GET_STATUS = 40,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef enum VhostUserSlaveRequest {
+    VHOST_USER_SLAVE_NONE = 0,
+    VHOST_USER_SLAVE_IOTLB_MSG = 1,
+    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
+    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+    VHOST_USER_SLAVE_MAX
+}  VhostUserSlaveRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMemRegMsg {
+    uint64_t padding;
+    VhostUserMemoryRegion region;
+} VhostUserMemRegMsg;
+
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
+typedef struct VhostUserConfig {
+    uint32_t offset;
+    uint32_t size;
+    uint32_t flags;
+    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+} VhostUserConfig;
+
+#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
+#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
+
+typedef struct VhostUserCryptoSession {
+    /* session id for success, -1 on errors */
+    int64_t session_id;
+    CryptoDevBackendSymSessionInfo session_setup_data;
+    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
+    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
+} VhostUserCryptoSession;
+
+static VhostUserConfig c __attribute__ ((unused));
+#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
+                                   + sizeof(c.size) \
+                                   + sizeof(c.flags))
+
+typedef struct VhostUserVringArea {
+    uint64_t u64;
+    uint64_t size;
+    uint64_t offset;
+} VhostUserVringArea;
+
+typedef struct VhostUserInflight {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+    uint16_t num_queues;
+    uint16_t queue_size;
+} VhostUserInflight;
+
+typedef struct {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+} QEMU_PACKED VhostUserHeader;
+
+typedef union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+        VhostUserMemRegMsg mem_reg;
+        VhostUserLog log;
+        struct vhost_iotlb_msg iotlb;
+        VhostUserConfig config;
+        VhostUserCryptoSession session;
+        VhostUserVringArea area;
+        VhostUserInflight inflight;
+} VhostUserPayload;
+
+typedef struct VhostUserMsg {
+    VhostUserHeader hdr;
+    VhostUserPayload payload;
+} QEMU_PACKED VhostUserMsg;
+
+static VhostUserMsg m __attribute__ ((unused));
+#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
+
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+struct vhost_user {
+    struct vhost_dev *dev;
+    /* Shared between vhost devs of the same virtio device */
+    VhostUserState *user;
+    QIOChannel *slave_ioc;
+    GSource *slave_src;
+    NotifierWithReturn postcopy_notifier;
+    struct PostCopyFD  postcopy_fd;
+    uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
+    /* Length of the region_rb and region_rb_offset arrays */
+    size_t             region_rb_len;
+    /* RAMBlock associated with a given region */
+    RAMBlock         **region_rb;
+    /*
+     * The offset from the start of the RAMBlock to the start of the
+     * vhost region.
+     */
+    ram_addr_t        *region_rb_offset;
+
+    /* True once we've entered postcopy_listen */
+    bool               postcopy_listen;
+
+    /* Our current regions */
+    int num_shadow_regions;
+    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
+};
+
+struct scrub_regions {
+    struct vhost_memory_region *region;
+    int reg_idx;
+    int fd_idx;
+};
+
+static bool ioeventfd_enabled(void)
+{
+    return !kvm_enabled() || kvm_eventfds_enabled(); /* true without KVM */
+}
+
+/* Read and check a reply header into @msg; returns 0 or negative errno. */
+static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
+{
+    const uint32_t expected_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
+    struct vhost_user *vu = dev->opaque;
+    const int hdr_len = VHOST_USER_HDR_SIZE;
+    int nread;
+
+    nread = qemu_chr_fe_read_all(vu->user->chr, (uint8_t *)msg, hdr_len);
+    if (nread != hdr_len) {
+        /* Capture errno before error_report() has a chance to clobber it. */
+        int saved_errno = errno;
+        error_report("Failed to read msg header. Read %d instead of %d."
+                     " Original request %d.", nread, hdr_len, msg->hdr.request);
+        return nread < 0 ? -saved_errno : -EIO;
+    }
+
+    /* A reply must carry exactly the REPLY bit plus our protocol version. */
+    if (msg->hdr.flags != expected_flags) {
+        error_report("Failed to read msg header."
+                " Flags 0x%x instead of 0x%x.", msg->hdr.flags, expected_flags);
+        return -EPROTO;
+    }
+
+    trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
+    return 0;
+}
+
+struct vhost_user_read_cb_data {
+    struct vhost_dev *dev;  /* device whose chardev is read */
+    VhostUserMsg *msg;      /* buffer the reply is read into */
+    GMainLoop *loop;        /* loop that vhost_user_read_cb() quits */
+    int ret;                /* negative errno set by the callback on error */
+};
+
+/* Chardev watch: read one whole reply into data->msg, then stop the loop. */
+static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
+                                   gpointer opaque)
+{
+    struct vhost_user_read_cb_data *data = opaque;
+    struct vhost_dev *dev = data->dev;
+    VhostUserMsg *msg = data->msg;
+    struct vhost_user *u = dev->opaque;
+    CharBackend *chr = u->user->chr;
+    uint8_t *payload = (uint8_t *) msg + VHOST_USER_HDR_SIZE;
+    int r, size;
+
+    r = vhost_user_read_header(dev, msg);
+    if (r < 0) {
+        data->ret = r;
+        goto end;
+    }
+
+    /* hdr.size is unsigned and backend-controlled: bound it, print as %u */
+    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
+        error_report("Failed to read msg header."
+                " Size %u exceeds the maximum %zu.", msg->hdr.size,
+                VHOST_USER_PAYLOAD_SIZE);
+        data->ret = -EPROTO;
+        goto end;
+    }
+
+    if (msg->hdr.size) {
+        size = msg->hdr.size;
+        r = qemu_chr_fe_read_all(chr, payload, size);
+        if (r != size) {
+            int saved_errno = errno;
+            error_report("Failed to read msg payload."
+                         " Read %d instead of %u.", r, msg->hdr.size);
+            data->ret = r < 0 ? -saved_errno : -EIO;
+            goto end;
+        }
+    }
+
+end:
+    g_main_loop_quit(data->loop);
+    return G_SOURCE_REMOVE;
+}
+
+static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
+                           gpointer opaque);
+
+/*
+ * This updates the read handler to use a new event loop context.
+ * Event sources are removed from the previous context : this ensures
+ * that events detected in the previous context are purged. They will
+ * be re-detected and processed in the new context.
+ */
+static void slave_update_read_handler(struct vhost_dev *dev,
+                                      GMainContext *ctxt)
+{
+    struct vhost_user *vu = dev->opaque;
+    QIOChannel *ioc = vu->slave_ioc;
+
+    if (!ioc) {
+        return;
+    }
+
+    /* Retire the current watch (if any) before installing one on @ctxt. */
+    if (vu->slave_src) {
+        g_source_destroy(vu->slave_src);
+        g_source_unref(vu->slave_src);
+    }
+
+    vu->slave_src = qio_channel_add_watch_source(ioc, G_IO_IN | G_IO_HUP,
+                                                 slave_read, dev, NULL, ctxt);
+}
+
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
+{
+    struct vhost_user *vu = dev->opaque;
+    CharBackend *be = vu->user->chr;
+    GMainContext *saved_ctx = be->chr->gcontext;
+    GMainContext *nested_ctx = g_main_context_new();
+    GMainLoop *nested_loop = g_main_loop_new(nested_ctx, FALSE);
+    struct vhost_user_read_cb_data cb = {
+        .msg = msg,
+        .dev = dev,
+        .loop = nested_loop,
+        .ret = 0
+    };
+
+    /*
+     * The slave channel fd must stay serviced while we wait for chr I/O,
+     * which calls for an event loop. Nesting the loop chr is attached to is
+     * not an option (its fd handlers may not cope with re-entrancy), so run
+     * a private loop and move both chr and the slave watch onto it.
+     */
+    slave_update_read_handler(dev, nested_ctx);
+    qemu_chr_be_update_read_handlers(be->chr, nested_ctx);
+    qemu_chr_fe_add_watch(be, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &cb);
+
+    /* vhost_user_read_cb() quits this loop when it is done. */
+    g_main_loop_run(nested_loop);
+
+    /*
+     * Switch back to the previous context. Event sources get destroyed and
+     * recreated on the way, which purges any event of the original context
+     * that the nested loop already handled.
+     */
+    qemu_chr_be_update_read_handlers(be->chr, saved_ctx);
+    slave_update_read_handler(dev, NULL);
+
+    g_main_loop_unref(nested_loop);
+    g_main_context_unref(nested_ctx);
+
+    return cb.ret;
+}
+
+/* Wait for the backend's ack of @msg if one was requested; 0 or -errno. */
+static int process_message_reply(struct vhost_dev *dev,
+                                 const VhostUserMsg *msg)
+{
+    /* Seeded so that a failed header read logs the real original request. */
+    VhostUserMsg msg_reply = { .hdr.request = msg->hdr.request };
+    int ret;
+
+    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
+        return 0;
+    }
+
+    ret = vhost_user_read(dev, &msg_reply);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (msg_reply.hdr.request != msg->hdr.request) {
+        error_report("Received unexpected msg type. Expected %d received %d",
+                     msg->hdr.request, msg_reply.hdr.request);
+        return -EPROTO;
+    }
+    return msg_reply.payload.u64 ? -EIO : 0;
+}
+
+static bool vhost_user_one_time_request(VhostUserRequest request)
+{
+    /*
+     * These requests are not vring specific. vhost_user_write() sends them
+     * only for the vhost_dev whose vq_index is 0 and skips them for every
+     * other vhost_dev.
+     */
+    return request == VHOST_USER_SET_OWNER ||
+           request == VHOST_USER_RESET_OWNER ||
+           request == VHOST_USER_SET_MEM_TABLE ||
+           request == VHOST_USER_GET_QUEUE_NUM ||
+           request == VHOST_USER_NET_SET_MTU;
+}
+
+/* most non-init callers ignore the error */
+static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
+                            int *fds, int fd_num)
+{
+    struct vhost_user *vu = dev->opaque;
+    CharBackend *be = vu->user->chr;
+    const int total = VHOST_USER_HDR_SIZE + msg->hdr.size;
+    int written;
+
+    /*
+     * Requests that are not vring specific, e.g. VHOST_USER_SET_MEM_TABLE,
+     * are only sent by the vhost_dev with vq_index 0. For the others the
+     * write is skipped and NEED_REPLY is cleared on @msg.
+     */
+    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
+        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
+        return 0;
+    }
+
+    if (qemu_chr_fe_set_msgfds(be, fds, fd_num) < 0) {
+        error_report("Failed to set msg fds.");
+        return -EINVAL;
+    }
+
+    written = qemu_chr_fe_write_all(be, (const uint8_t *)msg, total);
+    if (written != total) {
+        int saved_errno = errno;
+        error_report("Failed to write msg."
+                     " Wrote %d instead of %d.", written, total);
+        return written < 0 ? -saved_errno : -EIO;
+    }
+
+    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
+    return 0;
+}
+
+int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
+{
+    /* Header-only message (hdr.size stays 0); @fd goes in the fds array. */
+    VhostUserMsg gpu_msg = { .hdr.flags = VHOST_USER_VERSION };
+    int sock_fds[] = { fd };
+
+    gpu_msg.hdr.request = VHOST_USER_GPU_SET_SOCKET;
+    return vhost_user_write(dev, &gpu_msg, sock_fds, 1);
+}
+
+static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                   struct vhost_log *log)
+{
+    bool shmfd = virtio_has_feature(dev->protocol_features,
+                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_SET_LOG_BASE,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.log),
+        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
+        .payload.log.mmap_offset = 0,
+    };
+    int fds[VHOST_USER_MAX_RAM_SLOTS];
+    size_t fd_num = 0;
+    int ret;
+
+    /* The log fd is attached only with LOG_SHMFD and a valid log->fd. */
+    if (shmfd && log->fd != -1) {
+        fds[fd_num++] = log->fd;
+    }
+
+    ret = vhost_user_write(dev, &msg, fds, fd_num);
+    if (ret < 0) {
+        return ret;
+    }
+    if (!shmfd) {
+        return 0;
+    }
+
+    /* With LOG_SHMFD a reply is read back, reusing msg as the buffer. */
+    msg.hdr.size = 0;
+    ret = vhost_user_read(dev, &msg);
+    if (ret < 0) {
+        return ret;
+    }
+    if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
+        error_report("Received unexpected msg type. Expected %d received %d",
+                     VHOST_USER_SET_LOG_BASE, msg.hdr.request);
+        return -EPROTO;
+    }
+    return 0;
+}
+
+static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
+                                            int *fd)
+{
+    uintptr_t host_addr = (uintptr_t)addr;
+    MemoryRegion *region;
+    /* @addr must survive the round trip through a host pointer. */
+    assert(host_addr == addr);
+    region = memory_region_from_host((void *)host_addr, offset);
+    *fd = memory_region_get_fd(region);
+    return region;
+}
+
+static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
+                                       struct vhost_memory_region *src,
+                                       uint64_t mmap_offset)
+{
+    assert(dst != NULL && src != NULL);
+    dst->guest_phys_addr = src->guest_phys_addr;
+    dst->memory_size = src->memory_size;
+    dst->userspace_addr = src->userspace_addr;
+    dst->mmap_offset = mmap_offset; /* not part of @src: caller-provided */
+}
+
+static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
+                                             struct vhost_dev *dev,
+                                             VhostUserMsg *msg,
+                                             int *fds, size_t *fd_num,
+                                             bool track_ramblocks)
+{
+    int i, fd;
+    ram_addr_t offset;
+    MemoryRegion *mr;
+    struct vhost_memory_region *reg;
+    VhostUserMemoryRegion region_buffer;
+
+    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
+    /* Only regions backed by an fd (fd > 0) are put in the message. */
+    for (i = 0; i < dev->mem->nregions; ++i) {
+        reg = dev->mem->regions + i;
+
+        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+        if (fd > 0) { /* NOTE(review): fd 0 is treated as "no fd" */
+            if (track_ramblocks) {
+                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
+                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
+                                                      reg->memory_size,
+                                                      reg->guest_phys_addr,
+                                                      reg->userspace_addr,
+                                                      offset);
+                u->region_rb_offset[i] = offset;
+                u->region_rb[i] = mr->ram_block;
+            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
+                error_report("Failed preparing vhost-user memory table msg");
+                return -ENOBUFS;
+            }
+            vhost_user_fill_msg_region(&region_buffer, reg, offset);
+            msg->payload.memory.regions[*fd_num] = region_buffer;
+            fds[(*fd_num)++] = fd;
+        } else if (track_ramblocks) {
+            u->region_rb_offset[i] = 0;
+            u->region_rb[i] = NULL;
+        }
+    }
+
+    msg->payload.memory.nregions = *fd_num;
+    /* A table without any fd-backed region is reported as an error. */
+    if (!*fd_num) {
+        error_report("Failed initializing vhost-user memory map, "
+                     "consider using -object memory-backend-file share=on");
+        return -EINVAL;
+    }
+    /* Payload: nregions + padding + one entry per region actually sent. */
+    msg->hdr.size = sizeof(msg->payload.memory.nregions);
+    msg->hdr.size += sizeof(msg->payload.memory.padding);
+    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
+
+    return 0;
+}
+
+static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
+                             struct vhost_memory_region *vdev_reg)
+{
+    return shadow_reg->memory_size == vdev_reg->memory_size &&
+           shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
+           shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr;
+}
+
+/*
+ * Diff the shadow table against dev->mem: shadow-only regions go to @rem_reg,
+ * device-only regions go to @add_reg; the counts go to the nr_* outputs.
+ */
+static void scrub_shadow_regions(struct vhost_dev *dev,
+                                 struct scrub_regions *add_reg,
+                                 int *nr_add_reg,
+                                 struct scrub_regions *rem_reg,
+                                 int *nr_rem_reg, uint64_t *shadow_pcb,
+                                 bool track_ramblocks)
+{
+    struct vhost_user *u = dev->opaque;
+    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
+    struct vhost_memory_region *reg, *shadow_reg;
+    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
+    ram_addr_t offset;
+    MemoryRegion *mr;
+    bool matching;
+
+    /*
+     * Find memory regions present in our shadow state which are not in
+     * the device's current memory state.
+     *
+     * Mark regions in both the shadow and device state as "found".
+     */
+    for (i = 0; i < u->num_shadow_regions; i++) {
+        shadow_reg = &u->shadow_regions[i];
+        matching = false;
+
+        for (j = 0; j < dev->mem->nregions; j++) {
+            reg = &dev->mem->regions[j];
+            if (reg_equal(shadow_reg, reg)) {
+                matching = true;
+                found[j] = true;
+                mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+                if (track_ramblocks) {
+                    /*
+                     * Reset postcopy client bases, region_rb, and
+                     * region_rb_offset in case regions are removed.
+                     */
+                    if (fd > 0) {
+                        u->region_rb_offset[j] = offset;
+                        u->region_rb[j] = mr->ram_block;
+                        shadow_pcb[j] = u->postcopy_client_bases[i];
+                    } else {
+                        u->region_rb_offset[j] = 0;
+                        u->region_rb[j] = NULL;
+                    }
+                }
+                break;
+            }
+        }
+
+        /*
+         * If the region was not found in the current device memory state
+         * create an entry for it in the removed list.
+         */
+        if (!matching) {
+            rem_reg[rm_idx].region = shadow_reg;
+            rem_reg[rm_idx++].reg_idx = i;
+        }
+    }
+
+    /*
+     * For regions not marked "found", create entries in the added list.
+     *
+     * Note their indexes in the device memory state and the indexes of their
+     * file descriptors.
+     */
+    for (i = 0; i < dev->mem->nregions; i++) {
+        reg = &dev->mem->regions[i];
+        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+        if (fd > 0) {
+            ++fd_num;
+        }
+
+        /*
+         * If the region was in both the shadow and device state we don't
+         * need to send a VHOST_USER_ADD_MEM_REG message for it.
+         */
+        if (found[i]) {
+            continue;
+        }
+
+        add_reg[add_idx].region = reg;
+        add_reg[add_idx].reg_idx = i;
+        add_reg[add_idx++].fd_idx = fd_num;
+    }
+    *nr_rem_reg = rm_idx;
+    *nr_add_reg = add_idx;
+}
+
+static int send_remove_regions(struct vhost_dev *dev,
+                               struct scrub_regions *remove_reg,
+                               int nr_rem_reg, VhostUserMsg *msg,
+                               bool reply_supported)
+{
+    struct vhost_user *u = dev->opaque;
+    struct vhost_memory_region *shadow_reg;
+    int i, fd, shadow_reg_idx, ret;
+    ram_addr_t offset;
+    VhostUserMemoryRegion region_buffer;
+
+    /*
+     * Send VHOST_USER_REM_MEM_REG for every fd-backed entry, then drop it from
+     * the shadow table. remove_reg follows shadow-table order, so iterating
+     * backwards minimizes memmove() copies and keeps pending reg_idx valid.
+     */
+    for (i = nr_rem_reg - 1; i >= 0; i--) {
+        shadow_reg = remove_reg[i].region;
+        shadow_reg_idx = remove_reg[i].reg_idx;
+
+        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
+
+        if (fd > 0) {
+            msg->hdr.request = VHOST_USER_REM_MEM_REG;
+            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
+            msg->payload.mem_reg.region = region_buffer;
+
+            ret = vhost_user_write(dev, msg, NULL, 0);
+            if (ret < 0) {
+                return ret;
+            }
+
+            if (reply_supported) {
+                ret = process_message_reply(dev, msg);
+                if (ret) {
+                    return ret;
+                }
+            }
+        }
+
+        /*
+         * Drop the entry from the shadow table. NOTE(review): the backend's
+         * ack is only awaited above when fd > 0 and reply_supported is set.
+         */
+        memmove(&u->shadow_regions[shadow_reg_idx],
+                &u->shadow_regions[shadow_reg_idx + 1],
+                sizeof(struct vhost_memory_region) *
+                (u->num_shadow_regions - shadow_reg_idx - 1));
+        u->num_shadow_regions--;
+    }
+
+    return 0;
+}
+
+/*
+ * Send a VHOST_USER_ADD_MEM_REG message for each region in @add_reg that has
+ * a positive fd, then append the region to the shadow table.
+ *
+ * @shadow_pcb receives the postcopy client base reported by the backend for
+ * each added region; it is only written when @track_ramblocks is set.
+ * With @track_ramblocks the backend reply is read and validated here;
+ * otherwise, if @reply_supported is set, the generic reply is processed.
+ *
+ * Returns 0 on success, -EPROTO on an unexpected or malformed reply, or the
+ * error returned by the underlying write/read/reply helpers.
+ */
+static int send_add_regions(struct vhost_dev *dev,
+                            struct scrub_regions *add_reg, int nr_add_reg,
+                            VhostUserMsg *msg, uint64_t *shadow_pcb,
+                            bool reply_supported, bool track_ramblocks)
+{
+    struct vhost_user *u = dev->opaque;
+    int i, fd, ret, reg_idx, reg_fd_idx;
+    struct vhost_memory_region *reg;
+    MemoryRegion *mr;
+    ram_addr_t offset;
+    VhostUserMsg msg_reply;
+    VhostUserMemoryRegion region_buffer;
+
+    for (i = 0; i < nr_add_reg; i++) {
+        reg = add_reg[i].region;
+        reg_idx = add_reg[i].reg_idx;
+        reg_fd_idx = add_reg[i].fd_idx;
+
+        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+
+        if (fd > 0) {
+            if (track_ramblocks) {
+                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
+                                                      reg->memory_size,
+                                                      reg->guest_phys_addr,
+                                                      reg->userspace_addr,
+                                                      offset);
+                /* Remember which RAMBlock backs this region. */
+                u->region_rb_offset[reg_idx] = offset;
+                u->region_rb[reg_idx] = mr->ram_block;
+            }
+            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
+            vhost_user_fill_msg_region(&region_buffer, reg, offset);
+            msg->payload.mem_reg.region = region_buffer;
+
+            ret = vhost_user_write(dev, msg, &fd, 1);
+            if (ret < 0) {
+                return ret;
+            }
+
+            if (track_ramblocks) {
+                uint64_t reply_gpa;
+
+                ret = vhost_user_read(dev, &msg_reply);
+                if (ret < 0) {
+                    return ret;
+                }
+
+                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
+
+                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
+                    error_report("%s: Received unexpected msg type. "
+                                 "Expected %d received %d", __func__,
+                                 VHOST_USER_ADD_MEM_REG,
+                                 msg_reply.hdr.request);
+                    return -EPROTO;
+                }
+
+                /*
+                 * We're using the same structure, just reusing one of the
+                 * fields, so it should be the same size.
+                 */
+                if (msg_reply.hdr.size != msg->hdr.size) {
+                    error_report("%s: Unexpected size for postcopy reply "
+                                 "%d vs %d", __func__, msg_reply.hdr.size,
+                                 msg->hdr.size);
+                    return -EPROTO;
+                }
+
+                /* Get the postcopy client base from the backend's reply. */
+                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
+                    shadow_pcb[reg_idx] =
+                        msg_reply.payload.mem_reg.region.userspace_addr;
+                    trace_vhost_user_set_mem_table_postcopy(
+                        msg_reply.payload.mem_reg.region.userspace_addr,
+                        msg->payload.mem_reg.region.userspace_addr,
+                        reg_fd_idx, reg_idx);
+                } else {
+                    error_report("%s: invalid postcopy reply for region. "
+                                 "Got guest physical address %" PRIX64 ", expected "
+                                 "%" PRIX64, __func__, reply_gpa,
+                                 dev->mem->regions[reg_idx].guest_phys_addr);
+                    return -EPROTO;
+                }
+            } else if (reply_supported) {
+                ret = process_message_reply(dev, msg);
+                if (ret) {
+                    return ret;
+                }
+            }
+        } else if (track_ramblocks) {
+            /* No fd for this region: clear its RAMBlock tracking entry. */
+            u->region_rb_offset[reg_idx] = 0;
+            u->region_rb[reg_idx] = NULL;
+        }
+
+        /*
+         * At this point, we know the backend has mapped in the new
+         * region, if the region has a valid file descriptor.
+         *
+         * The region should now be added to the shadow table.
+         */
+        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
+            reg->guest_phys_addr;
+        u->shadow_regions[u->num_shadow_regions].userspace_addr =
+            reg->userspace_addr;
+        u->shadow_regions[u->num_shadow_regions].memory_size =
+            reg->memory_size;
+        u->num_shadow_regions++;
+    }
+
+    return 0;
+}
+
+/*
+ * Bring the backend's memory map in line with dev->mem by sending the
+ * individual remove and add region messages computed by
+ * scrub_shadow_regions().
+ *
+ * With @track_ramblocks set, the collected postcopy client bases are copied
+ * into u->postcopy_client_bases on both the success and the failure path,
+ * and on success a final u64 "OK" message is written to the backend.
+ *
+ * Returns 0 on success or a negative error from one of the helpers.
+ */
+static int vhost_user_add_remove_regions(struct vhost_dev *dev,
+                                         VhostUserMsg *msg,
+                                         bool reply_supported,
+                                         bool track_ramblocks)
+{
+    struct vhost_user *u = dev->opaque;
+    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
+    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
+    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
+    int nr_add_reg, nr_rem_reg;
+    bool failed = false;
+    int ret;
+
+    msg->hdr.size = sizeof(msg->payload.mem_reg);
+
+    /* Work out which regions have to go and which ones are new. */
+    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
+                         shadow_pcb, track_ramblocks);
+
+    if (nr_rem_reg) {
+        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
+                                  reply_supported);
+        failed = ret < 0;
+    }
+
+    if (!failed && nr_add_reg) {
+        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
+                               reply_supported, track_ramblocks);
+        failed = ret < 0;
+    }
+
+    /* The client bases are published whether or not the update succeeded. */
+    if (track_ramblocks) {
+        memcpy(u->postcopy_client_bases, shadow_pcb,
+               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
+    }
+
+    if (failed) {
+        return ret;
+    }
+
+    if (!track_ramblocks) {
+        return 0;
+    }
+
+    /*
+     * Now we've registered this with the postcopy code, we ack to the
+     * client, because now we're in the position to be able to deal with
+     * any faults it generates.
+     */
+    /* TODO: Use this for failure cases as well with a bad value. */
+    msg->hdr.size = sizeof(msg->payload.u64);
+    msg->payload.u64 = 0; /* OK */
+
+    ret = vhost_user_write(dev, msg, NULL, 0);
+    return ret < 0 ? ret : 0;
+}
+
+/*
+ * Postcopy variant of the memory table update.
+ *
+ * Grows the per-region RAMBlock tracking arrays to cover dev->mem->nregions,
+ * then either updates regions one by one (@config_mem_slots) or sends a
+ * whole table, reads the backend's reply, records the postcopy client base
+ * of every region that was sent with an fd, and finally writes a u64 "OK"
+ * acknowledgement.
+ *
+ * Returns 0 on success, -EPROTO for an unexpected reply type or size, -EIO
+ * if the reply regions could not all be matched, or the error returned by a
+ * helper.
+ */
+static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
+                                             struct vhost_memory *mem,
+                                             bool reply_supported,
+                                             bool config_mem_slots)
+{
+    struct vhost_user *u = dev->opaque;
+    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
+    size_t fd_num = 0;
+    VhostUserMsg msg_reply;
+    int region_i, msg_i;
+    int ret;
+
+    VhostUserMsg msg = {
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+
+    /* Extend the tracking arrays and zero the newly added tail. */
+    if (u->region_rb_len < dev->mem->nregions) {
+        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
+        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
+                                      dev->mem->nregions);
+        memset(&(u->region_rb[u->region_rb_len]), '\0',
+               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
+        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
+               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
+        u->region_rb_len = dev->mem->nregions;
+    }
+
+    if (config_mem_slots) {
+        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
+        if (ret < 0) {
+            return ret;
+        }
+    } else {
+        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
+                                                true);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = vhost_user_write(dev, &msg, fds, fd_num);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = vhost_user_read(dev, &msg_reply);
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
+            error_report("%s: Received unexpected msg type. "
+                         "Expected %d received %d", __func__,
+                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
+            return -EPROTO;
+        }
+
+        /*
+         * We're using the same structure, just reusing one of the
+         * fields, so it should be the same size.
+         */
+        if (msg_reply.hdr.size != msg.hdr.size) {
+            error_report("%s: Unexpected size for postcopy reply "
+                         "%d vs %d", __func__, msg_reply.hdr.size,
+                         msg.hdr.size);
+            return -EPROTO;
+        }
+
+        memset(u->postcopy_client_bases, 0,
+               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
+
+        /*
+         * They're in the same order as the regions that were sent
+         * but some of the regions were skipped (above) if they
+         * didn't have fd's
+         */
+        for (msg_i = 0, region_i = 0;
+             region_i < dev->mem->nregions;
+             region_i++) {
+            if (msg_i < fd_num &&
+                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
+                dev->mem->regions[region_i].guest_phys_addr) {
+                u->postcopy_client_bases[region_i] =
+                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
+                trace_vhost_user_set_mem_table_postcopy(
+                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
+                    msg.payload.memory.regions[msg_i].userspace_addr,
+                    msg_i, region_i);
+                msg_i++;
+            }
+        }
+        /* Every region sent with an fd must have been matched above. */
+        if (msg_i != fd_num) {
+            error_report("%s: postcopy reply not fully consumed "
+                         "%d vs %zu",
+                         __func__, msg_i, fd_num);
+            return -EIO;
+        }
+
+        /*
+         * Now we've registered this with the postcopy code, we ack to the
+         * client, because now we're in the position to be able to deal
+         * with any faults it generates.
+         */
+        /* TODO: Use this for failure cases as well with a bad value. */
+        msg.hdr.size = sizeof(msg.payload.u64);
+        msg.payload.u64 = 0; /* OK */
+        ret = vhost_user_write(dev, &msg, NULL, 0);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Push the device memory layout to the backend.
+ *
+ * Delegates to the postcopy variant when postcopy listening is active and a
+ * postcopy fd handler is installed. Otherwise the layout is sent either as
+ * incremental add/remove region messages (CONFIGURE_MEM_SLOTS negotiated)
+ * or as one table message, waiting for a reply when REPLY_ACK is negotiated.
+ *
+ * Returns 0 on success or the error reported by a helper.
+ */
+static int vhost_user_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+    struct vhost_user *u = dev->opaque;
+    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
+    size_t fd_num = 0;
+    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+    bool config_mem_slots =
+        virtio_has_feature(dev->protocol_features,
+                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
+    int ret;
+
+    if (do_postcopy) {
+        /* Postcopy differs enough to warrant its own implementation. */
+        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
+                                                 config_mem_slots);
+    }
+
+    VhostUserMsg msg = {
+        .hdr.flags = VHOST_USER_VERSION |
+                     (reply_supported ? VHOST_USER_NEED_REPLY_MASK : 0),
+    };
+
+    if (config_mem_slots) {
+        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
+        return ret < 0 ? ret : 0;
+    }
+
+    ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
+                                            false);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = vhost_user_write(dev, &msg, fds, fd_num);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return reply_supported ? process_message_reply(dev, &msg) : 0;
+}
+
+/*
+ * Send VHOST_USER_SET_VRING_ENDIAN carrying @ring.
+ *
+ * Returns -ENOTSUP (after reporting an error) when the CROSS_ENDIAN protocol
+ * feature was not negotiated, otherwise the result of writing the message.
+ */
+static int vhost_user_set_vring_endian(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring)
+{
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_CROSS_ENDIAN)) {
+        error_report("vhost-user trying to send unhandled ioctl");
+        return -ENOTSUP;
+    }
+
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.state),
+        .payload.state = *ring,
+    };
+
+    return vhost_user_write(dev, &msg, NULL, 0);
+}
+
+/*
+ * Write a message of type @request whose payload is the vring state @ring.
+ * No file descriptors are attached. Returns the result of the write.
+ */
+static int vhost_set_vring(struct vhost_dev *dev,
+                           unsigned long int request,
+                           struct vhost_vring_state *ring)
+{
+    VhostUserMsg state_msg = {
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.request = request,
+        .hdr.size = sizeof(state_msg.payload.state),
+        .payload.state = *ring,
+    };
+
+    return vhost_user_write(dev, &state_msg, NULL, 0);
+}
+
+/* Send @ring to the backend as a VHOST_USER_SET_VRING_NUM message. */
+static int vhost_user_set_vring_num(struct vhost_dev *dev,
+                                    struct vhost_vring_state *ring)
+{
+    const unsigned long int req = VHOST_USER_SET_VRING_NUM;
+
+    return vhost_set_vring(dev, req, ring);
+}
+
+/*
+ * Unmap the page recorded in n->unmap_addr and clear the field.
+ * Both @n and n->unmap_addr must be non-NULL.
+ */
+static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
+{
+    size_t map_len;
+
+    assert(n && n->unmap_addr);
+
+    map_len = qemu_real_host_page_size();
+    munmap(n->unmap_addr, map_len);
+    n->unmap_addr = NULL;
+}
+
+/*
+ * Tear down the host notifier mapping of @n, if it has one.
+ *
+ * When @vdev is given, the notifier memory region is first detached from
+ * the queue. The mapped address is then handed over to
+ * vhost_user_host_notifier_free(), which is scheduled through call_rcu().
+ */
+static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
+                                            VirtIODevice *vdev)
+{
+    if (!n->addr) {
+        /* Nothing is mapped. */
+        return;
+    }
+
+    if (vdev) {
+        virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
+    }
+
+    /* A previous unmap must not still be pending. */
+    assert(!n->unmap_addr);
+    n->unmap_addr = n->addr;
+    n->addr = NULL;
+    call_rcu(n, vhost_user_host_notifier_free, rcu);
+}
+
+/* Send @ring to the backend as a VHOST_USER_SET_VRING_BASE message. */
+static int vhost_user_set_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+    const unsigned long int req = VHOST_USER_SET_VRING_BASE;
+
+    return vhost_set_vring(dev, req, ring);
+}
+
+/*
+ * Send VHOST_USER_SET_VRING_ENABLE with value @enable for each of the
+ * device's dev->nvqs queues, starting at dev->vq_index.
+ *
+ * Returns -EINVAL when VHOST_USER_F_PROTOCOL_FEATURES is not among the
+ * device features, the first write error otherwise, or 0 on success.
+ */
+static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+    int q;
+
+    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
+        return -EINVAL;
+    }
+
+    for (q = 0; q < dev->nvqs; ++q) {
+        struct vhost_vring_state vq_state = {
+            .index = dev->vq_index + q,
+            .num   = enable,
+        };
+        int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &vq_state);
+
+        /*
+         * Neither rolling back the queues already updated nor carrying on
+         * past the failure is practical, so stop here and leave recovery
+         * to the device level.
+         */
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Look up the notifier stored at @idx in u->notifiers.
+ * Returns NULL when @idx is beyond the current length of the array.
+ */
+static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
+                                             int idx)
+{
+    return idx >= u->notifiers->len
+        ? NULL
+        : g_ptr_array_index(u->notifiers, idx);
+}
+
+/*
+ * Query the backend with VHOST_USER_GET_VRING_BASE and store the returned
+ * vring state in @ring.
+ *
+ * Any host notifier registered for ring->index is removed before the
+ * request is sent.
+ *
+ * Returns 0 on success, -EPROTO on a reply of the wrong type or size, or
+ * the error from writing/reading the message.
+ */
+static int vhost_user_get_vring_base(struct vhost_dev *dev,
+                                     struct vhost_vring_state *ring)
+{
+    struct vhost_user *u = dev->opaque;
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_GET_VRING_BASE,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.state),
+        .payload.state = *ring,
+    };
+    VhostUserHostNotifier *notifier;
+    int ret;
+
+    notifier = fetch_notifier(u->user, ring->index);
+    if (notifier) {
+        vhost_user_host_notifier_remove(notifier, dev->vdev);
+    }
+
+    /* The reply is read back into the same message buffer. */
+    ret = vhost_user_write(dev, &msg, NULL, 0);
+    if (ret >= 0) {
+        ret = vhost_user_read(dev, &msg);
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
+        error_report("Received unexpected msg type. Expected %d received %d",
+                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
+        return -EPROTO;
+    }
+
+    if (msg.hdr.size != sizeof(msg.payload.state)) {
+        error_report("Received bad msg size.");
+        return -EPROTO;
+    }
+
+    *ring = msg.payload.state;
+    return 0;
+}
+
+/*
+ * Write a message of type @request whose u64 payload holds the vring index
+ * taken from @file.
+ *
+ * file->fd is attached when ioeventfd_enabled() is true and the fd is
+ * positive; otherwise VHOST_USER_VRING_NOFD_MASK is set in the payload and
+ * no descriptor is sent. Returns the result of the write.
+ */
+static int vhost_set_vring_file(struct vhost_dev *dev,
+                                VhostUserRequest request,
+                                struct vhost_vring_file *file)
+{
+    int fds[VHOST_USER_MAX_RAM_SLOTS];
+    size_t fd_num = 0;
+    VhostUserMsg msg = {
+        .hdr.request = request,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.u64),
+        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
+    };
+    bool attach_fd = ioeventfd_enabled() && file->fd > 0;
+
+    if (!attach_fd) {
+        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
+    } else {
+        fds[fd_num] = file->fd;
+        fd_num++;
+    }
+
+    return vhost_user_write(dev, &msg, fds, fd_num);
+}
+
+/* Send @file to the backend as a VHOST_USER_SET_VRING_KICK message. */
+static int vhost_user_set_vring_kick(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    const VhostUserRequest req = VHOST_USER_SET_VRING_KICK;
+
+    return vhost_set_vring_file(dev, req, file);
+}
+
+/* Send @file to the backend as a VHOST_USER_SET_VRING_CALL message. */
+static int vhost_user_set_vring_call(struct vhost_dev *dev,
+                                     struct vhost_vring_file *file)
+{
+    const VhostUserRequest req = VHOST_USER_SET_VRING_CALL;
+
+    return vhost_set_vring_file(dev, req, file);
+}
+
+/* Send @file to the backend as a VHOST_USER_SET_VRING_ERR message. */
+static int vhost_user_set_vring_err(struct vhost_dev *dev,
+                                    struct vhost_vring_file *file)
+{
+    const VhostUserRequest req = VHOST_USER_SET_VRING_ERR;
+
+    return vhost_set_vring_file(dev, req, file);
+}
+
+/*
+ * Send @request with an empty payload and store the u64 carried by the
+ * reply in @u64.
+ *
+ * For one-time requests issued on a device whose vq_index is not 0, nothing
+ * is sent, @u64 is left untouched and 0 is returned.
+ *
+ * Returns 0 on success, -EPROTO on a reply of the wrong type or size, or
+ * the error from writing/reading the message.
+ */
+static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
+{
+    VhostUserMsg msg = {
+        .hdr.request = request,
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+    int ret;
+
+    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
+        return 0;
+    }
+
+    /* The reply is read back into the same message buffer. */
+    ret = vhost_user_write(dev, &msg, NULL, 0);
+    if (ret >= 0) {
+        ret = vhost_user_read(dev, &msg);
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (msg.hdr.request != request) {
+        error_report("Received unexpected msg type. Expected %d received %d",
+                     request, msg.hdr.request);
+        return -EPROTO;
+    }
+
+    if (msg.hdr.size != sizeof(msg.payload.u64)) {
+        error_report("Received bad msg size.");
+        return -EPROTO;
+    }
+
+    *u64 = msg.payload.u64;
+    return 0;
+}
+
+/*
+ * Fetch the backend feature bits into @features.
+ * Any failure of the underlying request is reported as -EPROTO.
+ */
+static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
+{
+    int ret = vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
+
+    return ret < 0 ? -EPROTO : 0;
+}
+
+/*
+ * Wait until the backend has reacted to @msg, which was just sent.
+ *
+ * If @msg asked for a reply, that reply is processed. Otherwise a
+ * VHOST_USER_GET_FEATURES round trip is performed instead, as every backend
+ * answers that request.
+ */
+static int enforce_reply(struct vhost_dev *dev,
+                         const VhostUserMsg *msg)
+{
+    uint64_t ignored;
+
+    if (!(msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK)) {
+        /*
+         * The backend does not support replies for the command we just
+         * sent, so fall back to a request that always gets answered.
+         */
+        return vhost_user_get_features(dev, &ignored);
+    }
+
+    return process_message_reply(dev, msg);
+}
+
+/*
+ * Send @addr to the backend as a VHOST_USER_SET_VRING_ADDR message.
+ *
+ * When the VHOST_VRING_F_LOG flag is set in addr->flags the function waits
+ * for the backend to react, to make sure it is actually logging changes.
+ *
+ * Returns 0 on success or the error from the write / reply handling.
+ */
+static int vhost_user_set_vring_addr(struct vhost_dev *dev,
+                                     struct vhost_vring_addr *addr)
+{
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_SET_VRING_ADDR,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.addr),
+        .payload.addr = *addr,
+    };
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+    /* Only wait for the backend when logging has been requested. */
+    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
+    int ret;
+
+    if (wait_for_reply && reply_supported) {
+        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    ret = vhost_user_write(dev, &msg, NULL, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return wait_for_reply ? enforce_reply(dev, &msg) : 0;
+}
+
+/*
+ * Send @request with @u64 as its payload.
+ *
+ * With @wait_for_reply set, a reply is requested when REPLY_ACK has been
+ * negotiated, and the function does not return until the backend has
+ * reacted (see enforce_reply()).
+ *
+ * Returns 0 on success or the error from the write / reply handling.
+ */
+static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
+                              bool wait_for_reply)
+{
+    VhostUserMsg msg = {
+        .hdr.request = request,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.u64),
+        .payload.u64 = u64,
+    };
+    int ret;
+
+    if (wait_for_reply &&
+        virtio_has_feature(dev->protocol_features,
+                           VHOST_USER_PROTOCOL_F_REPLY_ACK)) {
+        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    ret = vhost_user_write(dev, &msg, NULL, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return wait_for_reply ? enforce_reply(dev, &msg) : 0;
+}
+
+/* Send @status via VHOST_USER_SET_STATUS without waiting for a reply. */
+static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
+{
+    const bool wait_for_reply = false;
+
+    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status,
+                              wait_for_reply);
+}
+
+/*
+ * Read the status via VHOST_USER_GET_STATUS and store it, converted to
+ * uint8_t, in @status. @status is left untouched on failure.
+ */
+static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
+{
+    uint64_t raw;
+    int ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &raw);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    *status = raw;
+    return 0;
+}
+
+/*
+ * Make sure every bit of @status is set in the backend's status.
+ *
+ * The current status is read first; a SET_STATUS message is only sent when
+ * at least one of the requested bits is missing.
+ */
+static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
+{
+    uint8_t current;
+    int ret = vhost_user_get_status(dev, &current);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    if ((current & status) != status) {
+        return vhost_user_set_status(dev, current | status);
+    }
+
+    return 0;
+}
+
+/*
+ * Send the feature set to the backend.
+ *
+ * dev->backend_features is OR-ed into @features so that backend-only bits
+ * (currently VHOST_USER_F_PROTOCOL_FEATURES) are included. When
+ * VHOST_F_LOG_ALL is requested, the call waits for the backend to react so
+ * that logging is known to be active. If the STATUS protocol feature was
+ * negotiated and the features were set successfully, FEATURES_OK is then
+ * added to the status.
+ */
+static int vhost_user_set_features(struct vhost_dev *dev,
+                                   uint64_t features)
+{
+    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
+    bool has_status = virtio_has_feature(dev->protocol_features,
+                                         VHOST_USER_PROTOCOL_F_STATUS);
+    int ret;
+
+    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
+                             features | dev->backend_features,
+                             log_enabled);
+
+    if (has_status && !ret) {
+        return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+    }
+
+    return ret;
+}
+
+/*
+ * Send @features via VHOST_USER_SET_PROTOCOL_FEATURES without waiting for
+ * a reply.
+ */
+static int vhost_user_set_protocol_features(struct vhost_dev *dev,
+                                            uint64_t features)
+{
+    const bool wait_for_reply = false;
+
+    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
+                              wait_for_reply);
+}
+
+/* Send a payload-less VHOST_USER_SET_OWNER message to the backend. */
+static int vhost_user_set_owner(struct vhost_dev *dev)
+{
+    VhostUserMsg owner_msg = {
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.request = VHOST_USER_SET_OWNER,
+    };
+
+    return vhost_user_write(dev, &owner_msg, NULL, 0);
+}
+
+/*
+ * Ask the backend for its memory slot limit via
+ * VHOST_USER_GET_MAX_MEM_SLOTS and store it in @max_memslots.
+ * @max_memslots is only written on success.
+ */
+static int vhost_user_get_max_memslots(struct vhost_dev *dev,
+                                       uint64_t *max_memslots)
+{
+    uint64_t reported;
+    int err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
+                                 &reported);
+
+    if (err >= 0) {
+        *max_memslots = reported;
+        return 0;
+    }
+
+    return err;
+}
+
+/*
+ * Ask the backend to reset: VHOST_USER_RESET_DEVICE is used when the
+ * RESET_DEVICE protocol feature was negotiated, VHOST_USER_RESET_OWNER
+ * otherwise.
+ */
+static int vhost_user_reset_device(struct vhost_dev *dev)
+{
+    VhostUserMsg msg = {
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+
+    if (virtio_has_feature(dev->protocol_features,
+                           VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
+        msg.hdr.request = VHOST_USER_RESET_DEVICE;
+    } else {
+        msg.hdr.request = VHOST_USER_RESET_OWNER;
+    }
+
+    return vhost_user_write(dev, &msg, NULL, 0);
+}
+
+/*
+ * Forward a config change request to the device's config notifier.
+ * Returns -ENOSYS when no notifier callback is installed.
+ */
+static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
+{
+    if (dev->config_ops && dev->config_ops->vhost_dev_config_notifier) {
+        return dev->config_ops->vhost_dev_config_notifier(dev);
+    }
+
+    return -ENOSYS;
+}
+
+/*
+ * Return the notifier tracked at @idx, allocating a zeroed one (with its
+ * idx field set) when the slot is still empty. The tracking array is grown
+ * first if @idx lies beyond its current length.
+ */
+static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
+                                                       int idx)
+{
+    VhostUserHostNotifier *slot;
+
+    if (idx >= u->notifiers->len) {
+        g_ptr_array_set_size(u->notifiers, idx + 1);
+    }
+
+    slot = g_ptr_array_index(u->notifiers, idx);
+    if (slot) {
+        return slot;
+    }
+
+    /*
+     * Notifications may arrive out of order, so the slot can exist but be
+     * empty: swap the empty entry for a freshly allocated notifier.
+     */
+    g_ptr_array_remove_index(u->notifiers, idx);
+    slot = g_new0(VhostUserHostNotifier, 1);
+    slot->idx = idx;
+    g_ptr_array_insert(u->notifiers, idx, slot);
+    trace_vhost_user_create_notifier(idx, slot);
+
+    return slot;
+}
+
+/*
+ * Handle a host notifier request from the backend for the queue encoded in
+ * area->u64.
+ *
+ * Any existing notifier mapping of that queue is removed first. Unless the
+ * NOFD flag is set, one host page of @fd at area->offset is then mapped and
+ * installed as the queue's host notifier memory region.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported or malformed request,
+ * -EFAULT when mmap() fails and -ENXIO when the region cannot be installed.
+ */
+static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
+                                                       VhostUserVringArea *area,
+                                                       int fd)
+{
+    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
+    size_t page_size = qemu_real_host_page_size();
+    struct vhost_user *u = dev->opaque;
+    VhostUserState *user = u->user;
+    VirtIODevice *vdev = dev->vdev;
+    VhostUserHostNotifier *n;
+    void *mapping;
+    char *mr_name;
+
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)) {
+        return -EINVAL;
+    }
+    if (vdev == NULL) {
+        return -EINVAL;
+    }
+    if (queue_idx >= virtio_get_num_queues(vdev)) {
+        return -EINVAL;
+    }
+
+    /*
+     * Drop whatever was mapped for this queue before a new address is
+     * set up.
+     */
+    n = fetch_or_create_notifier(user, queue_idx);
+    vhost_user_host_notifier_remove(n, vdev);
+
+    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
+        /* No descriptor supplied: removing the old mapping was all. */
+        return 0;
+    }
+
+    /* Only a mapping of exactly one host page is accepted. */
+    if (area->size != page_size) {
+        return -EINVAL;
+    }
+
+    mapping = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                   fd, area->offset);
+    if (mapping == MAP_FAILED) {
+        return -EFAULT;
+    }
+
+    mr_name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
+                              user, queue_idx);
+    if (n->mr.ram) {
+        /* Already initialised earlier (e.g. before a suspend): repoint. */
+        n->mr.ram_block->host = mapping;
+    } else {
+        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), mr_name,
+                                          page_size, mapping);
+    }
+    g_free(mr_name);
+
+    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
+        object_unparent(OBJECT(&n->mr));
+        munmap(mapping, page_size);
+        return -ENXIO;
+    }
+
+    n->addr = mapping;
+
+    return 0;
+}
+
+/*
+ * Shut down the slave channel: destroy and release its GSource, then drop
+ * the reference on its I/O channel. Both fields are reset to NULL.
+ */
+static void close_slave_channel(struct vhost_user *u)
+{
+    /* Event source first ... */
+    g_source_destroy(u->slave_src);
+    g_source_unref(u->slave_src);
+    u->slave_src = NULL;
+
+    /* ... then the channel it was watching. */
+    object_unref(OBJECT(u->slave_ioc));
+    u->slave_ioc = NULL;
+}
+
+/*
+ * Read handler for the slave channel.
+ *
+ * Reads one message (header, optional file descriptors, payload),
+ * dispatches it on hdr.request and, if the sender set
+ * VHOST_USER_NEED_REPLY_MASK, writes back a u64 reply that is non-zero when
+ * the handler failed.
+ *
+ * Any received file descriptors are closed before returning. On an I/O
+ * error or an oversized payload the slave channel is closed and
+ * G_SOURCE_REMOVE is returned; otherwise G_SOURCE_CONTINUE.
+ */
+static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
+                           gpointer opaque)
+{
+    struct vhost_dev *dev = opaque;
+    struct vhost_user *u = dev->opaque;
+    VhostUserHeader hdr = { 0, };
+    VhostUserPayload payload = { 0, };
+    struct iovec hdr_iov = {
+        .iov_base = &hdr,
+        .iov_len = VHOST_USER_HDR_SIZE,
+    };
+    Error *local_err = NULL;
+    gboolean rc = G_SOURCE_CONTINUE;
+    g_autofree int *fd = NULL;
+    size_t fdsize = 0;
+    int ret = 0;
+    int i;
+
+    /* Header, together with any descriptors passed along with it. */
+    if (qio_channel_readv_full_all(ioc, &hdr_iov, 1, &fd, &fdsize,
+                                   &local_err)) {
+        error_report_err(local_err);
+        goto disconnect;
+    }
+
+    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
+        error_report("Failed to read msg header."
+                " Size %d exceeds the maximum %zu.", hdr.size,
+                VHOST_USER_PAYLOAD_SIZE);
+        goto disconnect;
+    }
+
+    /* Payload, of the size announced by the header. */
+    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
+        error_report_err(local_err);
+        goto disconnect;
+    }
+
+    if (hdr.request == VHOST_USER_SLAVE_IOTLB_MSG) {
+        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
+    } else if (hdr.request == VHOST_USER_SLAVE_CONFIG_CHANGE_MSG) {
+        ret = vhost_user_slave_handle_config_change(dev);
+    } else if (hdr.request == VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG) {
+        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
+                                                          fd ? fd[0] : -1);
+    } else {
+        error_report("Received unexpected msg type: %d.", hdr.request);
+        ret = -EINVAL;
+    }
+
+    /*
+     * REPLY_ACK handling. Any other kind of reply has to be produced by
+     * the request handler itself.
+     */
+    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
+        struct iovec reply_iov[2];
+
+        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
+        hdr.flags |= VHOST_USER_REPLY_MASK;
+
+        payload.u64 = ret ? 1 : 0;
+        hdr.size = sizeof(payload.u64);
+
+        reply_iov[0].iov_base = &hdr;
+        reply_iov[0].iov_len = VHOST_USER_HDR_SIZE;
+        reply_iov[1].iov_base = &payload;
+        reply_iov[1].iov_len = hdr.size;
+
+        if (qio_channel_writev_all(ioc, reply_iov, ARRAY_SIZE(reply_iov),
+                                   &local_err)) {
+            error_report_err(local_err);
+            goto disconnect;
+        }
+    }
+
+    goto release_fds;
+
+disconnect:
+    close_slave_channel(u);
+    rc = G_SOURCE_REMOVE;
+
+release_fds:
+    if (fd) {
+        i = 0;
+        while (i < fdsize) {
+            close(fd[i]);
+            i++;
+        }
+    }
+    return rc;
+}
+
+/*
+ * Create the slave request channel and pass one end of it to the backend
+ * with VHOST_USER_SET_SLAVE_REQ_FD.
+ *
+ * Nothing is done (and 0 is returned) when VHOST_USER_PROTOCOL_F_SLAVE_REQ
+ * was not negotiated.  When VHOST_USER_PROTOCOL_F_REPLY_ACK was negotiated
+ * the request is flagged as needing a reply and that reply is processed.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int vhost_setup_slave_channel(struct vhost_dev *dev)
+{
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+    struct vhost_user *u = dev->opaque;
+    int sv[2], ret = 0;
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+    Error *local_err = NULL;
+    QIOChannel *ioc;
+
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+        return 0;
+    }
+
+    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+        /* error_report() may clobber errno, so capture it first */
+        int saved_errno = errno;
+        error_report("socketpair() failed");
+        return -saved_errno;
+    }
+
+    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
+    if (!ioc) {
+        error_report_err(local_err);
+        /*
+         * No channel object took ownership of sv[0], so both ends of the
+         * socket pair must be closed here to avoid leaking descriptors.
+         */
+        close(sv[0]);
+        close(sv[1]);
+        return -ECONNREFUSED;
+    }
+    u->slave_ioc = ioc;
+    slave_update_read_handler(dev, NULL);
+
+    if (reply_supported) {
+        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    /* sv[1] is sent to the backend as ancillary data of the message */
+    ret = vhost_user_write(dev, &msg, &sv[1], 1);
+    if (ret) {
+        goto out;
+    }
+
+    if (reply_supported) {
+        ret = process_message_reply(dev, &msg);
+    }
+
+out:
+    /* Our copy of the backend's end is closed on success and failure alike */
+    close(sv[1]);
+    if (ret) {
+        close_slave_channel(u);
+    }
+
+    return ret;
+}
+
+#ifdef CONFIG_LINUX
+/*
+ * Called back from the postcopy fault thread when a fault is received on our
+ * ufd.
+ *
+ * @pcfd: the PostCopyFD the fault arrived on; pcfd->data is the vhost_dev
+ * @ufd:  the received event, read as a struct uffd_msg
+ *
+ * The faulting address is compared against the client base address and size
+ * of each memory region.  For the first region that contains it, the page is
+ * requested with postcopy_request_shared_page() and that call's result is
+ * returned.  Returns -1 if no region contains the address.
+ *
+ * TODO: This is Linux specific
+ */
+static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
+                                             void *ufd)
+{
+    struct vhost_dev *dev = pcfd->data;
+    struct vhost_user *u = dev->opaque;
+    struct uffd_msg *fault = ufd;
+    uint64_t faultaddr = fault->arg.pagefault.address;
+    int idx;
+
+    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
+                                            dev->mem->nregions);
+    for (idx = 0; idx < MIN(dev->mem->nregions, u->region_rb_len); idx++) {
+        uint64_t region_offset;
+        uint64_t rb_offset;
+
+        trace_vhost_user_postcopy_fault_handler_loop(idx,
+                u->postcopy_client_bases[idx],
+                dev->mem->regions[idx].memory_size);
+
+        /* Fault lies below this region's client base: not this one */
+        if (faultaddr < u->postcopy_client_bases[idx]) {
+            continue;
+        }
+
+        /* Offset of the fault address in the vhost region */
+        region_offset = faultaddr - u->postcopy_client_bases[idx];
+        if (region_offset >= dev->mem->regions[idx].memory_size) {
+            continue;
+        }
+
+        rb_offset = region_offset + u->region_rb_offset[idx];
+        trace_vhost_user_postcopy_fault_handler_found(idx, region_offset,
+                                                      rb_offset);
+        return postcopy_request_shared_page(pcfd, u->region_rb[idx], faultaddr,
+                                            rb_offset);
+    }
+
+    error_report("%s: Failed to find region for fault %" PRIx64,
+                 __func__, faultaddr);
+    return -1;
+}
+
+/*
+ * Postcopy waker callback for the vhost-user ufd.
+ *
+ * @pcfd:   the PostCopyFD registered for the device
+ * @rb:     the RAMBlock the offset refers to
+ * @offset: offset within @rb
+ *
+ * Finds the memory region backed by @rb whose range contains @offset,
+ * converts the offset into an address in the client's address space and
+ * returns the result of postcopy_wake_shared() for it.  Returns 0 when the
+ * device has no vhost-user state or when no region matches.
+ */
+static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
+                                     uint64_t offset)
+{
+    struct vhost_dev *dev = pcfd->data;
+    struct vhost_user *u = dev->opaque;
+    int idx;
+
+    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
+
+    if (!u) {
+        return 0;
+    }
+
+    /* Translate the offset into an address in the clients address space */
+    for (idx = 0; idx < MIN(dev->mem->nregions, u->region_rb_len); idx++) {
+        uint64_t client_addr;
+
+        if (u->region_rb[idx] != rb) {
+            continue;
+        }
+        if (offset < u->region_rb_offset[idx]) {
+            continue;
+        }
+        if (offset >= (u->region_rb_offset[idx] +
+                       dev->mem->regions[idx].memory_size)) {
+            continue;
+        }
+
+        client_addr = (offset - u->region_rb_offset[idx]) +
+                      u->postcopy_client_bases[idx];
+        trace_vhost_user_postcopy_waker_found(client_addr);
+        return postcopy_wake_shared(pcfd, client_addr, rb);
+    }
+
+    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
+    return 0;
+}
+#endif
+
+/*
+ * Called at the start of an inbound postcopy on reception of the
+ * 'advise' command.
+ *
+ * Sends VHOST_USER_POSTCOPY_ADVISE, validates the reply (same request type,
+ * empty payload), takes the file descriptor that came with the reply from
+ * the chardev and registers it as a shared ufd with the postcopy code.
+ *
+ * Returns 0 on success, a negative errno value with @errp set on failure.
+ * On non-Linux builds this always fails with -ENOSYS.
+ */
+static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
+{
+#ifdef CONFIG_LINUX
+    struct vhost_user *u = dev->opaque;
+    CharBackend *chardev = u->user->chr;
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
+        .hdr.flags = VHOST_USER_VERSION,
+    };
+    int uffd;
+    int rc;
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_setg(errp, "Failed to send postcopy_advise to vhost");
+        return rc;
+    }
+
+    rc = vhost_user_read(dev, &msg);
+    if (rc < 0) {
+        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
+        return rc;
+    }
+
+    /* The reply must echo the request type and carry no payload */
+    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
+        error_setg(errp, "Unexpected msg type. Expected %d received %d",
+                     VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
+        return -EPROTO;
+    }
+    if (msg.hdr.size) {
+        error_setg(errp, "Received bad msg size.");
+        return -EPROTO;
+    }
+
+    uffd = qemu_chr_fe_get_msgfd(chardev);
+    if (uffd < 0) {
+        error_setg(errp, "%s: Failed to get ufd", __func__);
+        return -EIO;
+    }
+    qemu_socket_set_nonblock(uffd);
+
+    /* register ufd with userfault thread */
+    u->postcopy_fd.fd = uffd;
+    u->postcopy_fd.data = dev;
+    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
+    u->postcopy_fd.waker = vhost_user_postcopy_waker;
+    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
+    postcopy_register_shared_ufd(&u->postcopy_fd);
+    return 0;
+#else
+    error_setg(errp, "Postcopy not supported on non-Linux systems");
+    return -ENOSYS;
+#endif
+}
+
+/*
+ * Called at the switch to postcopy on reception of the 'listen' command.
+ *
+ * Marks the device as listening, sends VHOST_USER_POSTCOPY_LISTEN with a
+ * reply requested and processes that reply.
+ *
+ * Returns 0 on success, otherwise the failing call's error code with @errp
+ * set.
+ */
+static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
+{
+    struct vhost_user *u = dev->opaque;
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
+        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+    };
+    int rc;
+
+    /* The flag is raised before the message goes out */
+    u->postcopy_listen = true;
+
+    trace_vhost_user_postcopy_listen();
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_setg(errp, "Failed to send postcopy_listen to vhost");
+        return rc;
+    }
+
+    rc = process_message_reply(dev, &msg);
+    if (rc) {
+        error_setg(errp, "Failed to receive reply to postcopy_listen");
+    }
+
+    return rc;
+}
+
+/*
+ * Called at the end of postcopy
+ *
+ * Sends VHOST_USER_POSTCOPY_END with a reply requested.  Once the reply has
+ * been processed successfully, the shared ufd is unregistered and closed
+ * and the fault handler pointer is cleared.
+ *
+ * Returns 0 on success, otherwise the failing call's error code with @errp
+ * set; in that case the ufd is left registered.
+ */
+static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
+{
+    struct vhost_user *u = dev->opaque;
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_POSTCOPY_END,
+        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+    };
+    int rc;
+
+    trace_vhost_user_postcopy_end_entry();
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_setg(errp, "Failed to send postcopy_end to vhost");
+        return rc;
+    }
+
+    rc = process_message_reply(dev, &msg);
+    if (rc) {
+        error_setg(errp, "Failed to receive reply to postcopy_end");
+        return rc;
+    }
+
+    /* Tear down the ufd registration made by the advise step */
+    postcopy_unregister_shared_ufd(&u->postcopy_fd);
+    close(u->postcopy_fd.fd);
+    u->postcopy_fd.handler = NULL;
+
+    trace_vhost_user_postcopy_end_exit();
+
+    return 0;
+}
+
+/*
+ * Postcopy notifier for a vhost-user device.
+ *
+ * @notifier: the postcopy_notifier member embedded in struct vhost_user
+ * @opaque:   a struct PostcopyNotifyData describing the event
+ *
+ * A probe fails with -ENOENT unless VHOST_USER_PROTOCOL_F_PAGEFAULT was
+ * negotiated.  The inbound advise, listen and end events are forwarded to
+ * their handlers.  Any other reason is ignored.  Returns 0 or the negative
+ * error code of the handler that was run.
+ */
+static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
+                                        void *opaque)
+{
+    struct PostcopyNotifyData *pnd = opaque;
+    struct vhost_user *u = container_of(notifier, struct vhost_user,
+                                         postcopy_notifier);
+    struct vhost_dev *dev = u->dev;
+    int result = 0;
+
+    switch (pnd->reason) {
+    case POSTCOPY_NOTIFY_PROBE:
+        if (!virtio_has_feature(dev->protocol_features,
+                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
+            /* TODO: Get the device name into this error somehow */
+            error_setg(pnd->errp,
+                       "vhost-user backend not capable of postcopy");
+            result = -ENOENT;
+        }
+        break;
+
+    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
+        result = vhost_user_postcopy_advise(dev, pnd->errp);
+        break;
+
+    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
+        result = vhost_user_postcopy_listen(dev, pnd->errp);
+        break;
+
+    case POSTCOPY_NOTIFY_INBOUND_END:
+        result = vhost_user_postcopy_end(dev, pnd->errp);
+        break;
+
+    default:
+        /* We ignore notifications we don't know */
+        break;
+    }
+
+    return result;
+}
+
+/*
+ * Initialise the vhost-user backend state for @dev.
+ *
+ * @dev:    the vhost device being set up; dev->opaque is pointed at the
+ *          newly allocated struct vhost_user
+ * @opaque: the VhostUserState shared with the device
+ * @errp:   set on failure
+ *
+ * Reads the backend features and, when VHOST_USER_F_PROTOCOL_FEATURES is
+ * offered, negotiates the protocol features, the maximum queue count and the
+ * number of memory slots.  A migration blocker is installed when the backend
+ * lacks VHOST_USER_PROTOCOL_F_LOG_SHMFD.  For the device with vq_index 0 the
+ * slave channel is set up.  Finally the postcopy notifier is registered.
+ *
+ * Returns 0 on success or a negative errno value on failure.  The state
+ * allocated here is not freed on the failure paths of this function.
+ */
+static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
+                                   Error **errp)
+{
+    uint64_t features, ram_slots;
+    struct vhost_user *u;
+    VhostUserState *vus = (VhostUserState *) opaque;
+    int err;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    u = g_new0(struct vhost_user, 1);
+    u->user = vus;
+    u->dev = dev;
+    dev->opaque = u;
+
+    err = vhost_user_get_features(dev, &features);
+    if (err < 0) {
+        error_setg_errno(errp, -err, "vhost_backend_init failed");
+        return err;
+    }
+
+    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
+        bool supports_f_config = vus->supports_config ||
+            (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
+        uint64_t protocol_features;
+
+        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+
+        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
+                                 &protocol_features);
+        if (err < 0) {
+            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
+            return -EPROTO;
+        }
+
+        /*
+         * We will use all the protocol features we support - although
+         * we suppress F_CONFIG if we know QEMUs internal code can not support
+         * it.
+         */
+        protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
+
+        if (supports_f_config) {
+            if (!virtio_has_feature(protocol_features,
+                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
+                error_setg(errp, "vhost-user device expecting "
+                           "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
+                           "not support it.");
+                return -EPROTO;
+            }
+        } else {
+            if (virtio_has_feature(protocol_features,
+                                   VHOST_USER_PROTOCOL_F_CONFIG)) {
+                /*
+                 * No Error has been set at this point, and errp itself may
+                 * be NULL, so emit a plain warning rather than trying to
+                 * decorate and report *errp.
+                 */
+                warn_report("vhost-user backend supports "
+                            "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
+                protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+            }
+        }
+
+        /* final set of protocol features */
+        dev->protocol_features = protocol_features;
+        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
+        if (err < 0) {
+            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
+            return -EPROTO;
+        }
+
+        /* query the max queues we support if backend supports Multiple Queue */
+        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
+            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
+                                     &dev->max_queues);
+            if (err < 0) {
+                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
+                return -EPROTO;
+            }
+        } else {
+            dev->max_queues = 1;
+        }
+
+        if (dev->num_queues && dev->max_queues < dev->num_queues) {
+            error_setg(errp, "The maximum number of queues supported by the "
+                       "backend is %" PRIu64, dev->max_queues);
+            return -EINVAL;
+        }
+
+        /* IOMMU support needs both slave-req and reply-ack */
+        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
+                !(virtio_has_feature(dev->protocol_features,
+                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
+                 virtio_has_feature(dev->protocol_features,
+                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
+            error_setg(errp, "IOMMU support requires reply-ack and "
+                       "slave-req protocol features.");
+            return -EINVAL;
+        }
+
+        /* get max memory regions if backend supports configurable RAM slots */
+        if (!virtio_has_feature(dev->protocol_features,
+                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
+            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
+        } else {
+            err = vhost_user_get_max_memslots(dev, &ram_slots);
+            if (err < 0) {
+                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
+                return -EPROTO;
+            }
+
+            if (ram_slots < u->user->memory_slots) {
+                error_setg(errp, "The backend specified a max ram slots limit "
+                           "of %" PRIu64", when the prior validated limit was "
+                           "%d. This limit should never decrease.", ram_slots,
+                           u->user->memory_slots);
+                return -EINVAL;
+            }
+
+            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
+        }
+    }
+
+    if (dev->migration_blocker == NULL &&
+        !virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
+        error_setg(&dev->migration_blocker,
+                   "Migration disabled: vhost-user backend lacks "
+                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
+    }
+
+    /* Only the device with vq_index 0 sets up the slave channel */
+    if (dev->vq_index == 0) {
+        err = vhost_setup_slave_channel(dev);
+        if (err < 0) {
+            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
+            return -EPROTO;
+        }
+    }
+
+    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
+    postcopy_add_notifier(&u->postcopy_notifier);
+
+    return 0;
+}
+
+/*
+ * Release the vhost-user state attached to @dev.
+ *
+ * Removes the postcopy notifier and the shared ufd if they are still
+ * registered, closes the slave channel if one exists, frees the region
+ * bookkeeping arrays and the state itself, and clears dev->opaque.
+ * Always returns 0.
+ */
+static int vhost_user_backend_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_user *state;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    state = dev->opaque;
+
+    /* A non-NULL notify callback means the notifier is still installed */
+    if (state->postcopy_notifier.notify) {
+        postcopy_remove_notifier(&state->postcopy_notifier);
+        state->postcopy_notifier.notify = NULL;
+    }
+
+    state->postcopy_listen = false;
+
+    /* A non-NULL handler means the ufd is still registered */
+    if (state->postcopy_fd.handler) {
+        postcopy_unregister_shared_ufd(&state->postcopy_fd);
+        close(state->postcopy_fd.fd);
+        state->postcopy_fd.handler = NULL;
+    }
+
+    if (state->slave_ioc) {
+        close_slave_channel(state);
+    }
+
+    g_free(state->region_rb);
+    state->region_rb = NULL;
+    g_free(state->region_rb_offset);
+    state->region_rb_offset = NULL;
+    state->region_rb_len = 0;
+
+    g_free(state);
+    dev->opaque = NULL;
+
+    return 0;
+}
+
+/*
+ * Return the virtqueue index to use with the backend for @idx.
+ *
+ * The index is passed through unchanged; it is only asserted to lie within
+ * [dev->vq_index, dev->vq_index + dev->nvqs).
+ */
+static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
+{
+    int vq_index = idx;
+
+    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+
+    return vq_index;
+}
+
+/*
+ * Return the memory slot count recorded in the VhostUserState that @dev's
+ * vhost-user state points at.
+ */
+static int vhost_user_memslots_limit(struct vhost_dev *dev)
+{
+    struct vhost_user *state = dev->opaque;
+    VhostUserState *shared = state->user;
+
+    return shared->memory_slots;
+}
+
+/*
+ * Report whether VHOST_USER_PROTOCOL_F_LOG_SHMFD is among the protocol
+ * features negotiated for @dev.
+ */
+static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
+{
+    bool has_log_shmfd;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    has_log_shmfd = virtio_has_feature(dev->protocol_features,
+                                       VHOST_USER_PROTOCOL_F_LOG_SHMFD);
+    return has_log_shmfd;
+}
+
+/*
+ * Migration-done hook.
+ *
+ * @mac_addr: 6 bytes are copied from here into the message payload
+ *
+ * Returns 0 without sending anything when the guest acked
+ * VIRTIO_NET_F_GUEST_ANNOUNCE.  Otherwise, when the backend negotiated
+ * VHOST_USER_PROTOCOL_F_RARP, sends VHOST_USER_SEND_RARP and returns the
+ * result of the write.  Returns -ENOTSUP when neither applies.
+ */
+static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
+{
+    VhostUserMsg msg = { };
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    /* If guest supports GUEST_ANNOUNCE do nothing */
+    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
+        return 0;
+    }
+
+    /* Without VHOST_USER_PROTOCOL_F_RARP the backend cannot send the RARP */
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_RARP)) {
+        return -ENOTSUP;
+    }
+
+    /* Ask the backend to send the RARP */
+    msg.hdr.request = VHOST_USER_SEND_RARP;
+    msg.hdr.flags = VHOST_USER_VERSION;
+    msg.hdr.size = sizeof(msg.payload.u64);
+    memcpy((char *)&msg.payload.u64, mac_addr, 6);
+
+    return vhost_user_write(dev, &msg, NULL, 0);
+}
+
+/*
+ * Decide whether two memory ranges may be merged.
+ *
+ * The file descriptor reported by vhost_user_get_mr_data() is looked up for
+ * each start address; the ranges can merge when both descriptors are equal.
+ * @size1 and @size2 are not used.
+ */
+static bool vhost_user_can_merge(struct vhost_dev *dev,
+                                 uint64_t start1, uint64_t size1,
+                                 uint64_t start2, uint64_t size2)
+{
+    ram_addr_t unused_offset;
+    int first_fd, second_fd;
+
+    /* Only the fd output is of interest; the return value is ignored */
+    (void)vhost_user_get_mr_data(start1, &unused_offset, &first_fd);
+    (void)vhost_user_get_mr_data(start2, &unused_offset, &second_fd);
+
+    return first_fd == second_fd;
+}
+
+/*
+ * Send the MTU to the backend with VHOST_USER_NET_SET_MTU.
+ *
+ * Returns 0 without sending anything when VHOST_USER_PROTOCOL_F_NET_MTU was
+ * not negotiated.  When VHOST_USER_PROTOCOL_F_REPLY_ACK was negotiated a
+ * reply is requested and its processing result is returned.  Otherwise
+ * returns 0 after a successful write, or the negative write error.
+ */
+static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
+{
+    VhostUserMsg msg;
+    bool want_ack = virtio_has_feature(dev->protocol_features,
+                                       VHOST_USER_PROTOCOL_F_REPLY_ACK);
+    int rc;
+
+    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
+        return 0;
+    }
+
+    msg.hdr.request = VHOST_USER_NET_SET_MTU;
+    msg.payload.u64 = mtu;
+    msg.hdr.size = sizeof(msg.payload.u64);
+    msg.hdr.flags = VHOST_USER_VERSION;
+    if (want_ack) {
+        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* If reply_ack supported, slave has to ack specified MTU is valid */
+    return want_ack ? process_message_reply(dev, &msg) : 0;
+}
+
+/*
+ * Send a copy of @imsg to the backend as VHOST_USER_IOTLB_MSG.
+ *
+ * A reply is always requested.  Returns the negative write error, or the
+ * result of processing the reply.
+ */
+static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
+                                            struct vhost_iotlb_msg *imsg)
+{
+    VhostUserMsg request = {
+        .hdr.request = VHOST_USER_IOTLB_MSG,
+        .hdr.size = sizeof(request.payload.iotlb),
+        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+        .payload.iotlb = *imsg,
+    };
+    int rc = vhost_user_write(dev, &request, NULL, 0);
+
+    if (rc < 0) {
+        return rc;
+    }
+
+    return process_message_reply(dev, &request);
+}
+
+
+/*
+ * IOTLB callback hook for the vhost-user backend.
+ *
+ * Intentionally does nothing; both @dev and @enabled are ignored.
+ */
+static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
+{
+    /* No-op as the receive channel is not dedicated to IOTLB messages. */
+}
+
+/*
+ * Read @config_len bytes of device configuration from the backend with
+ * VHOST_USER_GET_CONFIG (offset 0) and copy them into @config.
+ *
+ * Fails with -EINVAL when VHOST_USER_PROTOCOL_F_CONFIG was not negotiated
+ * and with -EPROTO when the reply has the wrong type or size.  @config_len
+ * is asserted not to exceed VHOST_USER_MAX_CONFIG_SIZE.
+ *
+ * Returns 0 on success, a negative errno value with @errp set on failure.
+ */
+static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
+                                 uint32_t config_len, Error **errp)
+{
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_GET_CONFIG,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
+    };
+    int rc;
+
+    if (!virtio_has_feature(dev->protocol_features,
+                VHOST_USER_PROTOCOL_F_CONFIG)) {
+        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
+        return -EINVAL;
+    }
+
+    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
+
+    /* Request the whole range starting at the beginning of config space */
+    msg.payload.config.offset = 0;
+    msg.payload.config.size = config_len;
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "vhost_get_config failed");
+        return rc;
+    }
+
+    rc = vhost_user_read(dev, &msg);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "vhost_get_config failed");
+        return rc;
+    }
+
+    /* The reply must mirror the request's type and size */
+    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
+        error_setg(errp,
+                   "Received unexpected msg type. Expected %d received %d",
+                   VHOST_USER_GET_CONFIG, msg.hdr.request);
+        return -EPROTO;
+    }
+    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
+        error_setg(errp, "Received bad msg size.");
+        return -EPROTO;
+    }
+
+    memcpy(config, msg.payload.config.region, config_len);
+
+    return 0;
+}
+
+/*
+ * Write @size bytes from @data into the backend's device configuration at
+ * @offset using VHOST_USER_SET_CONFIG.
+ *
+ * @flags is passed through in the config payload.
+ *
+ * Returns -ENOTSUP when VHOST_USER_PROTOCOL_F_CONFIG was not negotiated and
+ * -EINVAL when @size exceeds VHOST_USER_MAX_CONFIG_SIZE.  When
+ * VHOST_USER_PROTOCOL_F_REPLY_ACK was negotiated a reply is requested and
+ * its processing result is returned.  Otherwise returns 0 after a
+ * successful write, or the negative write error.
+ */
+static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
+                                 uint32_t offset, uint32_t size, uint32_t flags)
+{
+    int ret;
+    uint8_t *p;
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_SET_CONFIG,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
+    };
+
+    if (!virtio_has_feature(dev->protocol_features,
+                VHOST_USER_PROTOCOL_F_CONFIG)) {
+        return -ENOTSUP;
+    }
+
+    if (reply_supported) {
+        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    /* Reject anything that would not fit in the payload's config region */
+    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
+        return -EINVAL;
+    }
+
+    msg.payload.config.offset = offset;
+    msg.payload.config.size = size;
+    msg.payload.config.flags = flags;
+    p = msg.payload.config.region;
+    memcpy(p, data, size);
+
+    ret = vhost_user_write(dev, &msg, NULL, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (reply_supported) {
+        return process_message_reply(dev, &msg);
+    }
+
+    return 0;
+}
+
+/*
+ * Ask the backend to create a crypto session.
+ *
+ * @session_info: read as a CryptoDevBackendSymSessionInfo; the structure and,
+ *                when their lengths are non-zero, the cipher and auth keys
+ *                are copied into the message payload
+ * @session_id:   receives the session id from the backend's reply
+ *
+ * Returns -ENOTSUP when VHOST_USER_PROTOCOL_F_CRYPTO_SESSION was not
+ * negotiated, -EPROTO for a reply of the wrong type or size, -EINVAL for a
+ * negative session id, the negative I/O error when the write or read fails,
+ * and 0 on success.
+ */
+static int vhost_user_crypto_create_session(struct vhost_dev *dev,
+                                            void *session_info,
+                                            uint64_t *session_id)
+{
+    CryptoDevBackendSymSessionInfo *info = session_info;
+    bool has_crypto_session = virtio_has_feature(dev->protocol_features,
+                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.session),
+    };
+    int rc;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+
+    if (!has_crypto_session) {
+        error_report("vhost-user trying to send unhandled ioctl");
+        return -ENOTSUP;
+    }
+
+    /* Build the payload: setup data first, then whichever keys are present */
+    memcpy(&msg.payload.session.session_setup_data, info,
+              sizeof(CryptoDevBackendSymSessionInfo));
+    if (info->key_len) {
+        memcpy(&msg.payload.session.key, info->cipher_key, info->key_len);
+    }
+    if (info->auth_key_len > 0) {
+        memcpy(&msg.payload.session.auth_key, info->auth_key,
+               info->auth_key_len);
+    }
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_report("vhost_user_write() return %d, create session failed",
+                     rc);
+        return rc;
+    }
+
+    rc = vhost_user_read(dev, &msg);
+    if (rc < 0) {
+        error_report("vhost_user_read() return %d, create session failed",
+                     rc);
+        return rc;
+    }
+
+    /* The reply must mirror the request's type and size */
+    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
+        error_report("Received unexpected msg type. Expected %d received %d",
+                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
+        return -EPROTO;
+    }
+    if (msg.hdr.size != sizeof(msg.payload.session)) {
+        error_report("Received bad msg size.");
+        return -EPROTO;
+    }
+
+    if (msg.payload.session.session_id < 0) {
+        error_report("Bad session id: %" PRId64 "",
+                              msg.payload.session.session_id);
+        return -EINVAL;
+    }
+
+    *session_id = msg.payload.session.session_id;
+    return 0;
+}
+
+/*
+ * Ask the backend to close crypto session @session_id with
+ * VHOST_USER_CLOSE_CRYPTO_SESSION.  No reply is read.
+ *
+ * Returns -ENOTSUP when VHOST_USER_PROTOCOL_F_CRYPTO_SESSION was not
+ * negotiated, the negative write error on failure, and 0 on success.
+ */
+static int
+vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
+{
+    bool has_crypto_session = virtio_has_feature(dev->protocol_features,
+                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
+        .hdr.flags = VHOST_USER_VERSION,
+        .hdr.size = sizeof(msg.payload.u64),
+    };
+    int rc;
+
+    msg.payload.u64 = session_id;
+
+    if (!has_crypto_session) {
+        error_report("vhost-user trying to send unhandled ioctl");
+        return -ENOTSUP;
+    }
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        error_report("vhost_user_write() return %d, close session failed",
+                     rc);
+        return rc;
+    }
+
+    return 0;
+}
+
+/*
+ * Memory section filter for vhost-user: accept @section only when
+ * memory_region_get_fd() returns a non-negative descriptor for its region.
+ */
+static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
+                                          MemoryRegionSection *section)
+{
+    return memory_region_get_fd(section->mr) >= 0;
+}
+
+/*
+ * Obtain the inflight tracking area from the backend with
+ * VHOST_USER_GET_INFLIGHT_FD and map it.
+ *
+ * @queue_size: sent to the backend and recorded in @inflight
+ * @inflight:   filled in with the mapping address, fd, size, offset and
+ *              queue size on success
+ *
+ * Returns 0 without touching @inflight when
+ * VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD was not negotiated or when the
+ * backend reports an mmap size of zero.  Returns -EPROTO for a reply of the
+ * wrong type or size, -EIO when no fd accompanies the reply, -EFAULT when
+ * mmap() fails, or the negative I/O error from the write or read.
+ */
+static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
+                                      uint16_t queue_size,
+                                      struct vhost_inflight *inflight)
+{
+    struct vhost_user *u = dev->opaque;
+    CharBackend *chardev = u->user->chr;
+    VhostUserMsg msg = {
+        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
+        .hdr.flags = VHOST_USER_VERSION,
+        .payload.inflight.num_queues = dev->nvqs,
+        .payload.inflight.queue_size = queue_size,
+        .hdr.size = sizeof(msg.payload.inflight),
+    };
+    void *mapping;
+    int shm_fd;
+    int rc;
+
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+        return 0;
+    }
+
+    rc = vhost_user_write(dev, &msg, NULL, 0);
+    if (rc < 0) {
+        return rc;
+    }
+
+    rc = vhost_user_read(dev, &msg);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* The reply must mirror the request's type and size */
+    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
+        error_report("Received unexpected msg type. "
+                     "Expected %d received %d",
+                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
+        return -EPROTO;
+    }
+    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
+        error_report("Received bad msg size.");
+        return -EPROTO;
+    }
+
+    /* A zero mmap size means there is nothing to map */
+    if (!msg.payload.inflight.mmap_size) {
+        return 0;
+    }
+
+    shm_fd = qemu_chr_fe_get_msgfd(chardev);
+    if (shm_fd < 0) {
+        error_report("Failed to get mem fd");
+        return -EIO;
+    }
+
+    mapping = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
+                   MAP_SHARED, shm_fd, msg.payload.inflight.mmap_offset);
+    if (mapping == MAP_FAILED) {
+        error_report("Failed to mmap mem fd");
+        close(shm_fd);
+        return -EFAULT;
+    }
+
+    inflight->addr = mapping;
+    inflight->fd = shm_fd;
+    inflight->size = msg.payload.inflight.mmap_size;
+    inflight->offset = msg.payload.inflight.mmap_offset;
+    inflight->queue_size = queue_size;
+
+    return 0;
+}
+
+/*
+ * Hand the inflight tracking area described by @inflight to the backend
+ * with VHOST_USER_SET_INFLIGHT_FD, passing inflight->fd along with the
+ * message.
+ *
+ * Returns 0 without sending anything when
+ * VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD was not negotiated; otherwise
+ * returns the result of the write.
+ */
+static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
+                                      struct vhost_inflight *inflight)
+{
+    VhostUserMsg request = {
+        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
+        .hdr.flags = VHOST_USER_VERSION,
+        .payload.inflight.mmap_size = inflight->size,
+        .payload.inflight.mmap_offset = inflight->offset,
+        .payload.inflight.num_queues = dev->nvqs,
+        .payload.inflight.queue_size = inflight->queue_size,
+        .hdr.size = sizeof(request.payload.inflight),
+    };
+    bool has_inflight = virtio_has_feature(dev->protocol_features,
+                                       VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD);
+
+    if (!has_inflight) {
+        return 0;
+    }
+
+    return vhost_user_write(dev, &request, &inflight->fd, 1);
+}
+
+/*
+ * Element destructor for the notifiers array created in vhost_user_init().
+ *
+ * @data: a VhostUserHostNotifier, or NULL (in which case nothing is done)
+ */
+static void vhost_user_state_destroy(gpointer data)
+{
+    VhostUserHostNotifier *notifier = (VhostUserHostNotifier *) data;
+
+    if (!notifier) {
+        return;
+    }
+
+    vhost_user_host_notifier_remove(notifier, NULL);
+    object_unparent(OBJECT(&notifier->mr));
+    /*
+     * We can't free until vhost_user_host_notifier_remove has
+     * done its thing so schedule the free with RCU.
+     */
+    g_free_rcu(notifier, rcu);
+}
+
+/*
+ * Initialise @user for use with chardev @chr.
+ *
+ * Fails (returning false with @errp set) when @user already has a chardev
+ * attached.  Otherwise records @chr, resets the memory slot count to 0,
+ * creates the notifiers array and returns true.
+ */
+bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
+{
+    /* A chardev already being set means this state is in use */
+    if (user->chr) {
+        error_setg(errp, "Cannot initialize vhost-user state");
+        return false;
+    }
+
+    user->chr = chr;
+    user->memory_slots = 0;
+    /* Each element is released through vhost_user_state_destroy() */
+    user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
+                                           &vhost_user_state_destroy);
+
+    return true;
+}
+
+/*
+ * Undo vhost_user_init(): free the notifiers array (and its elements) inside
+ * a memory region transaction and detach the chardev.
+ *
+ * Does nothing when @user has no chardev attached.
+ */
+void vhost_user_cleanup(VhostUserState *user)
+{
+    if (user->chr) {
+        memory_region_transaction_begin();
+        user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers,
+                                                         true);
+        memory_region_transaction_commit();
+        user->chr = NULL;
+    }
+}
+
+
+/*
+ * Context handed from vhost_user_async_close() to its bottom half,
+ * vhost_user_async_close_bh(), which frees it.
+ */
+typedef struct {
+    vu_async_close_fn cb;       /* callback the bottom half invokes on dev */
+    DeviceState *dev;           /* argument passed to cb */
+    CharBackend *cd;            /* chardev given to vhost_user_async_close() */
+    struct vhost_dev *vhost;    /* its vdev field gates the call to cb */
+} VhostAsyncCallback;
+
+/*
+ * Bottom half scheduled by vhost_user_async_close().
+ *
+ * @opaque: the VhostAsyncCallback saved by the scheduler; freed here
+ */
+static void vhost_user_async_close_bh(void *opaque)
+{
+    VhostAsyncCallback *ctx = opaque;
+
+    /*
+     * If the vhost_dev has been cleared in the meantime there is
+     * nothing left to do as some other path has completed the
+     * cleanup.
+     */
+    if (ctx->vhost->vdev) {
+        ctx->cb(ctx->dev);
+    }
+
+    g_free(ctx);
+}
+
+/*
+ * Defer the close handling of a vhost-user device to a bottom half.
+ *
+ * @d:       passed to @cb when the bottom half runs
+ * @chardev: chardev whose handlers are cleared so that no further
+ *           notifications arrive
+ * @vhost:   vhost device; marked as not started right away
+ * @cb:      callback the bottom half invokes if @vhost still has a vdev
+ *
+ * Nothing is done when the run state is RUN_STATE_SHUTDOWN.
+ */
+void vhost_user_async_close(DeviceState *d,
+                            CharBackend *chardev, struct vhost_dev *vhost,
+                            vu_async_close_fn cb)
+{
+    AioContext *ctx;
+    VhostAsyncCallback *data;
+
+    if (runstate_check(RUN_STATE_SHUTDOWN)) {
+        return;
+    }
+
+    /*
+     * A close event may happen during a read/write, but vhost
+     * code assumes the vhost_dev remains setup, so delay the
+     * stop & clear.
+     */
+    ctx = qemu_get_current_aio_context();
+    data = g_new0(VhostAsyncCallback, 1);
+
+    /* Save data for the callback */
+    data->cb = cb;
+    data->dev = d;
+    data->cd = chardev;
+    data->vhost = vhost;
+
+    /* Disable any further notifications on the chardev */
+    qemu_chr_fe_set_handlers(chardev,
+                             NULL, NULL, NULL, NULL, NULL, NULL,
+                             false);
+
+    aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
+
+    /*
+     * Move vhost device to the stopped state. The vhost-user device
+     * will be cleaned up and disconnected in BH. This can be useful in
+     * the vhost migration code. If disconnect was caught there is an
+     * option for the general vhost code to get the dev state without
+     * knowing its type (in this case vhost-user).
+     *
+     * Note if the vhost device is fully cleared by the time we
+     * execute the bottom half we won't continue with the cleanup.
+     */
+    vhost->started = false;
+}
+
+/*
+ * Device start/stop hook.
+ *
+ * Returns 0 without doing anything when VHOST_USER_PROTOCOL_F_STATUS was not
+ * negotiated, or when @dev does not cover the last queues
+ * (vq_index + nvqs != vq_index_end).  Otherwise adds the ACKNOWLEDGE,
+ * DRIVER and DRIVER_OK status bits when @started is true, or sets the
+ * status to 0 when it is false, and returns that call's result.
+ */
+static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
+{
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_STATUS)) {
+        return 0;
+    }
+
+    /* Set device status only for last queue pair */
+    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+        return 0;
+    }
+
+    if (!started) {
+        return vhost_user_set_status(dev, 0);
+    }
+
+    return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                                      VIRTIO_CONFIG_S_DRIVER |
+                                      VIRTIO_CONFIG_S_DRIVER_OK);
+}
+
+/*
+ * VhostOps table for the vhost-user backend (VHOST_BACKEND_TYPE_USER).
+ * Each hook is bound to the corresponding vhost_user_* function.
+ */
+const VhostOps user_ops = {
+        .backend_type = VHOST_BACKEND_TYPE_USER,
+        .vhost_backend_init = vhost_user_backend_init,
+        .vhost_backend_cleanup = vhost_user_backend_cleanup,
+        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
+        .vhost_set_log_base = vhost_user_set_log_base,
+        .vhost_set_mem_table = vhost_user_set_mem_table,
+        .vhost_set_vring_addr = vhost_user_set_vring_addr,
+        .vhost_set_vring_endian = vhost_user_set_vring_endian,
+        .vhost_set_vring_num = vhost_user_set_vring_num,
+        .vhost_set_vring_base = vhost_user_set_vring_base,
+        .vhost_get_vring_base = vhost_user_get_vring_base,
+        .vhost_set_vring_kick = vhost_user_set_vring_kick,
+        .vhost_set_vring_call = vhost_user_set_vring_call,
+        .vhost_set_vring_err = vhost_user_set_vring_err,
+        .vhost_set_features = vhost_user_set_features,
+        .vhost_get_features = vhost_user_get_features,
+        .vhost_set_owner = vhost_user_set_owner,
+        .vhost_reset_device = vhost_user_reset_device,
+        .vhost_get_vq_index = vhost_user_get_vq_index,
+        .vhost_set_vring_enable = vhost_user_set_vring_enable,
+        .vhost_requires_shm_log = vhost_user_requires_shm_log,
+        .vhost_migration_done = vhost_user_migration_done,
+        .vhost_backend_can_merge = vhost_user_can_merge,
+        .vhost_net_set_mtu = vhost_user_net_set_mtu,
+        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
+        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
+        .vhost_get_config = vhost_user_get_config,
+        .vhost_set_config = vhost_user_set_config,
+        .vhost_crypto_create_session = vhost_user_crypto_create_session,
+        .vhost_crypto_close_session = vhost_user_crypto_close_session,
+        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
+        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
+        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
+        .vhost_dev_start = vhost_user_dev_start,
+};
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
new file mode 100644
index 00000000..7468e44b
--- /dev/null
+++ b/hw/virtio/vhost-vdpa.c
@@ -0,0 +1,1313 @@
+/*
+ * vhost-vdpa
+ *
+ *  Copyright(c) 2017-2018 Intel Corporation.
+ *  Copyright(c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vhost.h>
+#include <linux/vfio.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
+#include "hw/virtio/vhost-vdpa.h"
+#include "exec/address-spaces.h"
+#include "migration/blocker.h"
+#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
+#include "cpu.h"
+#include "trace.h"
+#include "qapi/error.h"
+
+/*
+ * Compute the exclusive end address of @section inside its address space,
+ * masked with TARGET_PAGE_MASK.  The value is kept as an Int128: be careful
+ * with uint64_t conversions!
+ */
+static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
+{
+    Int128 start = int128_make64(section->offset_within_address_space);
+    Int128 end = int128_add(start, section->size);
+
+    return int128_and(end, int128_exts64(TARGET_PAGE_MASK));
+}
+
+/*
+ * Tell whether the memory listener must ignore @section.
+ *
+ * A section is ignored when it is neither a RAM nor an IOMMU region, when
+ * it is protected, or when it is a RAM device region.  A section that
+ * starts below @iova_min, or whose end (as computed by
+ * vhost_vdpa_section_end()) is greater than @iova_max, is also ignored
+ * after an error has been reported.
+ *
+ * Returns true if the section must be skipped, false otherwise.
+ */
+static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
+                                                uint64_t iova_min,
+                                                uint64_t iova_max)
+{
+    Int128 section_end;
+
+    if (!memory_region_is_ram(section->mr) &&
+        !memory_region_is_iommu(section->mr)) {
+        return true;
+    }
+
+    if (memory_region_is_protected(section->mr)) {
+        return true;
+    }
+
+    /* vhost-vDPA doesn't allow MMIO to be mapped */
+    if (memory_region_is_ram_device(section->mr)) {
+        return true;
+    }
+
+    if (section->offset_within_address_space < iova_min) {
+        error_report("RAM section out of device range (min=0x%" PRIx64
+                     ", addr=0x%" HWADDR_PRIx ")",
+                     iova_min, section->offset_within_address_space);
+        return true;
+    }
+
+    section_end = vhost_vdpa_section_end(section);
+    if (!int128_gt(section_end, int128_make64(iova_max))) {
+        return false;
+    }
+
+    error_report("RAM section out of device range (max=0x%" PRIx64
+                 ", end addr=0x%" PRIx64 ")",
+                 iova_max, int128_get64(section_end));
+    return true;
+}
+
+/*
+ * Send a VHOST_IOTLB_UPDATE message to the vhost-vdpa device fd, asking it
+ * to map @size bytes starting at host address @vaddr to @iova.
+ *
+ * @readonly selects VHOST_ACCESS_RO instead of VHOST_ACCESS_RW.
+ *
+ * Returns 0 on success, -EIO if the message could not be written in full.
+ */
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+                       void *vaddr, bool readonly)
+{
+    int fd = v->device_fd;
+    struct vhost_msg_v2 msg = {
+        .type = v->msg_type,
+        .iotlb = {
+            .iova = iova,
+            .size = size,
+            .uaddr = (uint64_t)(uintptr_t)vaddr,
+            .perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW,
+            .type = VHOST_IOTLB_UPDATE,
+        },
+    };
+
+    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
+                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
+
+    if (write(fd, &msg, sizeof(msg)) == sizeof(msg)) {
+        return 0;
+    }
+
+    error_report("failed to write, fd=%d, errno=%d (%s)",
+                 fd, errno, strerror(errno));
+    return -EIO;
+}
+
+/*
+ * Send a VHOST_IOTLB_INVALIDATE message to the vhost-vdpa device fd for the
+ * range of @size bytes starting at @iova.
+ *
+ * Returns 0 on success, -EIO if the message could not be written in full.
+ */
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
+{
+    int fd = v->device_fd;
+    struct vhost_msg_v2 msg = {
+        .type = v->msg_type,
+        .iotlb = {
+            .iova = iova,
+            .size = size,
+            .type = VHOST_IOTLB_INVALIDATE,
+        },
+    };
+
+    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
+                               msg.iotlb.size, msg.iotlb.type);
+
+    if (write(fd, &msg, sizeof(msg)) == sizeof(msg)) {
+        return 0;
+    }
+
+    error_report("failed to write, fd=%d, errno=%d (%s)",
+                 fd, errno, strerror(errno));
+    return -EIO;
+}
+
+/*
+ * Write a VHOST_IOTLB_BATCH_BEGIN message to the device fd.  A short or
+ * failed write is only reported; the function has no return value.
+ */
+static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
+{
+    struct vhost_msg_v2 msg = {};
+    int fd = v->device_fd;
+    ssize_t written;
+
+    msg.type = v->msg_type;
+    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;
+
+    trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type);
+
+    written = write(fd, &msg, sizeof(msg));
+    if (written != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+                     fd, errno, strerror(errno));
+    }
+}
+
+/*
+ * Send the batch begin message unless it has already been sent, and only if
+ * the backend advertises VHOST_BACKEND_F_IOTLB_BATCH.  The "sent" flag is
+ * raised in every case.
+ */
+static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
+{
+    const uint64_t batch_cap = 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+
+    if ((v->dev->backend_cap & batch_cap) && !v->iotlb_batch_begin_sent) {
+        vhost_vdpa_listener_begin_batch(v);
+    }
+
+    v->iotlb_batch_begin_sent = true;
+}
+
+/*
+ * MemoryListener commit hook: write a VHOST_IOTLB_BATCH_END message and
+ * clear the "batch begin sent" flag.
+ *
+ * Nothing is done when the backend lacks VHOST_BACKEND_F_IOTLB_BATCH or when
+ * the "batch begin sent" flag is not set.
+ */
+static void vhost_vdpa_listener_commit(MemoryListener *listener)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    const uint64_t batch_cap = 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+    int fd = v->device_fd;
+    struct vhost_msg_v2 msg = {
+        .type = v->msg_type,
+        .iotlb.type = VHOST_IOTLB_BATCH_END,
+    };
+
+    if (!(v->dev->backend_cap & batch_cap) || !v->iotlb_batch_begin_sent) {
+        return;
+    }
+
+    trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type);
+    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
+        error_report("failed to write, fd=%d, errno=%d (%s)",
+                     fd, errno, strerror(errno));
+    }
+
+    v->iotlb_batch_begin_sent = false;
+}
+
+/*
+ * MemoryListener region_add hook: map the page-aligned part of @section in
+ * the device.
+ *
+ * Sections rejected by vhost_vdpa_listener_skipped_section(), sections whose
+ * address-space and region offsets have different sub-page alignment, and
+ * sections that are empty once aligned are ignored.  When shadow virtqueues
+ * are enabled the IOVA is allocated from the IOVA tree instead of reusing
+ * the guest address.  Failures are only reported: the hook returns void.
+ */
+static void vhost_vdpa_listener_region_add(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    DMAMap mem_region = {};
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 llend, llsize;
+    void *vaddr;
+    int ret;
+
+    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
+                                            v->iova_range.last)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    llend = vhost_vdpa_section_end(section);
+    if (int128_ge(int128_make64(iova), llend)) {
+        return;
+    }
+
+    memory_region_ref(section->mr);
+
+    /* Here we assume that memory_region_is_ram(section->mr)==true */
+
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
+                                         vaddr, section->readonly);
+
+    llsize = int128_sub(llend, int128_make64(iova));
+    if (v->shadow_vqs_enabled) {
+        int r;
+
+        /* DMAMap sizes are inclusive, hence the "- 1" */
+        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
+        mem_region.size = int128_get64(llsize) - 1;
+        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);
+
+        r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
+        if (unlikely(r != IOVA_OK)) {
+            error_report("Can't allocate a mapping (%d)", r);
+            goto fail;
+        }
+
+        iova = mem_region.iova;
+    }
+
+    vhost_vdpa_iotlb_batch_begin_once(v);
+    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
+                             vaddr, section->readonly);
+    if (ret) {
+        error_report("vhost vdpa map fail!");
+        goto fail_map;
+    }
+
+    return;
+
+fail_map:
+    /* Give back the IOVA range allocated above */
+    if (v->shadow_vqs_enabled) {
+        vhost_iova_tree_remove(v->iova_tree, mem_region);
+    }
+
+fail:
+    /*
+     * This hook cannot return an error to its caller, so all that can be
+     * done here is to report the failure.
+     */
+    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
+    return;
+
+}
+
+/*
+ * MemoryListener region_del hook: unmap the page-aligned part of @section
+ * from the device and drop a reference on its memory region.
+ *
+ * The same filtering as in the region_add hook applies.  With shadow
+ * virtqueues enabled, the IOVA to unmap is looked up in the IOVA tree from
+ * the host virtual address; if no entry is found the function returns
+ * without unmapping anything.
+ */
+static void vhost_vdpa_listener_region_del(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+    hwaddr iova;
+    Int128 end, len;
+
+    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
+                                            v->iova_range.last)) {
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    end = vhost_vdpa_section_end(section);
+
+    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(end));
+
+    if (int128_ge(int128_make64(iova), end)) {
+        return;
+    }
+
+    len = int128_sub(end, int128_make64(iova));
+
+    if (v->shadow_vqs_enabled) {
+        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+        DMAMap needle = {
+            .translated_addr = (hwaddr)(uintptr_t)vaddr,
+            .size = int128_get64(len) - 1,
+        };
+        const DMAMap *found = vhost_iova_tree_find_iova(v->iova_tree, &needle);
+
+        if (!found) {
+            /* The memory listener map wasn't mapped */
+            return;
+        }
+        /* Read the IOVA before the tree entry is removed */
+        iova = found->iova;
+        vhost_iova_tree_remove(v->iova_tree, *found);
+    }
+
+    vhost_vdpa_iotlb_batch_begin_once(v);
+    if (vhost_vdpa_dma_unmap(v, iova, int128_get64(len))) {
+        error_report("vhost_vdpa dma unmap error!");
+    }
+
+    memory_region_unref(section->mr);
+}
+/*
+ * vhost-vdpa uses the IOTLB API, which requires the mapping to be updated
+ * incrementally.  For that reason it cannot use the generic vhost memory
+ * listener and installs this one instead.
+ */
+static const MemoryListener vhost_vdpa_memory_listener = {
+    .name = "vhost-vdpa",
+    .region_add = vhost_vdpa_listener_region_add,
+    .region_del = vhost_vdpa_listener_region_del,
+    .commit = vhost_vdpa_listener_commit,
+};
+
+/*
+ * Issue ioctl @request with argument @arg on the vhost-vdpa device fd.
+ *
+ * Returns the ioctl result when it is non-negative, -errno otherwise.
+ */
+static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
+                             void *arg)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    const int fd = v->device_fd;
+    int rc;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    rc = ioctl(fd, request, arg);
+    if (rc < 0) {
+        return -errno;
+    }
+
+    return rc;
+}
+
+/*
+ * OR the bits in @status into the device status and verify that the device
+ * accepted them.
+ *
+ * The current status is read, updated and written back, then read again.
+ *
+ * Returns 0 on success, a negative errno value if one of the ioctls fails,
+ * or -EIO if any of the requested bits is missing from the status read
+ * back from the device.
+ */
+static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
+{
+    uint8_t s;
+    int ret;
+
+    trace_vhost_vdpa_add_status(dev, status);
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
+    if (ret < 0) {
+        return ret;
+    }
+
+    s |= status;
+
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * Every requested bit must be set: callers may pass several bits at
+     * once, and a partially applied status is a failure too.
+     */
+    if ((s & status) != status) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Query the usable IOVA range from the device and store it in
+ * v->iova_range.  If the ioctl does not return 0, the full
+ * [0, UINT64_MAX] range is used.
+ */
+static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
+{
+    if (vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE,
+                        &v->iova_range) != 0) {
+        v->iova_range.first = 0;
+        v->iova_range.last = UINT64_MAX;
+    }
+
+    trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first,
+                                    v->iova_range.last);
+}
+
+/*
+ * Tell whether @dev is the vhost device with index 0.
+ *
+ * Use this for requests that must be applied only once, typically at the
+ * beginning of operation and before the queues are set up.  Requests that
+ * must wait until all queues are set should check dev->vq_index_end
+ * instead.
+ */
+static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
+{
+    const struct vhost_vdpa *vdpa = dev->opaque;
+
+    return vdpa->index == 0;
+}
+
+/*
+ * Read the device feature bits into @features with VHOST_GET_FEATURES and
+ * trace them.  Returns the result of vhost_vdpa_call().
+ */
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
+                                       uint64_t *features)
+{
+    const int rc = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
+
+    trace_vhost_vdpa_get_features(dev, *features);
+
+    return rc;
+}
+
+/*
+ * Allocate one shadow virtqueue per virtqueue of @hdev and store the array
+ * in v->shadow_vqs.  Does nothing when shadow virtqueues are not enabled.
+ *
+ * Returns 0 on success.  On failure @errp is set and a non-zero value is
+ * returned; the partially built array is released automatically.
+ */
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
+                               Error **errp)
+{
+    g_autoptr(GPtrArray) svqs = NULL;
+    uint64_t dev_features;
+    int r;
+
+    if (!v->shadow_vqs_enabled) {
+        return 0;
+    }
+
+    r = vhost_vdpa_get_dev_features(hdev, &dev_features);
+    if (r != 0) {
+        error_setg_errno(errp, -r, "Can't get vdpa device features");
+        return r;
+    }
+
+    if (unlikely(!vhost_svq_valid_features(dev_features, errp))) {
+        return -1;
+    }
+
+    svqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
+    for (unsigned n = 0; n < hdev->nvqs; ++n) {
+        g_autoptr(VhostShadowVirtqueue) svq =
+            vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+                          v->shadow_vq_ops_opaque);
+
+        if (unlikely(!svq)) {
+            error_setg(errp, "Cannot create svq %u", n);
+            return -1;
+        }
+        /* Ownership of the queue moves to the array */
+        g_ptr_array_add(svqs, g_steal_pointer(&svq));
+    }
+
+    v->shadow_vqs = g_steal_pointer(&svqs);
+    return 0;
+}
+
+/*
+ * Backend init hook: bind @opaque (a struct vhost_vdpa) to @dev, set up the
+ * shadow virtqueues and the IOVA range and, for the first device only, add
+ * the ACKNOWLEDGE and DRIVER status bits.
+ *
+ * Returns 0 on success, a non-zero value otherwise.
+ */
+static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
+{
+    struct vhost_vdpa *v = opaque;
+    int ret;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+    trace_vhost_vdpa_init(dev, opaque);
+
+    /*
+     * Similar to VFIO, we end up pinning all guest memory and have to
+     * disable discarding of RAM.
+     */
+    ret = ram_block_discard_disable(true);
+    if (ret) {
+        error_report("Cannot set discarding of RAM broken");
+        return ret;
+    }
+
+    v->dev = dev;
+    dev->opaque = opaque;
+    v->listener = vhost_vdpa_memory_listener;
+    v->msg_type = VHOST_IOTLB_MSG_V2;
+
+    ret = vhost_vdpa_init_svq(dev, v, errp);
+    if (ret) {
+        /* Undo the discard restriction taken above */
+        ram_block_discard_disable(false);
+        return ret;
+    }
+
+    vhost_vdpa_get_iova_range(v);
+
+    if (vhost_vdpa_first_dev(dev)) {
+        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                                   VIRTIO_CONFIG_S_DRIVER);
+    }
+
+    return 0;
+}
+
+/*
+ * Tear down the host notifier memory region of queue @queue_index, if one
+ * is currently mapped: detach it from the virtio device, unparent it and
+ * unmap the page.
+ */
+static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
+                                            int queue_index)
+{
+    size_t page_size = qemu_real_host_page_size();
+    struct vhost_vdpa *v = dev->opaque;
+    VirtIODevice *vdev = dev->vdev;
+    VhostVDPAHostNotifier *notifier = &v->notifier[queue_index];
+
+    if (!notifier->addr) {
+        return;
+    }
+
+    virtio_queue_set_host_notifier_mr(vdev, queue_index, &notifier->mr, false);
+    object_unparent(OBJECT(&notifier->mr));
+    munmap(notifier->addr, page_size);
+    notifier->addr = NULL;
+}
+
+/*
+ * Map the notification page of queue @queue_index from the device fd and
+ * expose it to the guest as a host notifier memory region.  Any notifier
+ * previously set up for the queue is torn down first.
+ *
+ * Returns 0 on success, -1 if the page cannot be mapped or the memory
+ * region cannot be installed.
+ */
+static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
+{
+    size_t page_size = qemu_real_host_page_size();
+    struct vhost_vdpa *v = dev->opaque;
+    VirtIODevice *vdev = dev->vdev;
+    VhostVDPAHostNotifier *notifier = &v->notifier[queue_index];
+    int fd = v->device_fd;
+    char *mr_name;
+    void *page;
+
+    vhost_vdpa_host_notifier_uninit(dev, queue_index);
+
+    page = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
+                queue_index * page_size);
+    if (page == MAP_FAILED) {
+        return -1;
+    }
+
+    mr_name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
+                              v, queue_index);
+    memory_region_init_ram_device_ptr(&notifier->mr, OBJECT(vdev), mr_name,
+                                      page_size, page);
+    g_free(mr_name);
+
+    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &notifier->mr,
+                                          true)) {
+        object_unparent(OBJECT(&notifier->mr));
+        munmap(page, page_size);
+        return -1;
+    }
+
+    /* Recording the address marks the notifier as active */
+    notifier->addr = page;
+    return 0;
+}
+
+/*
+ * Tear down the host notifiers of the first @n queues of @dev, starting at
+ * dev->vq_index.
+ */
+static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
+{
+    int queue = dev->vq_index;
+
+    while (queue < dev->vq_index + n) {
+        vhost_vdpa_host_notifier_uninit(dev, queue);
+        queue++;
+    }
+}
+
+/*
+ * Set up a host notifier for every queue of @dev.  If one of them fails,
+ * the notifiers already set up by this call are torn down again.  Nothing
+ * is done when shadow virtqueues are enabled.
+ */
+static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int i;
+
+    if (v->shadow_vqs_enabled) {
+        /* FIXME SVQ is not compatible with host notifiers mr */
+        return;
+    }
+
+    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
+        if (vhost_vdpa_host_notifier_init(dev, i)) {
+            /* Roll back the queues initialized before the failing one */
+            vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
+            return;
+        }
+    }
+}
+
+/*
+ * Stop every shadow virtqueue of @dev and release the array holding them.
+ * Does nothing when no array was allocated.
+ */
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    size_t idx;
+
+    if (!v->shadow_vqs) {
+        return;
+    }
+
+    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
+        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
+    }
+    g_ptr_array_free(v->shadow_vqs, true);
+    /*
+     * Other functions test v->shadow_vqs against NULL, so do not leave a
+     * dangling pointer behind once the array is gone.
+     */
+    v->shadow_vqs = NULL;
+}
+
+/*
+ * Backend cleanup hook: remove the host notifiers, unregister the memory
+ * listener, release the shadow virtqueues, detach the backend state from
+ * @dev and re-enable discarding of RAM.  Always returns 0.
+ */
+static int vhost_vdpa_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *vdpa;
+
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+
+    vdpa = dev->opaque;
+    trace_vhost_vdpa_cleanup(dev, vdpa);
+
+    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
+    memory_listener_unregister(&vdpa->listener);
+    vhost_vdpa_svq_cleanup(dev);
+
+    dev->opaque = NULL;
+    ram_block_discard_disable(false);
+
+    return 0;
+}
+
+/* Report the memory slot limit of the backend, which is INT_MAX. */
+static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
+{
+    const int limit = INT_MAX;
+
+    trace_vhost_vdpa_memslots_limit(dev, limit);
+
+    return limit;
+}
+
+/*
+ * Backend set_mem_table hook.  Nothing is sent to the device: the table is
+ * only traced, for the first device, and its padding field is validated.
+ *
+ * Returns 0, or -EINVAL if mem->padding is not zero.
+ */
+static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+    if (!vhost_vdpa_first_dev(dev)) {
+        return 0;
+    }
+
+    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
+
+    /* Only walk the regions when both trace events are enabled */
+    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
+        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
+        for (int i = 0; i < mem->nregions; i++) {
+            trace_vhost_vdpa_dump_regions(dev, i,
+                                          mem->regions[i].guest_phys_addr,
+                                          mem->regions[i].memory_size,
+                                          mem->regions[i].userspace_addr,
+                                          mem->regions[i].flags_padding);
+        }
+    }
+
+    return mem->padding ? -EINVAL : 0;
+}
+
+/*
+ * Backend set_features hook: acknowledge @features to the device and add
+ * the FEATURES_OK status bit.  Only the first device does any work.
+ *
+ * With shadow virtqueues enabled, VHOST_F_LOG_ALL is never forwarded to the
+ * device, and a change that toggles only that bit is recorded without
+ * talking to the device at all.
+ *
+ * Returns 0 on success, a non-zero value otherwise.
+ */
+static int vhost_vdpa_set_features(struct vhost_dev *dev,
+                                   uint64_t features)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int ret;
+
+    if (!vhost_vdpa_first_dev(dev)) {
+        return 0;
+    }
+
+    if (v->shadow_vqs_enabled) {
+        const uint64_t changed = v->acked_features ^ features;
+
+        v->acked_features = features;
+
+        if (changed == BIT_ULL(VHOST_F_LOG_ALL)) {
+            /*
+             * QEMU is just trying to enable or disable logging. SVQ handles
+             * this separately, so no need to forward this.
+             */
+            return 0;
+        }
+
+        /* We must not ack _F_LOG if SVQ is enabled */
+        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
+    }
+
+    trace_vhost_vdpa_set_features(dev, features);
+    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
+    if (ret) {
+        return ret;
+    }
+
+    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+}
+
+/*
+ * Negotiate the backend features: read what the backend offers, keep only
+ * IOTLB_MSG_V2 and IOTLB_BATCH, acknowledge the result (first device only)
+ * and store it in dev->backend_cap.
+ *
+ * Returns 0 on success, -EFAULT if one of the ioctls fails.
+ */
+static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+{
+    const uint64_t wanted = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
+                            0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+    uint64_t features;
+
+    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
+        return -EFAULT;
+    }
+
+    features &= wanted;
+
+    if (vhost_vdpa_first_dev(dev) &&
+        vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features)) {
+        return -EFAULT;
+    }
+
+    dev->backend_cap = features;
+
+    return 0;
+}
+
+/*
+ * Read the device id into @device_id with VHOST_VDPA_GET_DEVICE_ID and
+ * trace it.  Returns the result of vhost_vdpa_call().
+ */
+static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
+                                    uint32_t *device_id)
+{
+    const int rc = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
+
+    trace_vhost_vdpa_get_device_id(dev, *device_id);
+
+    return rc;
+}
+
+/* Stop every shadow virtqueue of @v, if shadow virtqueues are enabled. */
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
+{
+    unsigned i;
+
+    if (!v->shadow_vqs_enabled) {
+        return;
+    }
+
+    for (i = 0; i < v->shadow_vqs->len; ++i) {
+        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
+    }
+}
+
+/*
+ * Backend reset hook: stop the shadow virtqueues, then write a status of 0
+ * to the device.  Returns the result of the SET_STATUS call.
+ */
+static int vhost_vdpa_reset_device(struct vhost_dev *dev)
+{
+    uint8_t status = 0;
+    int ret;
+
+    vhost_vdpa_reset_svq(dev->opaque);
+
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
+    trace_vhost_vdpa_reset_device(dev, status);
+
+    return ret;
+}
+
+/*
+ * Translate a virtqueue index for the backend.  The index is returned
+ * unchanged; it is only asserted to belong to @dev.
+ */
+static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
+{
+    int backend_idx;
+
+    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+
+    backend_idx = idx;
+    trace_vhost_vdpa_get_vq_index(dev, idx, backend_idx);
+
+    return backend_idx;
+}
+
+/*
+ * Enable every vring of @dev with VHOST_VDPA_SET_VRING_ENABLE.  The result
+ * of the individual calls is not checked; the function always returns 0.
+ */
+static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
+{
+    trace_vhost_vdpa_set_vring_ready(dev);
+
+    for (int i = 0; i < dev->nvqs; ++i) {
+        struct vhost_vring_state enable = {
+            .index = dev->vq_index + i,
+            .num = 1,
+        };
+
+        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &enable);
+    }
+
+    return 0;
+}
+
+/*
+ * Trace @config_len bytes of @config as hexdump lines, advancing 16 bytes
+ * per line.  The number of bytes remaining is passed to
+ * qemu_hexdump_line() for every line.
+ */
+static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
+                                   uint32_t config_len)
+{
+    char line[QEMU_HEXDUMP_LINE_LEN];
+    int offset;
+
+    for (offset = 0; offset < config_len; offset += 16) {
+        int remaining = config_len - offset;
+
+        qemu_hexdump_line(line, offset, config, remaining, false);
+        trace_vhost_vdpa_dump_config(dev, line);
+    }
+}
+
+/*
+ * Write @size bytes from @data to the device config space at @offset with
+ * VHOST_VDPA_SET_CONFIG.  @flags is only traced.
+ *
+ * Returns the result of vhost_vdpa_call().
+ */
+static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
+                                   uint32_t offset, uint32_t size,
+                                   uint32_t flags)
+{
+    const unsigned long header_len = offsetof(struct vhost_vdpa_config, buf);
+    struct vhost_vdpa_config *request;
+    int ret;
+
+    trace_vhost_vdpa_set_config(dev, offset, size, flags);
+
+    /* The payload follows the fixed header in a single allocation */
+    request = g_malloc(size + header_len);
+    request->off = offset;
+    request->len = size;
+    memcpy(request->buf, data, size);
+
+    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
+        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
+        vhost_vdpa_dump_config(dev, data, size);
+    }
+
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, request);
+    g_free(request);
+
+    return ret;
+}
+
+/*
+ * Read @config_len bytes of the device config space, from offset 0, into
+ * @config with VHOST_VDPA_GET_CONFIG.
+ *
+ * @config is always written.  If the ioctl fails it receives whatever the
+ * zero-initialized request buffer holds at that point, never uninitialized
+ * heap contents.  @errp is not set by this function.
+ *
+ * Returns the result of vhost_vdpa_call().
+ */
+static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
+                                   uint32_t config_len, Error **errp)
+{
+    struct vhost_vdpa_config *v_config;
+    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
+    int ret;
+
+    trace_vhost_vdpa_get_config(dev, config, config_len);
+    /*
+     * Zero the buffer: it is copied to the caller and possibly traced even
+     * when the ioctl below fails and leaves it untouched.
+     */
+    v_config = g_malloc0(config_len + config_size);
+    v_config->len = config_len;
+    v_config->off = 0;
+    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
+    memcpy(config, v_config->buf, config_len);
+    g_free(v_config);
+    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
+        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
+        vhost_vdpa_dump_config(dev, config, config_len);
+    }
+    return ret;
+}
+
+/* Trace and forward a VHOST_SET_VRING_BASE request to the device. */
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
+                                         struct vhost_vring_state *ring)
+{
+    int ret;
+
+    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
+    ret = vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+
+    return ret;
+}
+
+/* Trace and forward a VHOST_SET_VRING_KICK request to the device. */
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    int ret;
+
+    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
+    ret = vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+
+    return ret;
+}
+
+/* Trace and forward a VHOST_SET_VRING_CALL request to the device. */
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    int ret;
+
+    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
+    ret = vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+
+    return ret;
+}
+
+/* Trace and forward a VHOST_SET_VRING_ADDR request to the device. */
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
+                                         struct vhost_vring_addr *addr)
+{
+    int ret;
+
+    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
+                                    addr->desc_user_addr,
+                                    addr->used_user_addr,
+                                    addr->avail_user_addr,
+                                    addr->log_guest_addr);
+
+    ret = vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+
+    return ret;
+}
+
+/**
+ * Hand the kick and call file descriptors of a shadow virtqueue to the
+ * device
+ *
+ * @dev: The vhost device model
+ * @svq: The shadow virtqueue
+ * @idx: The index of the virtqueue in the vhost device
+ * @errp: Error
+ *
+ * Returns 0 on success, the failing call's error otherwise.
+ *
+ * Note that the kick file descriptor is not rewound if the call one cannot
+ * be set.
+ */
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
+                                  VhostShadowVirtqueue *svq, unsigned idx,
+                                  Error **errp)
+{
+    struct vhost_vring_file file = {
+        .index = dev->vq_index + idx,
+        .fd = event_notifier_get_fd(&svq->hdev_kick),
+    };
+    int r;
+
+    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device kick fd");
+        return r;
+    }
+
+    file.fd = event_notifier_get_fd(&svq->hdev_call);
+    r = vhost_vdpa_set_vring_dev_call(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device call fd");
+    }
+
+    return r;
+}
+
+/**
+ * Unmap a SVQ area in the device
+ *
+ * @v: Vhost-vdpa device
+ * @addr: Translated (host) address of the area, used to look up its IOVA
+ *
+ * The IOVA tree entry is removed only if the device unmap succeeds.
+ */
+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
+{
+    const DMAMap needle = {
+        .translated_addr = addr,
+    };
+    const DMAMap *map = vhost_iova_tree_find_iova(v->iova_tree, &needle);
+    hwaddr len;
+    int r;
+
+    if (unlikely(!map)) {
+        error_report("Unable to find SVQ address to unmap");
+        return;
+    }
+
+    len = ROUND_UP(map->size, qemu_real_host_page_size());
+    r = vhost_vdpa_dma_unmap(v, map->iova, len);
+    if (unlikely(r < 0)) {
+        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
+        return;
+    }
+
+    vhost_iova_tree_remove(v->iova_tree, *map);
+}
+
+/*
+ * Unmap the two areas of @svq from the device: the one starting at the
+ * descriptor address and the one starting at the used ring address.
+ */
+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
+                                       const VhostShadowVirtqueue *svq)
+{
+    struct vhost_vdpa *vdpa = dev->opaque;
+    struct vhost_vring_addr vring;
+
+    vhost_svq_get_vring_addr(svq, &vring);
+
+    vhost_vdpa_svq_unmap_ring(vdpa, vring.desc_user_addr);
+    vhost_vdpa_svq_unmap_ring(vdpa, vring.used_user_addr);
+}
+
+/**
+ * Map the SVQ area in the device
+ *
+ * @v: Vhost-vdpa device
+ * @needle: The area to map; its iova is filled in by the IOVA allocator
+ * @errp: Error pointer
+ *
+ * Returns true on success.  On a device mapping failure the allocated IOVA
+ * is released again.
+ */
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
+                                    Error **errp)
+{
+    int r;
+
+    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
+    if (unlikely(r != IOVA_OK)) {
+        error_setg(errp, "Cannot allocate iova (%d)", r);
+        return false;
+    }
+
+    /* needle->size is inclusive, the device wants the byte count */
+    r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
+                           (void *)(uintptr_t)needle->translated_addr,
+                           needle->perm == IOMMU_RO);
+    if (likely(r == 0)) {
+        return true;
+    }
+
+    error_setg_errno(errp, -r, "Cannot map region to device");
+    vhost_iova_tree_remove(v->iova_tree, *needle);
+    return false;
+}
+
+/**
+ * Map the shadow virtqueue rings in the device
+ *
+ * @dev: The vhost device
+ * @svq: The shadow virtqueue
+ * @addr: Assigned IOVA addresses
+ * @errp: Error pointer
+ *
+ * The driver area is mapped read-only and the device area read-write.  If
+ * the device area cannot be mapped, the driver area is unmapped again.
+ *
+ * Returns true on success, false (with @errp set) otherwise.
+ */
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
+                                     const VhostShadowVirtqueue *svq,
+                                     struct vhost_vring_addr *addr,
+                                     Error **errp)
+{
+    DMAMap device_region, driver_region;
+    struct vhost_vring_addr svq_addr;
+    struct vhost_vdpa *v = dev->opaque;
+    size_t device_size = vhost_svq_device_area_size(svq);
+    size_t driver_size = vhost_svq_driver_area_size(svq);
+    size_t avail_offset;
+    bool mapped;
+
+    ERRP_GUARD();
+    vhost_svq_get_vring_addr(svq, &svq_addr);
+
+    /* Driver area: starts at the descriptor table, read-only */
+    driver_region = (DMAMap) {
+        .translated_addr = svq_addr.desc_user_addr,
+        .size = driver_size - 1,
+        .perm = IOMMU_RO,
+    };
+    if (unlikely(!vhost_vdpa_svq_map_ring(v, &driver_region, errp))) {
+        error_prepend(errp, "Cannot create vq driver region: ");
+        return false;
+    }
+
+    /* The avail ring keeps its offset from the descriptor table */
+    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
+    addr->desc_user_addr = driver_region.iova;
+    addr->avail_user_addr = driver_region.iova + avail_offset;
+
+    /* Device area: starts at the used ring, read-write */
+    device_region = (DMAMap) {
+        .translated_addr = svq_addr.used_user_addr,
+        .size = device_size - 1,
+        .perm = IOMMU_RW,
+    };
+    mapped = vhost_vdpa_svq_map_ring(v, &device_region, errp);
+    if (unlikely(!mapped)) {
+        error_prepend(errp, "Cannot create vq device region: ");
+        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
+    }
+    addr->used_user_addr = device_region.iova;
+
+    return mapped;
+}
+
+/*
+ * Prepare the device side of shadow virtqueue @idx: set its vring base
+ * (the num field is left at zero) and hand over the SVQ kick and call file
+ * descriptors.
+ *
+ * Returns true on success, false (with @errp set) otherwise.
+ */
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+                                 VhostShadowVirtqueue *svq, unsigned idx,
+                                 Error **errp)
+{
+    struct vhost_vring_state state = {
+        .index = (uint16_t)(dev->vq_index + idx),
+    };
+    int r;
+
+    r = vhost_vdpa_set_dev_vring_base(dev, &state);
+    if (unlikely(r)) {
+        error_setg_errno(errp, -r, "Cannot set vring base");
+        return false;
+    }
+
+    return vhost_vdpa_svq_set_fds(dev, svq, idx, errp) == 0;
+}
+
+/*
+ * Start every shadow virtqueue of @dev: set up the device side, start the
+ * SVQ, map its rings in the device and point the device vring at the
+ * mapped addresses.
+ *
+ * Returns true on success, or when no shadow virtqueue array exists.  On
+ * failure the error is reported, the queue that failed is unwound as far as
+ * it got, the queues started before it are unmapped and stopped, and false
+ * is returned.
+ */
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    Error *err = NULL;
+    unsigned i;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (i = 0; i < v->shadow_vqs->len; ++i) {
+        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        struct vhost_vring_addr addr = {
+            .index = dev->vq_index + i,
+        };
+        int r;
+        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
+        if (unlikely(!ok)) {
+            goto err;
+        }
+
+        vhost_svq_start(svq, dev->vdev, vq);
+        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
+        if (unlikely(!ok)) {
+            goto err_map;
+        }
+
+        /* Override vring GPA set by vhost subsystem */
+        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
+        if (unlikely(r != 0)) {
+            error_setg_errno(&err, -r, "Cannot set device address");
+            goto err_set_addr;
+        }
+    }
+
+    return true;
+
+    /*
+     * Unwind ladder: each label undoes one more step of queue i, then
+     * falls through to the next label.
+     */
+err_set_addr:
+    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
+
+err_map:
+    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
+
+err:
+    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+    /* Queues [0, i) were fully started: unmap and stop each of them */
+    for (unsigned j = 0; j < i; ++j) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
+        vhost_vdpa_svq_unmap_rings(dev, svq);
+        vhost_svq_stop(svq);
+    }
+
+    return false;
+}
+
+/* Unmap the rings of every shadow virtqueue of @dev, if it has any. */
+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    unsigned idx;
+
+    if (!v->shadow_vqs) {
+        return;
+    }
+    for (idx = 0; idx < v->shadow_vqs->len; idx++) {
+        vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, idx));
+    }
+}
+/* vhost_dev_start hook: start (@started true) or stop this vhost_dev. */
+static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    bool ok;
+    trace_vhost_vdpa_dev_start(dev, started);
+
+    if (started) {
+        vhost_vdpa_host_notifiers_init(dev);
+        ok = vhost_vdpa_svqs_start(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
+        vhost_vdpa_set_vring_ready(dev);
+    } else {
+        vhost_vdpa_svqs_stop(dev);
+        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
+    }
+    /* Only the vhost_dev whose queues end at vq_index_end continues below */
+    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+        return 0;
+    }
+
+    if (started) {
+        memory_listener_register(&v->listener, &address_space_memory);
+        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+    } else {
+        vhost_vdpa_reset_device(dev);
+        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+                                   VIRTIO_CONFIG_S_DRIVER);
+        memory_listener_unregister(&v->listener);
+
+        return 0;
+    }
+}
+
+/* Forward the log base to the device unless SVQ is on or @dev isn't first. */
+static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                     struct vhost_log *log)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    if (!v->shadow_vqs_enabled && vhost_vdpa_first_dev(dev)) {
+        trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt,
+                                      log->fd, log->log);
+        return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
+    }
+    return 0;
+}
+
+/* Set the vring addresses on the device, except in shadow virtqueue mode. */
+static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
+                                       struct vhost_vring_addr *addr)
+{
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (!v->shadow_vqs_enabled) {
+        return vhost_vdpa_set_vring_dev_addr(dev, addr);
+    }
+    /*
+     * Device vring addr was set at device start. SVQ base is handled by
+     * VirtQueue code.
+     */
+    return 0;
+}
+/* Issue VHOST_SET_VRING_NUM on the device with the state given in @ring. */
+static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
+                                      struct vhost_vring_state *ring)
+{
+    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
+}
+
+/* Rewind in-flight elements, then set the device vring base unless SVQ. */
+static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
+
+    /*
+     * vhost-vdpa devices do not support in-flight requests. Set all of them
+     * as available.
+     *
+     * TODO: This is ok for networking, but other kinds of devices might
+     * have problems with these retransmissions.
+     */
+    while (virtqueue_rewind(vq, 1)) {
+        /* keep rewinding while virtqueue_rewind() reports success */
+    }
+    if (!v->shadow_vqs_enabled) {
+        return vhost_vdpa_set_dev_vring_base(dev, ring);
+    }
+    /*
+     * Device vring base was set at device start. SVQ base is handled by
+     * VirtQueue code.
+     */
+    return 0;
+}
+
+/* Read the vring base: from the VirtQueue with SVQ, else from the device. */
+static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
+                                       struct vhost_vring_state *ring)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int ret = 0;
+
+    if (v->shadow_vqs_enabled) {
+        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
+    } else {
+        ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
+        trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
+    }
+    return ret;
+}
+
+/* Route the kick fd in @file to the SVQ when enabled, else to the device. */
+static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
+                                       struct vhost_vring_file *file)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int svq_idx = file->index - dev->vq_index;
+
+    if (!v->shadow_vqs_enabled) {
+        return vhost_vdpa_set_vring_dev_kick(dev, file);
+    }
+    vhost_svq_set_svq_kick_fd(g_ptr_array_index(v->shadow_vqs, svq_idx),
+                              file->fd);
+    return 0;
+}
+
+/* Route the call fd in @file to the SVQ when enabled, else to the device. */
+static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
+                                       struct vhost_vring_file *file)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int svq_idx;
+
+    if (!v->shadow_vqs_enabled) {
+        return vhost_vdpa_set_vring_dev_call(dev, file);
+    }
+    svq_idx = file->index - dev->vq_index;
+    vhost_svq_set_svq_call_fd(g_ptr_array_index(v->shadow_vqs, svq_idx),
+                              file->fd);
+    return 0;
+}
+
+/* Fetch the device features; with SVQ also advertise VHOST_F_LOG_ALL. */
+static int vhost_vdpa_get_features(struct vhost_dev *dev,
+                                     uint64_t *features)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    int ret = vhost_vdpa_get_dev_features(dev, features);
+    if (ret != 0 || !v->shadow_vqs_enabled) {
+        return ret;
+    }
+    /* Add SVQ logging capabilities */
+    *features |= BIT_ULL(VHOST_F_LOG_ALL);
+    return ret;
+}
+
+/* Issue VHOST_SET_OWNER, but only through the first vhost_dev. */
+static int vhost_vdpa_set_owner(struct vhost_dev *dev)
+{
+    if (vhost_vdpa_first_dev(dev)) {
+        trace_vhost_vdpa_set_owner(dev);
+        return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+    }
+    return 0;
+}
+/* Fill @addr with the desc/avail/used *_phys addresses recorded in @vq. */
+static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
+                    struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
+{
+    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
+    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
+    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
+    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
+                                 addr->avail_user_addr, addr->used_user_addr);
+    return 0;
+}
+/* vhost_force_iommu hook: always true for this backend. */
+static bool  vhost_vdpa_force_iommu(struct vhost_dev *dev)
+{
+    return true;
+}
+/* vhost-vdpa backend operations; NULL marks hooks it does not provide. */
+const VhostOps vdpa_ops = {
+        .backend_type = VHOST_BACKEND_TYPE_VDPA,
+        .vhost_backend_init = vhost_vdpa_init,
+        .vhost_backend_cleanup = vhost_vdpa_cleanup,
+        .vhost_set_log_base = vhost_vdpa_set_log_base,
+        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
+        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
+        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
+        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
+        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
+        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
+        .vhost_get_features = vhost_vdpa_get_features,
+        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
+        .vhost_set_owner = vhost_vdpa_set_owner,
+        .vhost_set_vring_endian = NULL,
+        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
+        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
+        .vhost_set_features = vhost_vdpa_set_features,
+        .vhost_reset_device = vhost_vdpa_reset_device,
+        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
+        .vhost_get_config  = vhost_vdpa_get_config,
+        .vhost_set_config = vhost_vdpa_set_config,
+        .vhost_requires_shm_log = NULL,
+        .vhost_migration_done = NULL,
+        .vhost_backend_can_merge = NULL,
+        .vhost_net_set_mtu = NULL,
+        .vhost_set_iotlb_callback = NULL,
+        .vhost_send_device_iotlb_msg = NULL,
+        .vhost_dev_start = vhost_vdpa_dev_start,
+        .vhost_get_device_id = vhost_vdpa_get_device_id,
+        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
+        .vhost_force_iommu = vhost_vdpa_force_iommu,
+};
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
new file mode 100644
index 00000000..d21c72b4
--- /dev/null
+++ b/hw/virtio/vhost-vsock-common.c
@@ -0,0 +1,300 @@
+/*
+ * Parent class for vhost-vsock devices
+ *
+ * Copyright 2015-2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_vsock.h"
+#include "qapi/error.h"
+#include "hw/virtio/virtio-access.h"
+#include "qemu/error-report.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-vsock.h"
+#include "qemu/iov.h"
+#include "monitor/monitor.h"
+/* Bits given to vhost_get_features(); VHOST_INVALID_FEATURE_BIT ends it. */
+const int feature_bits[] = {
+    VIRTIO_VSOCK_F_SEQPACKET,
+    VIRTIO_F_RING_RESET,
+    VHOST_INVALID_FEATURE_BIT
+};
+/* Build the features; seqpacket=on sets @errp when vhost lacks the bit. */
+uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features,
+                                         Error **errp)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+
+    if (vvc->seqpacket != ON_OFF_AUTO_OFF) {
+        virtio_add_feature(&features, VIRTIO_VSOCK_F_SEQPACKET);
+    }
+
+    features = vhost_get_features(&vvc->vhost_dev, feature_bits, features);
+
+    if (vvc->seqpacket == ON_OFF_AUTO_ON &&
+        !virtio_has_feature(features, VIRTIO_VSOCK_F_SEQPACKET)) {
+        error_setg(errp, "vhost-vsock backend doesn't support seqpacket");
+    }
+
+    return features;
+}
+/* Enable notifiers and start the vhost device; 0 on success, <0 on error. */
+int vhost_vsock_common_start(VirtIODevice *vdev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    int ret;
+    int i;
+
+    if (!k->set_guest_notifiers) {
+        error_report("binding does not support guest notifiers");
+        return -ENOSYS;
+    }
+
+    ret = vhost_dev_enable_notifiers(&vvc->vhost_dev, vdev);
+    if (ret < 0) {
+        error_report("Error enabling host notifiers: %d", -ret);
+        return ret;
+    }
+
+    ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, true);
+    if (ret < 0) {
+        error_report("Error binding guest notifier: %d", -ret);
+        goto err_host_notifiers;
+    }
+
+    vvc->vhost_dev.acked_features = vdev->guest_features;
+    ret = vhost_dev_start(&vvc->vhost_dev, vdev, true);
+    if (ret < 0) {
+        error_report("Error starting vhost: %d", -ret);
+        goto err_guest_notifiers;
+    }
+
+    /*
+     * guest_notifier_mask/pending not used yet, so just unmask
+     * everything here.  virtio-pci will do the right thing by
+     * enabling/disabling irqfd.
+     */
+    for (i = 0; i < vvc->vhost_dev.nvqs; i++) {
+        vhost_virtqueue_mask(&vvc->vhost_dev, vdev, i, false);
+    }
+
+    return 0;
+
+err_guest_notifiers:
+    k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false);
+err_host_notifiers:
+    vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev);
+    return ret;
+}
+/* Stop vhost, then release the guest notifiers and the host notifiers. */
+void vhost_vsock_common_stop(VirtIODevice *vdev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    int ret;
+
+    if (!k->set_guest_notifiers) {
+        return;
+    }
+
+    vhost_dev_stop(&vvc->vhost_dev, vdev, true);
+
+    ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false);
+    if (ret < 0) {
+        error_report("vhost guest notifier cleanup failed: %d", ret);
+        return;
+    }
+
+    vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev);
+}
+
+/* Queue handler that does nothing; realize registers it for every queue. */
+static void vhost_vsock_common_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /* Do nothing */
+}
+/* guest_notifier_mask hook: pass the (un)mask request on to vhost. */
+static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                            bool mask)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+
+    vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask);
+}
+/* guest_notifier_pending hook: ask vhost whether queue @idx is pending. */
+static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev,
+                                               int idx)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+
+    return vhost_virtqueue_pending(&vvc->vhost_dev, idx);
+}
+/* Push a TRANSPORT_RESET event to the guest through the event virtqueue. */
+static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc)
+{
+    VirtQueueElement *elem;
+    VirtQueue *vq = vvc->event_vq;
+    struct virtio_vsock_event event = {
+        .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET),
+    };
+
+    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+    if (!elem) {
+        error_report("vhost-vsock missed transport reset event");
+        return;
+    }
+
+    if (elem->out_num) {
+        error_report("invalid vhost-vsock event virtqueue element with "
+                     "out buffers");
+        goto err;
+    }
+
+    if (iov_from_buf(elem->in_sg, elem->in_num, 0,
+                     &event, sizeof(event)) != sizeof(event)) {
+        error_report("vhost-vsock event virtqueue element is too short");
+        goto err;
+    }
+
+    virtqueue_push(vq, elem, sizeof(event));
+    virtio_notify(VIRTIO_DEVICE(vvc), vq);
+
+    g_free(elem);
+    return;
+
+err:
+    virtqueue_detach_element(vq, elem, 0);
+    g_free(elem);
+}
+
+/* Free the post-load timer, if there is one, and clear the pointer. */
+static void vhost_vsock_common_post_load_timer_cleanup(VHostVSockCommon *vvc)
+{
+    if (vvc->post_load_timer) {
+        timer_free(vvc->post_load_timer);
+        /* post_load asserts the pointer is NULL before arming a new timer */
+        vvc->post_load_timer = NULL;
+    }
+}
+/* Timer callback: free the timer, then send the transport reset event. */
+static void vhost_vsock_common_post_load_timer_cb(void *opaque)
+{
+    VHostVSockCommon *vvc = opaque;
+
+    vhost_vsock_common_post_load_timer_cleanup(vvc);
+    vhost_vsock_common_send_transport_reset(vvc);
+}
+/* pre_save hook: only asserts that the vhost device is no longer started. */
+int vhost_vsock_common_pre_save(void *opaque)
+{
+    VHostVSockCommon *vvc = opaque;
+
+    /*
+     * At this point, backend must be stopped, otherwise
+     * it might keep writing to memory.
+     */
+    assert(!vhost_dev_is_started(&vvc->vhost_dev));
+
+    return 0;
+}
+/* post_load hook: if queue 2 has an address, schedule a transport reset. */
+int vhost_vsock_common_post_load(void *opaque, int version_id)
+{
+    VHostVSockCommon *vvc = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(vvc);
+
+    if (virtio_queue_get_addr(vdev, 2)) {
+        /*
+         * Defer transport reset event to a vm clock timer so that virtqueue
+         * changes happen after migration has completed.
+         */
+        assert(!vvc->post_load_timer);
+        vvc->post_load_timer =
+            timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                         vhost_vsock_common_post_load_timer_cb,
+                         vvc);
+        timer_mod(vvc->post_load_timer, 1);
+    }
+    return 0;
+}
+/* Common realize step: init the virtio device and add its three queues. */
+void vhost_vsock_common_realize(VirtIODevice *vdev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+
+    virtio_init(vdev, VIRTIO_ID_VSOCK, sizeof(struct virtio_vsock_config));
+
+    /* Receive and transmit queues belong to vhost */
+    vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+                                      vhost_vsock_common_handle_output);
+    vvc->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+                                       vhost_vsock_common_handle_output);
+
+    /* The event queue belongs to QEMU */
+    vvc->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+                                       vhost_vsock_common_handle_output);
+
+    vvc->vhost_dev.nvqs = ARRAY_SIZE(vvc->vhost_vqs);
+    vvc->vhost_dev.vqs = vvc->vhost_vqs;
+
+    vvc->post_load_timer = NULL;
+}
+/* Common unrealize step: free the timer, the queues and the virtio state. */
+void vhost_vsock_common_unrealize(VirtIODevice *vdev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+
+    vhost_vsock_common_post_load_timer_cleanup(vvc);
+
+    virtio_delete_queue(vvc->recv_vq);
+    virtio_delete_queue(vvc->trans_vq);
+    virtio_delete_queue(vvc->event_vq);
+    virtio_cleanup(vdev);
+}
+/* get_vhost hook: return the vhost_dev embedded in the device. */
+static struct vhost_dev *vhost_vsock_common_get_vhost(VirtIODevice *vdev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+    return &vvc->vhost_dev;
+}
+/* Properties of the common vsock type; "seqpacket" defaults to auto. */
+static Property vhost_vsock_common_properties[] = {
+    DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket,
+                            ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_END_OF_LIST(),
+};
+/* Class init: install the properties, category and vhost-related hooks. */
+static void vhost_vsock_common_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, vhost_vsock_common_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask;
+    vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending;
+    vdc->get_vhost = vhost_vsock_common_get_vhost;
+}
+/* Abstract QOM type; it cannot be instantiated on its own. */
+static const TypeInfo vhost_vsock_common_info = {
+    .name = TYPE_VHOST_VSOCK_COMMON,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VHostVSockCommon),
+    .class_init = vhost_vsock_common_class_init,
+    .abstract = true,
+};
+/* Register the abstract common type; hooked up through type_init(). */
+static void vhost_vsock_common_register_types(void)
+{
+    type_register_static(&vhost_vsock_common_info);
+}
+
+type_init(vhost_vsock_common_register_types)
diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c
new file mode 100644
index 00000000..9f34414d
--- /dev/null
+++ b/hw/virtio/vhost-vsock-pci.c
@@ -0,0 +1,96 @@
+/*
+ * Vhost vsock PCI Bindings
+ *
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-vsock.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+typedef struct VHostVSockPCI VHostVSockPCI;
+
+/*
+ * vhost-vsock-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base"
+DECLARE_INSTANCE_CHECKER(VHostVSockPCI, VHOST_VSOCK_PCI,
+                         TYPE_VHOST_VSOCK_PCI)
+/* Proxy object: the PCI proxy state followed by the vsock device. */
+struct VHostVSockPCI {
+    VirtIOPCIProxy parent_obj;
+    VHostVSock vdev;
+};
+
+/* vhost-vsock-pci */
+/* "vectors" maps to VirtIOPCIProxy.nvectors and defaults to 3. */
+static Property vhost_vsock_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
+    DEFINE_PROP_END_OF_LIST(),
+};
+/* Realize the embedded vsock device on the proxy's bus. */
+static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    VirtIODevice *virtio_dev = VIRTIO_DEVICE(vdev);
+
+    /*
+     * To avoid migration issues, we force virtio version 1 only when
+     * legacy check is enabled in the new machine types (>= 5.1).
+     */
+    if (!virtio_legacy_check_disabled(virtio_dev)) {
+        virtio_pci_force_virtio_1(vpci_dev);
+    }
+
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+/* Class init: set the realize hook, properties and PCI identification. */
+static void vhost_vsock_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    k->realize = vhost_vsock_pci_realize;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    device_class_set_props(dc, vhost_vsock_pci_properties);
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK;
+    pcidev_k->revision = 0x00;
+    pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER;
+}
+/* Instance init: run virtio_instance_init_common() on the embedded vdev. */
+static void vhost_vsock_pci_instance_init(Object *obj)
+{
+    VHostVSockPCI *dev = VHOST_VSOCK_PCI(obj);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VHOST_VSOCK);
+}
+/* Type names and init hooks passed to virtio_pci_types_register(). */
+static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = {
+    .base_name             = TYPE_VHOST_VSOCK_PCI,
+    .generic_name          = "vhost-vsock-pci",
+    .non_transitional_name = "vhost-vsock-pci-non-transitional",
+    .instance_size = sizeof(VHostVSockPCI),
+    .instance_init = vhost_vsock_pci_instance_init,
+    .class_init    = vhost_vsock_pci_class_init,
+};
+/* Register the vhost-vsock PCI types; hooked up through type_init(). */
+static void virtio_pci_vhost_register(void)
+{
+    virtio_pci_types_register(&vhost_vsock_pci_info);
+}
+
+type_init(virtio_pci_vhost_register)
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
new file mode 100644
index 00000000..aa16d584
--- /dev/null
+++ b/hw/virtio/vhost-vsock.c
@@ -0,0 +1,239 @@
+/*
+ * Virtio vsock device
+ *
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_vsock.h"
+#include "qapi/error.h"
+#include "hw/virtio/virtio-access.h"
+#include "qemu/error-report.h"
+#include "qemu/sockets.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-vsock.h"
+#include "monitor/monitor.h"
+/* get_config hook: write a config that only carries the guest CID. */
+static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VHostVSock *vsock = VHOST_VSOCK(vdev);
+    struct virtio_vsock_config vsockcfg = {};
+
+    virtio_stq_p(vdev, &vsockcfg.guest_cid, vsock->conf.guest_cid);
+    memcpy(config, &vsockcfg, sizeof(vsockcfg));
+}
+
+/* Hand the configured guest CID to the vhost backend, if it supports that. */
+static int vhost_vsock_set_guest_cid(VirtIODevice *vdev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+    VHostVSock *vsock = VHOST_VSOCK(vdev);
+    const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops;
+
+    if (!vhost_ops->vhost_vsock_set_guest_cid) {
+        return -ENOSYS;
+    }
+
+    /* NOTE(review): -errno assumes the callback leaves errno set - confirm */
+    if (vhost_ops->vhost_vsock_set_guest_cid(&vvc->vhost_dev,
+                                             vsock->conf.guest_cid) < 0) {
+        return -errno;
+    }
+    return 0;
+}
+
+/* Pass the @start flag to the vhost backend, if it supports that. */
+static int vhost_vsock_set_running(VirtIODevice *vdev, int start)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+    const VhostOps *vhost_ops = vvc->vhost_dev.vhost_ops;
+
+    if (!vhost_ops->vhost_vsock_set_running) {
+        return -ENOSYS;
+    }
+
+    /* NOTE(review): -errno assumes the callback leaves errno set - confirm */
+    if (vhost_ops->vhost_vsock_set_running(&vvc->vhost_dev, start) < 0) {
+        return -errno;
+    }
+    return 0;
+}
+
+/* set_status hook: start or stop vhost to match the requested @status. */
+static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+    bool should_start = virtio_device_should_start(vdev, status);
+    int ret;
+
+    if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
+        return;
+    }
+
+    if (should_start) {
+        ret = vhost_vsock_common_start(vdev);
+        if (ret < 0) {
+            return;
+        }
+
+        ret = vhost_vsock_set_running(vdev, 1);
+        if (ret < 0) {
+            vhost_vsock_common_stop(vdev);
+            error_report("Error starting vhost vsock: %d", -ret);
+            return;
+        }
+    } else {
+        ret = vhost_vsock_set_running(vdev, 0);
+        if (ret < 0) {
+            error_report("vhost vsock set running failed: %d", ret);
+            return;
+        }
+
+        vhost_vsock_common_stop(vdev);
+    }
+}
+/* get_features hook: defer to the shared vhost-vsock implementation. */
+static uint64_t vhost_vsock_get_features(VirtIODevice *vdev,
+                                         uint64_t requested_features,
+                                         Error **errp)
+{
+    return vhost_vsock_common_get_features(vdev, requested_features, errp);
+}
+/* Migration state: virtio device state plus the shared pre/post hooks. */
+static const VMStateDescription vmstate_virtio_vhost_vsock = {
+    .name = "virtio-vhost_vsock",
+    .minimum_version_id = VHOST_VSOCK_SAVEVM_VERSION,
+    .version_id = VHOST_VSOCK_SAVEVM_VERSION,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+    .pre_save = vhost_vsock_common_pre_save,
+    .post_load = vhost_vsock_common_post_load,
+};
+
+/* Realize: check guest-cid, get the vhost fd, init vhost and set the CID. */
+static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VHostVSock *vsock = VHOST_VSOCK(dev);
+    int vhostfd;
+    int ret;
+
+    /* Refuse to use reserved CID numbers */
+    if (vsock->conf.guest_cid <= 2) {
+        error_setg(errp, "guest-cid property must be greater than 2");
+        return;
+    }
+
+    if (vsock->conf.guest_cid > UINT32_MAX) {
+        error_setg(errp, "guest-cid property must be a 32-bit number");
+        return;
+    }
+
+    if (vsock->conf.vhostfd) {
+        vhostfd = monitor_fd_param(monitor_cur(), vsock->conf.vhostfd, errp);
+        if (vhostfd == -1) {
+            error_prepend(errp, "vhost-vsock: unable to parse vhostfd: ");
+            return;
+        }
+        if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
+            error_setg_errno(errp, errno,
+                             "vhost-vsock: unable to set non-blocking mode");
+            close(vhostfd); /* not handed to vhost_dev_init() yet */
+            return;
+        }
+    } else {
+        vhostfd = open("/dev/vhost-vsock", O_RDWR);
+        if (vhostfd < 0) {
+            error_setg_errno(errp, errno,
+                             "vhost-vsock: failed to open vhost device");
+            return;
+        }
+        if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
+            error_setg_errno(errp, errno, "Failed to set FD nonblocking");
+            close(vhostfd); /* not handed to vhost_dev_init() yet */
+            return;
+        }
+    }
+
+    vhost_vsock_common_realize(vdev);
+
+    ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd,
+                         VHOST_BACKEND_TYPE_KERNEL, 0, errp);
+    if (ret < 0) {
+        /*
+         * vhostfd is closed by vhost_dev_cleanup, which is called
+         * by vhost_dev_init on initialization error.
+         */
+        goto err_virtio;
+    }
+
+    ret = vhost_vsock_set_guest_cid(vdev);
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid");
+        goto err_vhost_dev;
+    }
+
+    return;
+
+err_vhost_dev:
+    /* vhost_dev_cleanup() closes the vhostfd passed to vhost_dev_init() */
+    vhost_dev_cleanup(&vvc->vhost_dev);
+err_virtio:
+    vhost_vsock_common_unrealize(vdev);
+}
+/* Unrealize: stop the backend, then clean up vhost and virtio state. */
+static void vhost_vsock_device_unrealize(DeviceState *dev)
+{
+    VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    /* This will stop vhost backend if appropriate. */
+    vhost_vsock_set_status(vdev, 0);
+
+    vhost_dev_cleanup(&vvc->vhost_dev);
+    vhost_vsock_common_unrealize(vdev);
+}
+/* User properties; the default guest-cid of 0 is rejected by realize. */
+static Property vhost_vsock_properties[] = {
+    DEFINE_PROP_UINT64("guest-cid", VHostVSock, conf.guest_cid, 0),
+    DEFINE_PROP_STRING("vhostfd", VHostVSock, conf.vhostfd),
+    DEFINE_PROP_END_OF_LIST(),
+};
+/* Class init: install properties, migration state and virtio hooks. */
+static void vhost_vsock_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, vhost_vsock_properties);
+    dc->vmsd = &vmstate_virtio_vhost_vsock;
+    vdc->realize = vhost_vsock_device_realize;
+    vdc->unrealize = vhost_vsock_device_unrealize;
+    vdc->get_features = vhost_vsock_get_features;
+    vdc->get_config = vhost_vsock_get_config;
+    vdc->set_status = vhost_vsock_set_status;
+}
+/* Concrete vhost-vsock device type built on the common parent type. */
+static const TypeInfo vhost_vsock_info = {
+    .name = TYPE_VHOST_VSOCK,
+    .parent = TYPE_VHOST_VSOCK_COMMON,
+    .instance_size = sizeof(VHostVSock),
+    .class_init = vhost_vsock_class_init,
+};
+/* Register the vhost-vsock device type; hooked up through type_init(). */
+static void vhost_vsock_register_types(void)
+{
+    type_register_static(&vhost_vsock_info);
+}
+
+type_init(vhost_vsock_register_types)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
new file mode 100644
index 00000000..7fb008bc
--- /dev/null
+++ b/hw/virtio/vhost.c
@@ -0,0 +1,1942 @@
+/*
+ * vhost support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/virtio/vhost.h"
+#include "qemu/atomic.h"
+#include "qemu/range.h"
+#include "qemu/error-report.h"
+#include "qemu/memfd.h"
+#include "standard-headers/linux/vhost_types.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "migration/blocker.h"
+#include "migration/qemu-file-types.h"
+#include "sysemu/dma.h"
+#include "trace.h"
+
+/* enabled until disconnected backend stabilizes */
+#define _VHOST_DEBUG 1
+/* Report "<fmt>: <strerror(-retval)> (<-retval>)"; retval is expanded twice. */
+#ifdef _VHOST_DEBUG
+#define VHOST_OPS_DEBUG(retval, fmt, ...) \
+    do { \
+        error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
+                     strerror(-(retval)), -(retval)); \
+    } while (0)
+#else
+#define VHOST_OPS_DEBUG(retval, fmt, ...) \
+    do { } while (0)
+#endif
+
+static struct vhost_log *vhost_log;      /* current non-shared (heap) log */
+static struct vhost_log *vhost_log_shm;  /* current shared (memfd) log */
+
+static unsigned int used_memslots;  /* nregions of the last rebuilt table */
+static QLIST_HEAD(, vhost_dev) vhost_devices =
+    QLIST_HEAD_INITIALIZER(vhost_devices);
+
+/* True while used_memslots is below the smallest limit of any vhost device. */
+bool vhost_has_free_slot(void)
+{
+    struct vhost_dev *each;
+    unsigned int lowest = ~0U;    /* stays at the maximum with no devices */
+    QLIST_FOREACH(each, &vhost_devices, entry) {
+        unsigned int cap = each->vhost_ops->vhost_backend_memslots_limit(each);
+        lowest = cap < lowest ? cap : lowest;
+    }
+    return used_memslots < lowest;
+}
+
+/*
+ * Move dirty bits from the vhost log into @section's MemoryRegion for the
+ * overlap of [mfirst, mlast] and [rfirst, rlast] (inclusive guest-physical
+ * bounds).  Every non-zero log chunk is atomically fetched and cleared.
+ */
+static void vhost_dev_sync_region(struct vhost_dev *dev,
+                                  MemoryRegionSection *section,
+                                  uint64_t mfirst, uint64_t mlast,
+                                  uint64_t rfirst, uint64_t rlast)
+{
+    uint64_t start = MAX(mfirst, rfirst);
+    uint64_t end = MIN(mlast, rlast);
+    uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);
+    vhost_log_chunk_t *from, *to;
+
+    if (end < start) {
+        return;
+    }
+    assert(end / VHOST_LOG_CHUNK < dev->log_size);
+    assert(start / VHOST_LOG_CHUNK < dev->log_size);
+
+    /* Form the chunk pointers only once the range is known to be in bounds. */
+    from = dev->log->log + start / VHOST_LOG_CHUNK;
+    to = dev->log->log + end / VHOST_LOG_CHUNK + 1;
+    for (; from < to; ++from, addr += VHOST_LOG_CHUNK) {
+        vhost_log_chunk_t dirty;
+        /* Cheap non-atomic check first: non-dirty is the common case. */
+        if (!*from) {
+            continue;
+        }
+        /* Data must be read atomically. We don't really need barrier semantics
+         * but it's easier to use atomic_* than roll our own. */
+        dirty = qatomic_xchg(from, 0);
+        while (dirty) {
+            int bit = ctzl(dirty);
+            hwaddr page_addr = addr + bit * VHOST_LOG_PAGE;
+            hwaddr section_offset =
+                page_addr - section->offset_within_address_space;
+            hwaddr mr_offset = section_offset + section->offset_within_region;
+            memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
+            dirty &= ~(0x1ull << bit);
+        }
+    }
+}
+
+/* Sync dirty log bits for the part of @section inside [first, last], per
+ * memory region and per used ring; a no-op (returning 0, as always) unless
+ * logging is enabled and the device is started. */
+static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+                                   MemoryRegionSection *section,
+                                   hwaddr first,
+                                   hwaddr last)
+{
+    hwaddr sec_first, sec_last, lo, hi;
+    int n;
+
+    if (!(dev->log_enabled && dev->started)) {
+        return 0;
+    }
+    sec_first = section->offset_within_address_space;
+    sec_last = range_get_last(sec_first, int128_get64(section->size));
+    lo = MAX(first, sec_first);
+    hi = MIN(last, sec_last);
+
+    for (n = 0; n < dev->mem->nregions; ++n) {
+        struct vhost_memory_region *region = &dev->mem->regions[n];
+        uint64_t region_last = range_get_last(region->guest_phys_addr,
+                                              region->memory_size);
+        vhost_dev_sync_region(dev, section, lo, hi,
+                              region->guest_phys_addr, region_last);
+    }
+    for (n = 0; n < dev->nvqs; ++n) {
+        struct vhost_virtqueue *queue = &dev->vqs[n];
+        /* Skip queues whose used ring has neither an address nor a size. */
+        if (queue->used_phys || queue->used_size) {
+            vhost_dev_sync_region(dev, section, lo, hi, queue->used_phys,
+                range_get_last(queue->used_phys, queue->used_size));
+        }
+    }
+    return 0;
+}
+
+/* Sync @section's dirty log, unbounded, for the vhost_dev owning @listener. */
+static void vhost_log_sync(MemoryListener *listener,
+                           MemoryRegionSection *section)
+{
+    vhost_sync_dirty_bitmap(container_of(listener, struct vhost_dev,
+                                         memory_listener), section, 0, ~0ULL);
+}
+
+/* Sync the dirty log of every section in dev->mem_sections over [first, last]. */
+static void vhost_log_sync_range(struct vhost_dev *dev,
+                                 hwaddr first, hwaddr last)
+{
+    int idx = 0;
+
+    while (idx < dev->n_mem_sections) { /* FIXME: N^2 in number of sections */
+        vhost_sync_dirty_bitmap(dev, &dev->mem_sections[idx++], first, last);
+    }
+}
+
+/* Number of log chunks needed to cover the highest-ending memory region. */
+static uint64_t vhost_get_log_size(struct vhost_dev *dev)
+{
+    uint64_t chunks = 0;
+    for (int r = 0; r < dev->mem->nregions; ++r) {
+        struct vhost_memory_region *region = &dev->mem->regions[r];
+        uint64_t last_addr = range_get_last(region->guest_phys_addr,
+                                            region->memory_size);
+        chunks = MAX(chunks, last_addr / VHOST_LOG_CHUNK + 1);
+    }
+    return chunks;
+}
+
+/* Select @dev's vhost_ops table for @backend_type; 0 on success, -1 (after
+ * reporting an error) for a type that is unknown or not compiled in. */
+static int vhost_set_backend_type(struct vhost_dev *dev,
+                                  VhostBackendType backend_type)
+{
+    switch (backend_type) {
+#ifdef CONFIG_VHOST_KERNEL
+    case VHOST_BACKEND_TYPE_KERNEL:
+        dev->vhost_ops = &kernel_ops;
+        return 0;
+#endif
+#ifdef CONFIG_VHOST_USER
+    case VHOST_BACKEND_TYPE_USER:
+        dev->vhost_ops = &user_ops;
+        return 0;
+#endif
+#ifdef CONFIG_VHOST_VDPA
+    case VHOST_BACKEND_TYPE_VDPA:
+        dev->vhost_ops = &vdpa_ops;
+        return 0;
+#endif
+    default:
+        break;
+    }
+
+    error_report("Unknown vhost backend type");
+    return -1;
+}
+
+/* Allocate a zeroed log of @size chunks holding one reference: memory from
+ * qemu_memfd_alloc() when @share is set, heap memory (fd -1) otherwise.
+ * Returns NULL, after reporting the error, if the memfd allocation fails. */
+static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
+{
+    struct vhost_log *log = g_new0(struct vhost_log, 1);
+    uint64_t bytes = size * sizeof(*(log->log));
+    Error *local_err = NULL;
+
+    log->fd = -1;
+    log->size = size;
+    log->refcnt = 1;
+    if (!share) {
+        log->log = g_malloc0(bytes);
+        return log;
+    }
+
+    log->log = qemu_memfd_alloc("vhost-log", bytes,
+                                F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+                                &log->fd, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        g_free(log);
+        return NULL;
+    }
+    memset(log->log, 0, bytes);
+    return log;
+}
+
+/* Get a reference to a log of exactly @size chunks: reuse the current shared
+ * or non-shared log when its size matches, else allocate a new one and make
+ * it the current log.  May return NULL, since vhost_log_alloc() can. */
+static struct vhost_log *vhost_log_get(uint64_t size, bool share)
+{
+    struct vhost_log **current = share ? &vhost_log_shm : &vhost_log;
+    struct vhost_log *found = *current;
+
+    if (found && found->size == size) {
+        found->refcnt++;
+        return found;
+    }
+
+    found = vhost_log_alloc(size, share);
+    *current = found;
+    return found;
+}
+
+/* Drop @dev's log reference; the last put syncs (if @sync) and frees the log,
+ * including a log that vhost_log_get() has already replaced as current. */
+static void vhost_log_put(struct vhost_dev *dev, bool sync)
+{
+    struct vhost_log *log = dev->log;
+
+    if (!log) {
+        return;
+    }
+    if (--log->refcnt == 0) {
+        /* Sync only the range covered by the old log */
+        if (dev->log_size && sync) {
+            vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
+        }
+        if (log->fd == -1) {    /* heap buffer, see vhost_log_alloc() */
+            g_free(log->log);
+        } else {                /* buffer from qemu_memfd_alloc() */
+            qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
+                            log->fd);
+        }
+        if (vhost_log == log) {
+            vhost_log = NULL;
+        } else if (vhost_log_shm == log) {
+            vhost_log_shm = NULL;
+        }
+        g_free(log);
+    }
+    dev->log = NULL;
+    dev->log_size = 0;
+}
+
+static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
+{   /* The op is optional; without it the log is never treated as shared. */
+    return dev->vhost_ops->vhost_requires_shm_log &&
+           dev->vhost_ops->vhost_requires_shm_log(dev);
+}
+
+/* Switch @dev to a log of @size chunks.  The backend is told about the new
+ * log before the old one is released, to ensure no logging is lost.
+ * NOTE(review): a NULL result from vhost_log_get() is dereferenced here. */
+static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
+{
+    struct vhost_log *new_log = vhost_log_get(size,
+                                              vhost_dev_log_is_shared(dev));
+    uint64_t base = (uintptr_t)new_log->log;
+    int ret = dev->vhost_ops->vhost_set_log_base(dev, base, new_log);
+
+    if (ret < 0) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_log_base failed");
+    }
+    vhost_log_put(dev, true);
+    dev->log = new_log;
+    dev->log_size = size;
+}
+
+/*
+ * True when @dev has a VirtIODevice with the bus IOMMU enabled and the host
+ * feature VIRTIO_F_IOMMU_PLATFORM.  For vhost that feature means the backend
+ * supports the incremental memory mapping API via the IOTLB API; without a
+ * platform IOMMU it would only cause unnecessary IOTLB miss/update traffic.
+ */
+static bool vhost_dev_has_iommu(struct vhost_dev *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+    if (!vdev) {
+        return false;
+    }
+    if (!virtio_bus_device_iommu_enabled(vdev)) {
+        return false;
+    }
+    return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
+/* Map via cpu_physical_memory_map(); with an IOMMU just cast @addr instead. */
+static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
+                              hwaddr *plen, bool is_write)
+{
+    if (vhost_dev_has_iommu(dev)) {
+        return (void *)(uintptr_t)addr;
+    }
+    return cpu_physical_memory_map(addr, plen, is_write);
+}
+
+static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
+                               hwaddr len, int is_write,
+                               hwaddr access_len)
+{   /* With an IOMMU vhost_memory_map() mapped nothing, so nothing to undo. */
+    if (!vhost_dev_has_iommu(dev)) {
+        cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+    }
+}
+
+/*
+ * Check one ring part against one memory region.  Returns 0 when the two do
+ * not intersect or the part is mapped where expected, -ENOMEM when the part
+ * runs past the end of the region, -EBUSY when its host address differs from
+ * the region's host address plus the part's offset into the region.
+ */
+static int vhost_verify_ring_part_mapping(void *ring_hva,
+                                          uint64_t ring_gpa,
+                                          uint64_t ring_size,
+                                          void *reg_hva,
+                                          uint64_t reg_gpa,
+                                          uint64_t reg_size)
+{
+    uint64_t reg_end = range_get_last(reg_gpa, reg_size);
+    uint64_t ring_end = range_get_last(ring_gpa, ring_size);
+    bool disjoint = ring_end < reg_gpa || ring_gpa > reg_end;
+
+    if (disjoint) {
+        return 0;
+    }
+    if (ring_end > reg_end) {
+        return -ENOMEM;
+    }
+    /* A different host address means the ring's MemoryRegion was replaced. */
+    return ring_hva == reg_hva + (ring_gpa - reg_gpa) ? 0 : -EBUSY;
+}
+
+/*
+ * Check that the descriptor table, available ring and used ring of every
+ * virtqueue still map consistently onto the region @reg_hva/@reg_gpa/
+ * @reg_size.  Queues with a zero descriptor address are skipped, and a
+ * device with an IOMMU is not checked at all.  Returns 0, or the first
+ * error from vhost_verify_ring_part_mapping() after reporting it.
+ */
+static int vhost_verify_ring_mappings(struct vhost_dev *dev,
+                                      void *reg_hva,
+                                      uint64_t reg_gpa,
+                                      uint64_t reg_size)
+{
+    static const char *const part_name[] = {
+        "descriptor table",
+        "available ring",
+        "used ring"
+    };
+    const int n_parts = sizeof(part_name) / sizeof(part_name[0]);
+    int ring, part;
+    int err = 0;
+
+    if (vhost_dev_has_iommu(dev)) {
+        return 0;
+    }
+
+    for (ring = 0; ring < dev->nvqs; ++ring) {
+        struct vhost_virtqueue *vq = &dev->vqs[ring];
+        /* Same order as part_name[]. */
+        void *hva[] = { vq->desc, vq->avail, vq->used };
+        uint64_t gpa[] = { vq->desc_phys, vq->avail_phys, vq->used_phys };
+        uint64_t size[] = { vq->desc_size, vq->avail_size, vq->used_size };
+
+        if (vq->desc_phys == 0) {
+            continue;
+        }
+
+        for (part = 0; part < n_parts; ++part) {
+            err = vhost_verify_ring_part_mapping(hva[part], gpa[part],
+                                                 size[part], reg_hva,
+                                                 reg_gpa, reg_size);
+            if (err) {
+                goto failed;
+            }
+        }
+    }
+    return 0;
+
+failed:
+    /* Other error codes are returned without a message. */
+    if (err == -ENOMEM) {
+        error_report("Unable to map %s for ring %d", part_name[part], ring);
+    } else if (err == -EBUSY) {
+        error_report("%s relocated for ring %d", part_name[part], ring);
+    }
+    return err;
+}
+
+/*
+ * vhost_section: decide whether vhost needs to know about @section.
+ *
+ * Only RAM sections that are not ROM qualify (that is where virtqueues and
+ * guest data accessed by virtio might live), and only while no dirty logging
+ * other than the migration and code clients is active on the region.  The
+ * backend may still filter a qualifying section out.  Rejections are traced
+ * with a reason code: 1 = unhandled dirty logging, 2 = backend filter,
+ * 3 = not plain RAM.
+ */
+static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
+{
+    MemoryRegion *mr = section->mr;
+    uint8_t dirty_mask, handled_dirty;
+
+    if (!memory_region_is_ram(mr) || memory_region_is_rom(mr)) {
+        trace_vhost_reject_section(mr->name, 3);
+        return false;
+    }
+
+    /*
+     * Kernel based vhost doesn't handle any block which is doing
+     * dirty-tracking other than migration for which it has
+     * specific logging support. However for TCG the kernel never
+     * gets involved anyway so we can also ignore its
+     * self-modifying code detection flags. However a vhost-user
+     * client could still confuse a TCG guest if it re-writes
+     * executable memory that has already been translated.
+     */
+    handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) | (1 << DIRTY_MEMORY_CODE);
+    dirty_mask = memory_region_get_dirty_log_mask(mr);
+    if (dirty_mask & ~handled_dirty) {
+        trace_vhost_reject_section(mr->name, 1);
+        return false;
+    }
+
+    if (dev->vhost_ops->vhost_backend_mem_section_filter &&
+        !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
+        trace_vhost_reject_section(mr->name, 2);
+        return false;
+    }
+
+    trace_vhost_section(mr->name);
+    return true;
+}
+
+static void vhost_begin(MemoryListener *listener)
+{   /* Reset the scratch section list that vhost_region_add_section() fills. */
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    dev->tmp_sections = NULL;
+    dev->n_tmp_sections = 0;
+}
+
+/* Finish a memory update for the vhost_dev embedding @listener: adopt the
+ * list built in tmp_sections, rebuild the memory table if it changed and, for
+ * a started device, verify rings, push the table and resize the dirty log. */
+static void vhost_commit(MemoryListener *listener)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    MemoryRegionSection *old_sections;
+    int n_old_sections;
+    uint64_t log_size;
+    size_t regions_size;
+    int r, i;
+    bool changed = false;
+
+    /* Note we can be called before the device is started, but then starting
+     * the device calls set_mem_table, so we need to have built the data
+     * structures. */
+    old_sections = dev->mem_sections;
+    n_old_sections = dev->n_mem_sections;
+    dev->mem_sections = dev->tmp_sections;
+    dev->n_mem_sections = dev->n_tmp_sections;
+
+    if (dev->n_mem_sections != n_old_sections) {
+        changed = true;
+    } else {
+        /* Same size, lets check the contents */
+        for (i = 0; i < n_old_sections; i++) {
+            if (!MemoryRegionSection_eq(&old_sections[i],
+                                        &dev->mem_sections[i])) {
+                changed = true;
+                break;
+            }
+        }
+    }
+
+    trace_vhost_commit(dev->started, changed);
+    if (!changed) {
+        goto out;
+    }
+
+    /* Rebuild the regions list from the new sections list */
+    regions_size = offsetof(struct vhost_memory, regions) +
+                       dev->n_mem_sections * sizeof dev->mem->regions[0];
+    dev->mem = g_realloc(dev->mem, regions_size);
+    dev->mem->nregions = dev->n_mem_sections;
+    used_memslots = dev->mem->nregions;
+    for (i = 0; i < dev->n_mem_sections; i++) {
+        struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
+        struct MemoryRegionSection *mrs = dev->mem_sections + i;
+
+        cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
+        cur_vmr->memory_size     = int128_get64(mrs->size);
+        cur_vmr->userspace_addr  =
+            (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
+            mrs->offset_within_region;
+        cur_vmr->flags_padding   = 0;
+    }
+
+    if (!dev->started) {
+        goto out;
+    }
+
+    for (i = 0; i < dev->mem->nregions; i++) {
+        if (vhost_verify_ring_mappings(dev,
+                       (void *)(uintptr_t)dev->mem->regions[i].userspace_addr,
+                       dev->mem->regions[i].guest_phys_addr,
+                       dev->mem->regions[i].memory_size)) {
+            error_report("Verify ring failure on region %d", i);
+            abort();
+        }
+    }
+
+    if (!dev->log_enabled) {
+        r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
+        if (r < 0) {
+            VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
+        }
+        goto out;
+    }
+    log_size = vhost_get_log_size(dev);
+    /* Over-allocate the log to reduce the number of reallocations.
+     * NOTE(review): divides by sizeof(struct vhost_log), not a chunk - confirm. */
+#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
+    /* To log more, must increase log size before table update. */
+    if (dev->log_size < log_size) {
+        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
+    }
+    r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
+    if (r < 0) {
+        VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
+    }
+    /* To log less, can only decrease log size after table update. */
+    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
+        vhost_dev_log_resize(dev, log_size);
+    }
+
+out:
+    /* Deref the old list of sections, this must happen _after_ the
+     * vhost_set_mem_table to ensure the client isn't still using the
+     * section we're about to unref.
+     */
+    while (n_old_sections--) {
+        memory_region_unref(old_sections[n_old_sections].mr);
+    }
+    g_free(old_sections);
+}
+
+/* Adds the section data to the tmp_section structure, joining it with the
+ * previous entry when possible; relies on the listener calling us in memory
+ * address order for each region (via the _add and _nop methods).
+ * NOTE(review): max_end mixes an inclusive last address with an exclusive end,
+ * so a fully contained section would lose one byte of size - confirm. */
+static void vhost_region_add_section(struct vhost_dev *dev,
+                                     MemoryRegionSection *section)
+{
+    bool need_add = true;
+    uint64_t mrs_size = int128_get64(section->size);
+    uint64_t mrs_gpa = section->offset_within_address_space;
+    uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
+                         section->offset_within_region;
+    RAMBlock *mrs_rb = section->mr->ram_block;
+
+    trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
+                                   mrs_host);
+
+    if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
+        /* Round the section to its page size */
+        /* First align the start down to a page boundary */
+        size_t mrs_page = qemu_ram_pagesize(mrs_rb);
+        uint64_t alignage = mrs_host & (mrs_page - 1);
+        if (alignage) {
+            mrs_host -= alignage;
+            mrs_size += alignage;
+            mrs_gpa  -= alignage;
+        }
+        /* Now align the size up to a page boundary */
+        alignage = mrs_size & (mrs_page - 1);
+        if (alignage) {
+            mrs_size += mrs_page - alignage;
+        }
+        trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa,
+                                               mrs_size, mrs_host);
+    }
+
+    if (dev->n_tmp_sections) {
+        /* Since we already have at least one section, lets see if
+         * this extends it; since we're scanning in order, we only
+         * have to look at the last one, and the FlatView that calls
+         * us shouldn't have overlaps.
+         */
+        MemoryRegionSection *prev_sec = dev->tmp_sections +
+                                               (dev->n_tmp_sections - 1);
+        uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
+        uint64_t prev_size = int128_get64(prev_sec->size);
+        uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
+        uint64_t prev_host_start =
+                        (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
+                        prev_sec->offset_within_region;
+        uint64_t prev_host_end   = range_get_last(prev_host_start, prev_size);
+
+        if (mrs_gpa <= (prev_gpa_end + 1)) {
+            /* OK, looks like overlapping/intersecting - it's possible that
+             * the rounding to page sizes has made them overlap, but they should
+             * match up in the same RAMBlock if they do.
+             */
+            if (mrs_gpa < prev_gpa_start) {
+                error_report("%s:Section '%s' rounded to %"PRIx64
+                             " prior to previous '%s' %"PRIx64,
+                             __func__, section->mr->name, mrs_gpa,
+                             prev_sec->mr->name, prev_gpa_start);
+                /* A way to cleanly fail here would be better */
+                return;
+            }
+            /* Offset from the start of the previous GPA to this GPA */
+            size_t offset = mrs_gpa - prev_gpa_start;
+
+            if (prev_host_start + offset == mrs_host &&
+                section->mr == prev_sec->mr &&
+                (!dev->vhost_ops->vhost_backend_can_merge ||
+                 dev->vhost_ops->vhost_backend_can_merge(dev,
+                    mrs_host, mrs_size,
+                    prev_host_start, prev_size))) {
+                uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
+                need_add = false;
+                prev_sec->offset_within_address_space =
+                    MIN(prev_gpa_start, mrs_gpa);
+                prev_sec->offset_within_region =
+                    MIN(prev_host_start, mrs_host) -
+                    (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
+                prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
+                                               mrs_host));
+                trace_vhost_region_add_section_merge(section->mr->name,
+                                        int128_get64(prev_sec->size),
+                                        prev_sec->offset_within_address_space,
+                                        prev_sec->offset_within_region);
+            } else {
+                /* adjoining regions are fine, but overlapping ones with
+                 * different blocks/offsets shouldn't happen
+                 */
+                if (mrs_gpa != prev_gpa_end + 1) {
+                    error_report("%s: Overlapping but not coherent sections "
+                                 "at %"PRIx64,
+                                 __func__, mrs_gpa);
+                    return;
+                }
+            }
+        }
+    }
+
+    if (need_add) {
+        ++dev->n_tmp_sections;
+        dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
+                                    dev->n_tmp_sections);
+        dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
+        /* The flatview isn't stable and we don't use it, making it NULL
+         * means we can memcmp the list.
+         */
+        dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
+        memory_region_ref(section->mr);
+    }
+}
+
+/* Shared by the add and nop callbacks: record @section in the scratch list
+ * if vhost_section() accepts it. */
+static void vhost_region_addnop(MemoryListener *listener,
+                                MemoryRegionSection *section)
+{
+    struct vhost_dev *hdev = container_of(listener, struct vhost_dev,
+                                          memory_listener);
+
+    if (vhost_section(hdev, section)) {
+        vhost_region_add_section(hdev, section);
+    }
+}
+
+/* IOMMU notifier callback: invalidate the entry's range in the device IOTLB. */
+static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    struct vhost_iommu *owner = container_of(n, struct vhost_iommu, n);
+    hwaddr len = iotlb->addr_mask + 1;
+    hwaddr start = iotlb->iova + owner->iommu_offset;
+
+    if (vhost_backend_invalidate_device_iotlb(owner->hdev, start, len)) {
+        error_report("Fail to invalidate device iotlb");
+    }
+}
+
+/*
+ * iommu_listener hook: for an IOMMU section, register a notifier covering
+ * the section's range inside the region and track it on dev->iommu_list,
+ * falling back to a legacy UNMAP notifier if the device-IOTLB one is
+ * refused.  Sections of other region types are ignored.
+ */
+static void vhost_iommu_region_add(MemoryListener *listener,
+                                   MemoryRegionSection *section)
+{
+    struct vhost_dev *hdev = container_of(listener, struct vhost_dev,
+                                          iommu_listener);
+    MemoryRegion *mr = section->mr;
+    hwaddr first = section->offset_within_region;
+    struct vhost_iommu *entry;
+    Int128 last;
+    int idx;
+
+    if (!memory_region_is_iommu(mr)) {
+        return;
+    }
+
+    /* Inclusive end of the section within the region. */
+    last = int128_sub(int128_add(int128_make64(first), section->size),
+                      int128_one());
+    idx = memory_region_iommu_attrs_to_index(IOMMU_MEMORY_REGION(mr),
+                                             MEMTXATTRS_UNSPECIFIED);
+
+    entry = g_malloc0(sizeof(*entry));
+    entry->hdev = hdev;
+    entry->mr = mr;
+    entry->iommu_offset = section->offset_within_address_space - first;
+    iommu_notifier_init(&entry->n, vhost_iommu_unmap_notify,
+                        IOMMU_NOTIFIER_DEVIOTLB_UNMAP,
+                        first, int128_get64(last), idx);
+    if (memory_region_register_iommu_notifier(mr, &entry->n, NULL)) {
+        /*
+         * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
+         * UNMAP legacy message
+         */
+        entry->n.notifier_flags = IOMMU_NOTIFIER_UNMAP;
+        memory_region_register_iommu_notifier(mr, &entry->n, &error_fatal);
+    }
+    QLIST_INSERT_HEAD(&hdev->iommu_list, entry, iommu_next);
+    /* TODO: can replay help performance here? */
+}
+
+/* Unregister and free the first tracked notifier that matches @section. */
+static void vhost_iommu_region_del(MemoryListener *listener,
+                                   MemoryRegionSection *section)
+{
+    struct vhost_dev *hdev = container_of(listener, struct vhost_dev,
+                                          iommu_listener);
+    struct vhost_iommu *entry;
+
+    if (!memory_region_is_iommu(section->mr)) {
+        return;
+    }
+    QLIST_FOREACH(entry, &hdev->iommu_list, iommu_next) {
+        if (entry->mr != section->mr ||
+            entry->n.start != section->offset_within_region) {
+            continue;
+        }
+        memory_region_unregister_iommu_notifier(entry->mr, &entry->n);
+        QLIST_REMOVE(entry, iommu_next);
+        g_free(entry);
+        return;
+    }
+}
+
+/* Send @vq's ring addresses (and the log flag) to the backend as index @idx. */
+static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
+                                    struct vhost_virtqueue *vq,
+                                    unsigned idx, bool enable_log)
+{
+    struct vhost_vring_addr ring;
+    int ret;
+    memset(&ring, 0, sizeof(ring));
+    if (!dev->vhost_ops->vhost_vq_get_addr) {
+        ring.desc_user_addr = (uint64_t)(unsigned long)vq->desc;
+        ring.avail_user_addr = (uint64_t)(unsigned long)vq->avail;
+        ring.used_user_addr = (uint64_t)(unsigned long)vq->used;
+    } else {
+        ret = dev->vhost_ops->vhost_vq_get_addr(dev, &ring, vq);
+        if (ret < 0) {
+            VHOST_OPS_DEBUG(ret, "vhost_vq_get_addr failed");
+            return ret;
+        }
+    }
+    ring.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0;
+    ring.log_guest_addr = vq->used_phys;
+    ring.index = idx;
+    ret = dev->vhost_ops->vhost_set_vring_addr(dev, &ring);
+    if (ret < 0) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_vring_addr failed");
+    }
+    return ret;
+}
+
+/* Push the acked features, adjusted for logging and IOMMU use, to the
+ * backend, then set backend capabilities if the backend has that op.
+ * Returns the result of the last backend call made, negative on failure.
+ */
+static int vhost_dev_set_features(struct vhost_dev *dev,
+                                  bool enable_log)
+{
+    const uint64_t iommu_bit = 0x1ULL << VIRTIO_F_IOMMU_PLATFORM;
+    uint64_t features = dev->acked_features;
+    int ret;
+
+    if (enable_log) {
+        features |= 0x1ULL << VHOST_F_LOG_ALL;
+    }
+    if (!vhost_dev_has_iommu(dev)) {
+        features &= ~iommu_bit;
+    }
+    if (dev->vhost_ops->vhost_force_iommu &&
+        dev->vhost_ops->vhost_force_iommu(dev) == true) {
+        features |= iommu_bit;
+    }
+    ret = dev->vhost_ops->vhost_set_features(dev, features);
+    if (ret < 0) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_features failed");
+    } else if (dev->vhost_ops->vhost_set_backend_cap) {
+        ret = dev->vhost_ops->vhost_set_backend_cap(dev);
+        if (ret < 0) {
+            VHOST_OPS_DEBUG(ret, "vhost_set_backend_cap failed");
+        }
+    }
+    return ret;
+}
+
+/* Switch the backend's logging to @enable_log: features first, then every
+ * ready ring.  On failure the rings handled so far and the features are set
+ * back to dev->log_enabled and the negative error is returned; else 0. */
+static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
+{
+    int r, i, idx;
+    hwaddr addr;
+
+    r = vhost_dev_set_features(dev, enable_log);
+    if (r < 0) {
+        goto err_features;
+    }
+    for (i = 0; i < dev->nvqs; ++i) {
+        idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
+        addr = virtio_queue_get_desc_addr(dev->vdev, idx);
+        if (!addr) {
+            /* The queue might not be ready for start, so skip it; similar
+             * logic is used by the vhost_virtqueue_start() routine.
+             */
+            continue;
+        }
+        r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
+                                     enable_log);
+        if (r < 0) {
+            goto err_vq;
+        }
+    }
+    return 0;
+err_vq:
+    for (; i >= 0; --i) {
+        idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
+        addr = virtio_queue_get_desc_addr(dev->vdev, idx);
+        if (!addr) {
+            continue;
+        }
+        vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
+                                 dev->log_enabled);
+    }
+    vhost_dev_set_features(dev, dev->log_enabled);
+err_features:
+    return r;
+}
+
+/* Turn dirty logging on or off for the device embedding @listener.  A stopped
+ * device only records the new setting.  Returns 0 on success or if the device
+ * is found stopped afterwards; otherwise the error from vhost_dev_set_log(),
+ * with logging marked as disabled. */
+static int vhost_migration_log(MemoryListener *listener, bool enable)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         memory_listener);
+    int r;
+    if (enable == dev->log_enabled) {
+        return 0;
+    }
+    if (!dev->started) {
+        dev->log_enabled = enable;
+        return 0;
+    }
+
+    r = 0;
+    if (!enable) {
+        r = vhost_dev_set_log(dev, false);
+        if (r < 0) {
+            goto check_dev_state;
+        }
+        vhost_log_put(dev, false);
+    } else {
+        vhost_dev_log_resize(dev, vhost_get_log_size(dev));
+        r = vhost_dev_set_log(dev, true);
+        if (r < 0) {
+            goto check_dev_state;
+        }
+    }
+
+check_dev_state:
+    dev->log_enabled = enable;
+    /* vhost-user-* devices could change their state during log
+     * initialization due to disconnect. So check dev state after
+     * vhost communication. */
+    if (!dev->started) {
+        /* Since device is in the stopped state, it is okay for
+         * migration. Return success. */
+        r = 0;
+    }
+    if (r) {
+        /* An error occurred. */
+        dev->log_enabled = false;
+    }
+
+    return r;
+}
+
+/*
+ * MemoryListener log_global_start hook: turn vhost dirty logging on.
+ * A failure to do so is fatal and aborts the process.
+ */
+static void vhost_log_global_start(MemoryListener *listener)
+{
+    if (vhost_migration_log(listener, true) < 0) {
+        abort();
+    }
+}
+
+/*
+ * MemoryListener log_global_stop hook: turn vhost dirty logging off.
+ * A failure to do so is fatal and aborts the process.
+ */
+static void vhost_log_global_stop(MemoryListener *listener)
+{
+    if (vhost_migration_log(listener, false) < 0) {
+        abort();
+    }
+}
+
+/*
+ * MemoryListener log_start hook for vhost devices.
+ * Currently an empty placeholder: no per-section action is taken.
+ */
+static void vhost_log_start(MemoryListener *listener,
+                            MemoryRegionSection *section,
+                            int old, int new)
+{
+    /* FIXME: implement */
+}
+
+/*
+ * MemoryListener log_stop hook for vhost devices.
+ * Currently an empty placeholder: no per-section action is taken.
+ */
+static void vhost_log_stop(MemoryListener *listener,
+                           MemoryRegionSection *section,
+                           int old, int new)
+{
+    /* FIXME: implement */
+}
+
+/*
+ * Report whether the vhost backend has to be told the vring endianness.
+ *
+ * Modern (VIRTIO_F_VERSION_1) devices and legacy devices whose endianness
+ * matches the host are handled natively by the vhost driver.  Only a legacy
+ * device exposed to a bi-endian guest running with the opposite endianness
+ * of the host needs an explicit setting.
+ */
+static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
+{
+#if HOST_BIG_ENDIAN
+    const int cross_endian = VIRTIO_DEVICE_ENDIAN_LITTLE;
+#else
+    const int cross_endian = VIRTIO_DEVICE_ENDIAN_BIG;
+#endif
+
+    return !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+           vdev->device_endian == cross_endian;
+}
+
+/*
+ * Ask the backend to use big-endian (@is_big_endian true) or little-endian
+ * layout for vring @vhost_vq_index.
+ *
+ * Returns the result of the vhost_set_vring_endian operation; a negative
+ * result is additionally reported through VHOST_OPS_DEBUG.
+ */
+static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
+                                                   bool is_big_endian,
+                                                   int vhost_vq_index)
+{
+    struct vhost_vring_state endian = {
+        .index = vhost_vq_index,
+        .num = is_big_endian
+    };
+    int ret = dev->vhost_ops->vhost_set_vring_endian(dev, &endian);
+
+    if (ret < 0) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_vring_endian failed");
+    }
+    return ret;
+}
+
+/*
+ * Find the vhost memory region containing guest physical address @gpa.
+ *
+ * On success stores the matching userspace address in @uaddr and the number
+ * of bytes left in the region from @gpa in @len, and returns 0.
+ * Returns -EFAULT when no region of hdev->mem covers @gpa.
+ */
+static int vhost_memory_region_lookup(struct vhost_dev *hdev,
+                                      uint64_t gpa, uint64_t *uaddr,
+                                      uint64_t *len)
+{
+    struct vhost_memory_region *reg = hdev->mem->regions;
+    int idx;
+
+    for (idx = 0; idx < hdev->mem->nregions; idx++, reg++) {
+        uint64_t start = reg->guest_phys_addr;
+        uint64_t end = start + reg->memory_size;
+
+        if (gpa < start || end <= gpa) {
+            continue;
+        }
+
+        *uaddr = reg->userspace_addr + gpa - start;
+        *len = end - gpa;
+        return 0;
+    }
+
+    return -EFAULT;
+}
+
+/*
+ * Handle a device IOTLB miss for @iova.
+ *
+ * Translates @iova through the device DMA address space, looks the result up
+ * in the vhost memory table and pushes the mapping to the backend.
+ *
+ * Returns 0 on success, -EFAULT when the translation yields no target
+ * address space, or the error of the failing lookup/update step.
+ */
+int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
+{
+    IOMMUTLBEntry entry;
+    uint64_t host_addr, span;
+    int rc;
+
+    RCU_READ_LOCK_GUARD();
+
+    trace_vhost_iotlb_miss(dev, 1);
+
+    entry = address_space_get_iotlb_entry(dev->vdev->dma_as,
+                                          iova, write,
+                                          MEMTXATTRS_UNSPECIFIED);
+    if (entry.target_as == NULL) {
+        /* No translation available: nothing is sent to the backend. */
+        trace_vhost_iotlb_miss(dev, 2);
+        return -EFAULT;
+    }
+
+    rc = vhost_memory_region_lookup(dev, entry.translated_addr,
+                                    &host_addr, &span);
+    if (rc) {
+        trace_vhost_iotlb_miss(dev, 3);
+        error_report("Fail to lookup the translated address "
+                     "%"PRIx64, entry.translated_addr);
+        return rc;
+    }
+
+    /* Clamp to the translation granule and align the IOVA to it. */
+    span = MIN(entry.addr_mask + 1, span);
+    iova &= ~entry.addr_mask;
+
+    rc = vhost_backend_update_device_iotlb(dev, iova, host_addr,
+                                           span, entry.perm);
+    if (rc) {
+        trace_vhost_iotlb_miss(dev, 4);
+        error_report("Fail to update device iotlb");
+        return rc;
+    }
+
+    trace_vhost_iotlb_miss(dev, 2);
+    return rc;
+}
+
+/*
+ * Configure and start virtqueue @idx of @vdev in the vhost backend.
+ *
+ * Programs ring size, last-avail index and (if needed) endianness, maps the
+ * descriptor/avail/used rings, hands their addresses and the kick fd to the
+ * backend and sets up the call fd.
+ *
+ * Returns 0 on success, or when the queue has no descriptor address yet.
+ * Returns -ENOMEM when a ring cannot be mapped in full, otherwise the error
+ * of the failing backend operation.  Mappings made so far are released on
+ * failure.
+ */
+int vhost_virtqueue_start(struct vhost_dev *dev,
+                          struct VirtIODevice *vdev,
+                          struct vhost_virtqueue *vq,
+                          unsigned idx)
+{
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    VirtioBusState *vbus = VIRTIO_BUS(qbus);
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
+    hwaddr s, l, a;
+    int r;
+    int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
+    struct vhost_vring_file file = {
+        .index = vhost_vq_index
+    };
+    struct vhost_vring_state state = {
+        .index = vhost_vq_index
+    };
+    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
+
+    a = virtio_queue_get_desc_addr(vdev, idx);
+    if (a == 0) {
+        /* Queue might not be ready for start */
+        return 0;
+    }
+
+    vq->num = state.num = virtio_queue_get_num(vdev, idx);
+    r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
+    if (r) {
+        VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
+        return r;
+    }
+
+    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
+    r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
+    if (r) {
+        VHOST_OPS_DEBUG(r, "vhost_set_vring_base failed");
+        return r;
+    }
+
+    if (vhost_needs_vring_endian(vdev)) {
+        r = vhost_virtqueue_set_vring_endian_legacy(dev,
+                                                    virtio_is_big_endian(vdev),
+                                                    vhost_vq_index);
+        if (r) {
+            return r;
+        }
+    }
+
+    /*
+     * Map the three rings.  's' is the requested size and 'l' the size
+     * actually mapped; a short mapping is treated as a failure.
+     */
+    vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
+    vq->desc_phys = a;
+    vq->desc = vhost_memory_map(dev, a, &l, false);
+    if (!vq->desc || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc_desc;
+    }
+    vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
+    vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
+    vq->avail = vhost_memory_map(dev, a, &l, false);
+    if (!vq->avail || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc_avail;
+    }
+    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
+    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
+    vq->used = vhost_memory_map(dev, a, &l, true);
+    if (!vq->used || l != s) {
+        r = -ENOMEM;
+        goto fail_alloc_used;
+    }
+
+    r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
+    if (r < 0) {
+        goto fail_alloc;
+    }
+
+    /* The backend is kicked through the queue's host notifier. */
+    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
+    r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
+    if (r) {
+        VHOST_OPS_DEBUG(r, "vhost_set_vring_kick failed");
+        goto fail_kick;
+    }
+
+    /* Clear and discard previous events if any. */
+    event_notifier_test_and_clear(&vq->masked_notifier);
+
+    /* Init vring in unmasked state, unless guest_notifier_mask
+     * will do it later.
+     */
+    if (!vdev->use_guest_notifier_mask) {
+        /* TODO: check and handle errors. */
+        vhost_virtqueue_mask(dev, vdev, idx, false);
+    }
+
+    /*
+     * If guest notifiers are reported in use but this queue has no vector
+     * assigned, pass fd -1 as the call fd.
+     */
+    if (k->query_guest_notifiers &&
+        k->query_guest_notifiers(qbus->parent) &&
+        virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
+        file.fd = -1;
+        r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
+        if (r) {
+            goto fail_vector;
+        }
+    }
+
+    return 0;
+
+    /* Unwind the ring mappings in reverse order of creation. */
+fail_vector:
+fail_kick:
+fail_alloc:
+    vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
+                       0, 0);
+fail_alloc_used:
+    vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+                       0, 0);
+fail_alloc_avail:
+    vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
+                       0, 0);
+fail_alloc_desc:
+    return r;
+}
+
+/*
+ * Stop virtqueue @idx of @vdev in the vhost backend.
+ *
+ * Fetches the ring base from the backend to update QEMU's last-avail index,
+ * refreshes the used index state, resets the vring endianness for
+ * cross-endian legacy devices and unmaps the three rings.
+ * Does nothing if the queue has no descriptor address.
+ */
+void vhost_virtqueue_stop(struct vhost_dev *dev,
+                          struct VirtIODevice *vdev,
+                          struct vhost_virtqueue *vq,
+                          unsigned idx)
+{
+    int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
+    struct vhost_vring_state state = {
+        .index = vhost_vq_index,
+    };
+    int r;
+
+    if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
+        /* Don't stop the virtqueue which might have not been started */
+        return;
+    }
+
+    r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
+    if (r < 0) {
+        VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
+        /* Connection to the backend is broken, so let's sync internal
+         * last avail idx to the device used idx.
+         */
+        virtio_queue_restore_last_avail_idx(vdev, idx);
+    } else {
+        /* The backend reported its ring base in state.num. */
+        virtio_queue_set_last_avail_idx(vdev, idx, state.num);
+    }
+    virtio_queue_invalidate_signalled_used(vdev, idx);
+    virtio_queue_update_used_idx(vdev, idx);
+
+    /* In the cross-endian case, we need to reset the vring endianness to
+     * native as legacy devices expect so by default.
+     */
+    if (vhost_needs_vring_endian(vdev)) {
+        vhost_virtqueue_set_vring_endian_legacy(dev,
+                                                !virtio_is_big_endian(vdev),
+                                                vhost_vq_index);
+    }
+
+    /* Release the ring mappings: used first, then avail, then desc. */
+    vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
+                       1, virtio_queue_get_used_size(vdev, idx));
+    vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+                       0, virtio_queue_get_avail_size(vdev, idx));
+    vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
+                       0, virtio_queue_get_desc_size(vdev, idx));
+}
+
+/* MemoryListener eventfd_add hook for vhost devices; intentionally empty. */
+static void vhost_eventfd_add(MemoryListener *listener,
+                              MemoryRegionSection *section,
+                              bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+/* MemoryListener eventfd_del hook for vhost devices; intentionally empty. */
+static void vhost_eventfd_del(MemoryListener *listener,
+                              MemoryRegionSection *section,
+                              bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+/*
+ * Set the busy-loop timeout of virtqueue @n to @timeout.
+ *
+ * Returns -EINVAL when the backend has no busyloop-timeout operation,
+ * otherwise the result of that operation (0 on success).
+ */
+static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
+                                                int n, uint32_t timeout)
+{
+    struct vhost_vring_state busyloop = {
+        .index = dev->vhost_ops->vhost_get_vq_index(dev, n),
+        .num = timeout,
+    };
+    int ret;
+
+    if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
+        return -EINVAL;
+    }
+
+    ret = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &busyloop);
+    if (ret) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_vring_busyloop_timeout failed");
+    }
+    return ret;
+}
+
+/*
+ * Handler for a virtqueue's error notifier.
+ * Consumes the pending event and, if the vhost device is bound to a virtio
+ * device, reports which virtqueue signalled the error.
+ */
+static void vhost_virtqueue_error_notifier(EventNotifier *n)
+{
+    struct vhost_virtqueue *vq = container_of(n, struct vhost_virtqueue,
+                                              error_notifier);
+    struct vhost_dev *owner = vq->dev;
+    int pos = vq - owner->vqs;
+
+    if (!event_notifier_test_and_clear(n) || !owner->vdev) {
+        return;
+    }
+
+    VHOST_OPS_DEBUG(-EINVAL,  "vhost vring error in virtqueue %d",
+                    owner->vq_index + pos);
+}
+
+/*
+ * Set up the notifiers of virtqueue @n.
+ *
+ * Creates the masked notifier and installs it as the backend's call fd.
+ * If the backend supports vhost_set_vring_err, also creates the error
+ * notifier, passes it to the backend and attaches its handler.
+ *
+ * Returns 0 on success or the error of the failing step; notifiers created
+ * so far are cleaned up on failure.
+ */
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+                                struct vhost_virtqueue *vq, int n)
+{
+    int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
+    struct vhost_vring_file file = {
+        .index = vhost_vq_index,
+    };
+    int r = event_notifier_init(&vq->masked_notifier, 0);
+    if (r < 0) {
+        return r;
+    }
+
+    file.fd = event_notifier_get_wfd(&vq->masked_notifier);
+    r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
+    if (r) {
+        VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
+        goto fail_call;
+    }
+
+    /* Back-pointer used by the error handler and by cleanup. */
+    vq->dev = dev;
+
+    if (dev->vhost_ops->vhost_set_vring_err) {
+        r = event_notifier_init(&vq->error_notifier, 0);
+        if (r < 0) {
+            goto fail_call;
+        }
+
+        file.fd = event_notifier_get_fd(&vq->error_notifier);
+        r = dev->vhost_ops->vhost_set_vring_err(dev, &file);
+        if (r) {
+            VHOST_OPS_DEBUG(r, "vhost_set_vring_err failed");
+            goto fail_err;
+        }
+
+        event_notifier_set_handler(&vq->error_notifier,
+                                   vhost_virtqueue_error_notifier);
+    }
+
+    return 0;
+
+fail_err:
+    event_notifier_cleanup(&vq->error_notifier);
+fail_call:
+    event_notifier_cleanup(&vq->masked_notifier);
+    return r;
+}
+
+/*
+ * Release the notifiers created by vhost_virtqueue_init().
+ * The error notifier is only torn down when the backend supports
+ * vhost_set_vring_err, which is also the only case in which it was created.
+ */
+static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
+{
+    event_notifier_cleanup(&vq->masked_notifier);
+
+    if (!vq->dev->vhost_ops->vhost_set_vring_err) {
+        return;
+    }
+
+    event_notifier_set_handler(&vq->error_notifier, NULL);
+    event_notifier_cleanup(&vq->error_notifier);
+}
+
+/*
+ * Initialise vhost device @hdev on top of the backend @backend_type.
+ *
+ * Initialises the backend, takes ownership, reads the backend features,
+ * sets up every virtqueue's notifiers, optionally programs
+ * @busyloop_timeout, installs the memory listeners and, when migration is
+ * not possible, registers a migration blocker.
+ *
+ * Returns 0 on success.  On failure returns a negative errno, sets @errp
+ * and tears @hdev down with vhost_dev_cleanup(), which zeroes the structure.
+ *
+ * NOTE(review): the early failure paths run vhost_dev_cleanup() before
+ * hdev->mem and friends are assigned here, so callers presumably pass a
+ * zero-initialised @hdev apart from vqs/nvqs/vq_index -- confirm.
+ */
+int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
+                   VhostBackendType backend_type, uint32_t busyloop_timeout,
+                   Error **errp)
+{
+    uint64_t features;
+    int i, r, n_initialized_vqs = 0;
+
+    hdev->vdev = NULL;
+    hdev->migration_blocker = NULL;
+
+    r = vhost_set_backend_type(hdev, backend_type);
+    assert(r >= 0);
+
+    r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
+    if (r < 0) {
+        goto fail;
+    }
+
+    r = hdev->vhost_ops->vhost_set_owner(hdev);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "vhost_set_owner failed");
+        goto fail;
+    }
+
+    r = hdev->vhost_ops->vhost_get_features(hdev, &features);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "vhost_get_features failed");
+        goto fail;
+    }
+
+    /* n_initialized_vqs tracks how many queues cleanup has to undo. */
+    for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
+        r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i);
+            goto fail;
+        }
+    }
+
+    if (busyloop_timeout) {
+        for (i = 0; i < hdev->nvqs; ++i) {
+            r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
+                                                     busyloop_timeout);
+            if (r < 0) {
+                error_setg_errno(errp, -r, "Failed to set busyloop timeout");
+                goto fail_busyloop;
+            }
+        }
+    }
+
+    hdev->features = features;
+
+    hdev->memory_listener = (MemoryListener) {
+        .name = "vhost",
+        .begin = vhost_begin,
+        .commit = vhost_commit,
+        .region_add = vhost_region_addnop,
+        .region_nop = vhost_region_addnop,
+        .log_start = vhost_log_start,
+        .log_stop = vhost_log_stop,
+        .log_sync = vhost_log_sync,
+        .log_global_start = vhost_log_global_start,
+        .log_global_stop = vhost_log_global_stop,
+        .eventfd_add = vhost_eventfd_add,
+        .eventfd_del = vhost_eventfd_del,
+        .priority = 10
+    };
+
+    hdev->iommu_listener = (MemoryListener) {
+        .name = "vhost-iommu",
+        .region_add = vhost_iommu_region_add,
+        .region_del = vhost_iommu_region_del,
+    };
+
+    /* Block migration when dirty logging cannot work for this device. */
+    if (hdev->migration_blocker == NULL) {
+        if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
+            error_setg(&hdev->migration_blocker,
+                       "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
+        } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) {
+            error_setg(&hdev->migration_blocker,
+                       "Migration disabled: failed to allocate shared memory");
+        }
+    }
+
+    if (hdev->migration_blocker != NULL) {
+        r = migrate_add_blocker(hdev->migration_blocker, errp);
+        if (r < 0) {
+            error_free(hdev->migration_blocker);
+            /*
+             * Clear the pointer: vhost_dev_cleanup() below would otherwise
+             * delete and free the same blocker a second time.
+             */
+            hdev->migration_blocker = NULL;
+            goto fail_busyloop;
+        }
+    }
+
+    hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
+    hdev->n_mem_sections = 0;
+    hdev->mem_sections = NULL;
+    hdev->log = NULL;
+    hdev->log_size = 0;
+    hdev->log_enabled = false;
+    hdev->started = false;
+    memory_listener_register(&hdev->memory_listener, &address_space_memory);
+    QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
+
+    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
+        error_setg(errp, "vhost backend memory slots limit is less"
+                   " than current number of present memory slots");
+        r = -EINVAL;
+        goto fail_busyloop;
+    }
+
+    return 0;
+
+fail_busyloop:
+    /* Reset the timeout on every queue that had it programmed. */
+    if (busyloop_timeout) {
+        while (--i >= 0) {
+            vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
+        }
+    }
+fail:
+    /* Only the queues that were actually initialised may be cleaned up. */
+    hdev->nvqs = n_initialized_vqs;
+    vhost_dev_cleanup(hdev);
+    return r;
+}
+
+/*
+ * Tear down a vhost device set up by vhost_dev_init().
+ *
+ * Cleans up the notifiers of the first hdev->nvqs virtqueues, unregisters
+ * the memory listener (only if hdev->mem was allocated), removes and frees
+ * the migration blocker, frees the memory tables, runs the backend cleanup
+ * and finally zeroes *hdev.  The dirty log must already have been released.
+ */
+void vhost_dev_cleanup(struct vhost_dev *hdev)
+{
+    int i;
+
+    trace_vhost_dev_cleanup(hdev);
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        vhost_virtqueue_cleanup(hdev->vqs + i);
+    }
+    if (hdev->mem) {
+        /* those are only safe after successful init */
+        memory_listener_unregister(&hdev->memory_listener);
+        QLIST_REMOVE(hdev, entry);
+    }
+    if (hdev->migration_blocker) {
+        migrate_del_blocker(hdev->migration_blocker);
+        error_free(hdev->migration_blocker);
+    }
+    g_free(hdev->mem);
+    g_free(hdev->mem_sections);
+    /* vhost_ops is checked because it may not have been set. */
+    if (hdev->vhost_ops) {
+        hdev->vhost_ops->vhost_backend_cleanup(hdev);
+    }
+    assert(!hdev->log);
+
+    /* Leave the structure in a pristine state for a later re-init. */
+    memset(hdev, 0, sizeof(struct vhost_dev));
+}
+
+/* Stop processing guest IO notifications in qemu.
+ * Start processing them in vhost in kernel.
+ *
+ * Grabs the device's ioeventfd and binds a host notifier for each of the
+ * hdev->nvqs virtqueues.  Returns 0 on success or a negative error; on
+ * failure every notifier bound so far is unbound again and the ioeventfd
+ * is released.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    int i, r, e;
+
+    /* We will pass the notifiers to the kernel, make sure that QEMU
+     * doesn't interfere.
+     */
+    r = virtio_device_grab_ioeventfd(vdev);
+    if (r < 0) {
+        error_report("binding does not support host notifiers");
+        goto fail;
+    }
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
+                                         true);
+        if (r < 0) {
+            error_report("vhost VQ %d notifier binding failed: %d", i, -r);
+            goto fail_vq;
+        }
+    }
+
+    return 0;
+fail_vq:
+    /* Roll back the queues bound before the failing one. */
+    while (--i >= 0) {
+        e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
+                                         false);
+        if (e < 0) {
+            /* Report the cleanup error itself, not the original failure. */
+            error_report("vhost VQ %d notifier cleanup error: %d", i, -e);
+        }
+        assert (e >= 0);
+        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
+    }
+    virtio_device_release_ioeventfd(vdev);
+fail:
+    return r;
+}
+
+/*
+ * Stop processing guest IO notifications in vhost and start processing
+ * them in qemu again.
+ *
+ * This might run the qemu handlers right away, so virtio in qemu must be
+ * completely set up when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+    VirtioBusState *vbus = VIRTIO_BUS(BUS(qdev_get_parent_bus(DEVICE(vdev))));
+    int pos;
+
+    for (pos = 0; pos < hdev->nvqs; ++pos) {
+        int queue = hdev->vq_index + pos;
+        int ret = virtio_bus_set_host_notifier(vbus, queue, false);
+
+        if (ret < 0) {
+            error_report("vhost VQ %d notifier cleanup failed: %d",
+                         pos, -ret);
+        }
+        assert (ret >= 0);
+        virtio_bus_cleanup_host_notifier(vbus, queue);
+    }
+    virtio_device_release_ioeventfd(vdev);
+}
+
+/* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ *
+ * @n is the virtio queue index and must lie within
+ * [hdev->vq_index, hdev->vq_index + hdev->nvqs).
+ * Returns the result of testing and clearing the queue's masked notifier.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
+{
+    struct vhost_virtqueue *vq;
+
+    /* Validate @n before any pointer is derived from it. */
+    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
+
+    /*
+     * Compute the array offset first: "vqs + n - vq_index" would form the
+     * intermediate pointer "vqs + n", which can lie outside the array.
+     */
+    vq = hdev->vqs + (n - hdev->vq_index);
+    return event_notifier_test_and_clear(&vq->masked_notifier);
+}
+
+/*
+ * Mask or unmask events from virtqueue @n.
+ *
+ * When masking, the backend's call fd is pointed at the queue's masked
+ * notifier; when unmasking, at the queue's guest notifier.  A failure of
+ * the backend call is only logged.
+ */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+                         bool mask)
+{
+    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
+    int pos = n - hdev->vq_index;
+    struct vhost_vring_file call;
+    EventNotifier *target;
+    int ret;
+
+    /* should only be called after backend is connected */
+    assert(hdev->vhost_ops);
+
+    if (!mask) {
+        target = virtio_queue_get_guest_notifier(vvq);
+    } else {
+        assert(vdev->use_guest_notifier_mask);
+        target = &hdev->vqs[pos].masked_notifier;
+    }
+    call.fd = event_notifier_get_wfd(target);
+    call.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
+
+    ret = hdev->vhost_ops->vhost_set_vring_call(hdev, &call);
+    if (ret < 0) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_vring_call failed");
+    }
+}
+
+/*
+ * Filter @features against what the backend offers.
+ *
+ * For every bit listed in @feature_bits (terminated by
+ * VHOST_INVALID_FEATURE_BIT) that is absent from hdev->features, the bit is
+ * cleared in @features.  Bits not listed are passed through untouched.
+ * Returns the filtered feature set.
+ */
+uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
+                            uint64_t features)
+{
+    const int *cur;
+
+    for (cur = feature_bits; *cur != VHOST_INVALID_FEATURE_BIT; cur++) {
+        uint64_t mask = 1ULL << *cur;
+
+        if ((hdev->features & mask) == 0) {
+            features &= ~mask;
+        }
+    }
+
+    return features;
+}
+
+/*
+ * Record acknowledged features.
+ *
+ * Every bit listed in @feature_bits (terminated by
+ * VHOST_INVALID_FEATURE_BIT) that is set in @features is added to
+ * hdev->acked_features.  Bits already acked are never cleared.
+ */
+void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
+                        uint64_t features)
+{
+    const int *cur;
+
+    for (cur = feature_bits; *cur != VHOST_INVALID_FEATURE_BIT; cur++) {
+        uint64_t mask = 1ULL << *cur;
+
+        hdev->acked_features |= features & mask;
+    }
+}
+
+/*
+ * Read @config_len bytes of device configuration into @config through the
+ * backend.  Returns the backend's result, or -ENOSYS with @errp set when
+ * the backend does not implement vhost_get_config.
+ */
+int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
+                         uint32_t config_len, Error **errp)
+{
+    assert(hdev->vhost_ops);
+
+    if (!hdev->vhost_ops->vhost_get_config) {
+        error_setg(errp, "vhost_get_config not implemented");
+        return -ENOSYS;
+    }
+
+    return hdev->vhost_ops->vhost_get_config(hdev, config, config_len, errp);
+}
+
+/*
+ * Write @size bytes from @data at @offset of the device configuration
+ * through the backend.  Returns the backend's result, or -ENOSYS when the
+ * backend does not implement vhost_set_config.
+ */
+int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
+                         uint32_t offset, uint32_t size, uint32_t flags)
+{
+    assert(hdev->vhost_ops);
+
+    if (!hdev->vhost_ops->vhost_set_config) {
+        return -ENOSYS;
+    }
+
+    return hdev->vhost_ops->vhost_set_config(hdev, data, offset, size, flags);
+}
+
+/* Install @ops as the config-change callbacks of @hdev (stored, not copied). */
+void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
+                                   const VhostDevConfigOps *ops)
+{
+    hdev->config_ops = ops;
+}
+
+/*
+ * Release the inflight buffer of @inflight, if any.
+ * Accepts NULL and an already-released region; afterwards addr is NULL and
+ * fd is -1.  The size and queue_size fields are left untouched.
+ */
+void vhost_dev_free_inflight(struct vhost_inflight *inflight)
+{
+    if (!inflight || !inflight->addr) {
+        return;
+    }
+
+    qemu_memfd_free(inflight->addr, inflight->size, inflight->fd);
+    inflight->addr = NULL;
+    inflight->fd = -1;
+}
+
+/*
+ * Replace the inflight buffer of @inflight with a fresh memfd-backed buffer
+ * of @new_size bytes.
+ *
+ * The new buffer is allocated first; only if that succeeds is the old one
+ * released.  Returns 0 on success, or -ENOMEM (after reporting the error)
+ * with @inflight left unchanged.
+ */
+static int vhost_dev_resize_inflight(struct vhost_inflight *inflight,
+                                     uint64_t new_size)
+{
+    Error *local_err = NULL;
+    int memfd = -1;
+    void *buf;
+
+    buf = qemu_memfd_alloc("vhost-inflight", new_size,
+                           F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+                           &memfd, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -ENOMEM;
+    }
+
+    vhost_dev_free_inflight(inflight);
+    inflight->addr = buf;
+    inflight->fd = memfd;
+    inflight->size = new_size;
+    inflight->offset = 0;
+
+    return 0;
+}
+
+/*
+ * Write the inflight region to migration stream @f.
+ * Layout: be64 size, then (only if a buffer exists) be16 queue_size and the
+ * raw buffer.  A size of 0 marks "no inflight buffer".
+ */
+void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f)
+{
+    if (!inflight->addr) {
+        qemu_put_be64(f, 0);
+        return;
+    }
+
+    qemu_put_be64(f, inflight->size);
+    qemu_put_be16(f, inflight->queue_size);
+    qemu_put_buffer(f, inflight->addr, inflight->size);
+}
+
+/*
+ * Read an inflight region from migration stream @f, as written by
+ * vhost_dev_save_inflight().
+ *
+ * A stored size of 0 means there is nothing to load.  Otherwise the buffer
+ * is (re)allocated if needed and filled from the stream.
+ * Returns 0 on success or the negative error from the allocation.
+ */
+int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
+{
+    uint64_t size;
+
+    size = qemu_get_be64(f);
+    if (!size) {
+        return 0;
+    }
+
+    /*
+     * Allocate when the size differs, and also when no buffer exists:
+     * vhost_dev_free_inflight() clears addr but keeps size, so a matching
+     * size alone does not guarantee a buffer to read into.
+     */
+    if (inflight->size != size || !inflight->addr) {
+        int ret = vhost_dev_resize_inflight(inflight, size);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+    inflight->queue_size = qemu_get_be16(f);
+
+    qemu_get_buffer(f, inflight->addr, size);
+
+    return 0;
+}
+
+/*
+ * Prepare @hdev for inflight-fd operations before the device is started.
+ *
+ * If the backend lacks either inflight-fd operation this is a no-op
+ * returning 0.  Otherwise @vdev is bound to @hdev and the features are set;
+ * a negative result of that step is logged and returned.
+ */
+int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+    int ret;
+
+    if (!hdev->vhost_ops->vhost_get_inflight_fd ||
+        !hdev->vhost_ops->vhost_set_inflight_fd) {
+        return 0;
+    }
+
+    hdev->vdev = vdev;
+
+    ret = vhost_dev_set_features(hdev, hdev->log_enabled);
+    if (ret >= 0) {
+        return 0;
+    }
+
+    VHOST_OPS_DEBUG(ret, "vhost_dev_prepare_inflight failed");
+    return ret;
+}
+
+/*
+ * Pass the inflight buffer to the backend.
+ * Does nothing (returns 0) when the backend has no vhost_set_inflight_fd
+ * operation or @inflight holds no buffer; otherwise returns the backend's
+ * result, logging a failure.
+ */
+int vhost_dev_set_inflight(struct vhost_dev *dev,
+                           struct vhost_inflight *inflight)
+{
+    int ret;
+
+    if (!dev->vhost_ops->vhost_set_inflight_fd || !inflight->addr) {
+        return 0;
+    }
+
+    ret = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight);
+    if (ret) {
+        VHOST_OPS_DEBUG(ret, "vhost_set_inflight_fd failed");
+    }
+    return ret;
+}
+
+/*
+ * Obtain an inflight buffer for queues of @queue_size from the backend.
+ * Does nothing (returns 0) when the backend has no vhost_get_inflight_fd
+ * operation; otherwise returns the backend's result, logging a failure.
+ */
+int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
+                           struct vhost_inflight *inflight)
+{
+    int ret;
+
+    if (!dev->vhost_ops->vhost_get_inflight_fd) {
+        return 0;
+    }
+
+    ret = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight);
+    if (ret) {
+        VHOST_OPS_DEBUG(ret, "vhost_get_inflight_fd failed");
+    }
+    return ret;
+}
+
+/*
+ * Enable or disable the vrings of @hdev through the backend.
+ * Returns 0 when the backend has no such operation or when the operation
+ * must be skipped (see below); otherwise the backend's result.
+ */
+static int vhost_dev_set_vring_enable(struct vhost_dev *hdev, int enable)
+{
+    if (!hdev->vhost_ops->vhost_set_vring_enable) {
+        return 0;
+    }
+
+    /*
+     * vhost-user without VHOST_USER_F_PROTOCOL_FEATURES: the rings start
+     * out enabled and VHOST_USER_SET_VRING_ENABLE is not supported, so the
+     * .vhost_set_vring_enable callback would fail.  Skip it.
+     */
+    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
+        if (!virtio_has_feature(hdev->backend_features,
+                                VHOST_USER_F_PROTOCOL_FEATURES)) {
+            return 0;
+        }
+    }
+
+    return hdev->vhost_ops->vhost_set_vring_enable(hdev, enable);
+}
+
+/*
+ * Start vhost processing of @vdev on @hdev.
+ * Host notifiers must be enabled at this point.
+ *
+ * Negotiates features, registers the IOMMU listener when needed, sends the
+ * memory table, starts every virtqueue, sets up the dirty log if logging is
+ * enabled, optionally enables the vrings (@vrings) and finally starts the
+ * backend device.
+ *
+ * Returns 0 on success or the error of the failing step.  On failure all
+ * completed steps are rolled back and both started flags are cleared.
+ */
+int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+{
+    int i, r;
+
+    /* should only be called after backend is connected */
+    assert(hdev->vhost_ops);
+
+    trace_vhost_dev_start(hdev, vdev->name, vrings);
+
+    vdev->vhost_started = true;
+    hdev->started = true;
+    hdev->vdev = vdev;
+
+    r = vhost_dev_set_features(hdev, hdev->log_enabled);
+    if (r < 0) {
+        goto fail_features;
+    }
+
+    if (vhost_dev_has_iommu(hdev)) {
+        memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
+    }
+
+    r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
+    if (r < 0) {
+        VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
+        goto fail_mem;
+    }
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vhost_virtqueue_start(hdev,
+                                  vdev,
+                                  hdev->vqs + i,
+                                  hdev->vq_index + i);
+        if (r < 0) {
+            goto fail_vq;
+        }
+    }
+
+    if (hdev->log_enabled) {
+        uint64_t log_base;
+
+        hdev->log_size = vhost_get_log_size(hdev);
+        hdev->log = vhost_log_get(hdev->log_size,
+                                  vhost_dev_log_is_shared(hdev));
+        log_base = (uintptr_t)hdev->log->log;
+        r = hdev->vhost_ops->vhost_set_log_base(hdev,
+                                                hdev->log_size ? log_base : 0,
+                                                hdev->log);
+        if (r < 0) {
+            VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
+            goto fail_log;
+        }
+    }
+    if (vrings) {
+        r = vhost_dev_set_vring_enable(hdev, true);
+        if (r) {
+            goto fail_log;
+        }
+    }
+    if (hdev->vhost_ops->vhost_dev_start) {
+        r = hdev->vhost_ops->vhost_dev_start(hdev, true);
+        if (r) {
+            goto fail_start;
+        }
+    }
+    if (vhost_dev_has_iommu(hdev) &&
+        hdev->vhost_ops->vhost_set_iotlb_callback) {
+        hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
+
+        /*
+         * Update used ring information for IOTLB to work correctly;
+         * the vhost-kernel code requires this.
+         */
+        for (i = 0; i < hdev->nvqs; ++i) {
+            struct vhost_virtqueue *vq = hdev->vqs + i;
+            vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+        }
+    }
+    return 0;
+fail_start:
+    if (vrings) {
+        vhost_dev_set_vring_enable(hdev, false);
+    }
+fail_log:
+    vhost_log_put(hdev, false);
+fail_vq:
+    /* Stop the queues that were started before the failure. */
+    while (--i >= 0) {
+        vhost_virtqueue_stop(hdev,
+                             vdev,
+                             hdev->vqs + i,
+                             hdev->vq_index + i);
+    }
+
+fail_mem:
+    /*
+     * The IOMMU listener was registered right after feature negotiation;
+     * drop it again so a failed start does not leave it registered.
+     */
+    if (vhost_dev_has_iommu(hdev)) {
+        memory_listener_unregister(&hdev->iommu_listener);
+    }
+fail_features:
+    vdev->vhost_started = false;
+    hdev->started = false;
+    return r;
+}
+
+/*
+ * Stop vhost processing of @vdev on @hdev.
+ * Host notifiers must be enabled at this point.
+ *
+ * Stops the backend device, optionally disables the vrings (@vrings), stops
+ * every virtqueue, removes the IOTLB callback and IOMMU listener when an
+ * IOMMU is in use, releases the dirty log and clears the started state.
+ */
+void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+{
+    int i;
+
+    /* should only be called after backend is connected */
+    assert(hdev->vhost_ops);
+
+    trace_vhost_dev_stop(hdev, vdev->name, vrings);
+
+    /* Same backend hook as in vhost_dev_start(), called with 'false'. */
+    if (hdev->vhost_ops->vhost_dev_start) {
+        hdev->vhost_ops->vhost_dev_start(hdev, false);
+    }
+    if (vrings) {
+        vhost_dev_set_vring_enable(hdev, false);
+    }
+    for (i = 0; i < hdev->nvqs; ++i) {
+        vhost_virtqueue_stop(hdev,
+                             vdev,
+                             hdev->vqs + i,
+                             hdev->vq_index + i);
+    }
+
+    if (vhost_dev_has_iommu(hdev)) {
+        if (hdev->vhost_ops->vhost_set_iotlb_callback) {
+            hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
+        }
+        memory_listener_unregister(&hdev->iommu_listener);
+    }
+    vhost_log_put(hdev, true);
+    hdev->started = false;
+    vdev->vhost_started = false;
+    /* The vhost device is no longer bound to a virtio device. */
+    hdev->vdev = NULL;
+}
+
+/*
+ * Forward a vhost-net "set backend" request to the backend.
+ * Returns the backend's result, or -ENOSYS when the backend does not
+ * implement vhost_net_set_backend.
+ */
+int vhost_net_set_backend(struct vhost_dev *hdev,
+                          struct vhost_vring_file *file)
+{
+    if (!hdev->vhost_ops->vhost_net_set_backend) {
+        return -ENOSYS;
+    }
+
+    return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
+}
diff --git a/hw/virtio/virtio-9p-pci.c b/hw/virtio/virtio-9p-pci.c
new file mode 100644
index 00000000..94c14f0b
--- /dev/null
+++ b/hw/virtio/virtio-9p-pci.c
@@ -0,0 +1,91 @@
+/*
+ * Virtio 9p PCI Bindings
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/9pfs/virtio-9p.h"
+#include "hw/qdev-properties.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+/*
+ * virtio-9p-pci: This extends VirtioPCIProxy.
+ */
+
+#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base"
+typedef struct V9fsPCIState V9fsPCIState;
+DECLARE_INSTANCE_CHECKER(V9fsPCIState, VIRTIO_9P_PCI,
+                         TYPE_VIRTIO_9P_PCI)
+
+/* Instance state of the virtio-9p PCI device. */
+struct V9fsPCIState {
+    /* VirtIOPCIProxy base object that this type extends. */
+    VirtIOPCIProxy parent_obj;
+    /* Embedded virtio-9p device, realized on the proxy's bus. */
+    V9fsVirtioState vdev;
+};
+
+/*
+ * VirtioPCIClass realize hook: realize the embedded virtio-9p device on the
+ * proxy's virtio bus.  Errors are reported through @errp.
+ */
+static void virtio_9p_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    V9fsPCIState *pci_9p = VIRTIO_9P_PCI(vpci_dev);
+
+    qdev_realize(DEVICE(&pci_9p->vdev), BUS(&vpci_dev->bus), errp);
+}
+
+/* User-settable properties of the virtio-9p PCI device. */
+static Property virtio_9p_pci_properties[] = {
+    /* "ioeventfd": VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT in flags, default on. */
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    /* "vectors": stored in nvectors, default 2. */
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initialiser: installs the realize hook, fills in the PCI identity
+ * and registers the device category and properties.
+ */
+static void virtio_9p_pci_class_init(ObjectClass *klass, void *data)
+{
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* PCI identity. */
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_9P;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = 0x2;
+
+    /* Generic device and virtio-pci setup. */
+    vpci_class->realize = virtio_9p_pci_realize;
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    device_class_set_props(dev_class, virtio_9p_pci_properties);
+}
+
+/* Instance initialiser: set up the embedded virtio-9p child device. */
+static void virtio_9p_pci_instance_init(Object *obj)
+{
+    V9fsPCIState *pci_9p = VIRTIO_9P_PCI(obj);
+    V9fsVirtioState *child = &pci_9p->vdev;
+
+    virtio_instance_init_common(obj, child, sizeof(*child), TYPE_VIRTIO_9P);
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(): the base type
+ * name plus the generic, transitional and non-transitional names.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = {
+    .base_name              = TYPE_VIRTIO_9P_PCI,
+    .generic_name           = "virtio-9p-pci",
+    .transitional_name      = "virtio-9p-pci-transitional",
+    .non_transitional_name  = "virtio-9p-pci-non-transitional",
+    .instance_size = sizeof(V9fsPCIState),
+    .instance_init = virtio_9p_pci_instance_init,
+    .class_init    = virtio_9p_pci_class_init,
+};
+
+/* Register the virtio-9p PCI types; invoked through type_init(). */
+static void virtio_9p_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_9p_pci_info);
+}
+
+type_init(virtio_9p_pci_register)
diff --git a/hw/virtio/virtio-balloon-pci.c b/hw/virtio/virtio-balloon-pci.c
new file mode 100644
index 00000000..ce2645ba
--- /dev/null
+++ b/hw/virtio/virtio-balloon-pci.c
@@ -0,0 +1,88 @@
+/*
+ * Virtio balloon PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-balloon.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+typedef struct VirtIOBalloonPCI VirtIOBalloonPCI;
+
+/*
+ * virtio-balloon-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIOBalloonPCI, VIRTIO_BALLOON_PCI,
+                         TYPE_VIRTIO_BALLOON_PCI)
+
+/* PCI proxy object that embeds the virtio-balloon device it exposes. */
+struct VirtIOBalloonPCI {
+    /* PCI transport proxy; placed first as the parent object */
+    VirtIOPCIProxy parent_obj;
+    /* embedded balloon device, realized on the proxy's virtio bus */
+    VirtIOBalloon vdev;
+};
+
+/*
+ * Realize hook of the virtio-balloon PCI proxy: sets the proxy's PCI class
+ * code to PCI_CLASS_OTHERS and then realizes the embedded balloon device on
+ * the proxy's virtio bus.  Failures are reported through @errp.
+ */
+static void virtio_balloon_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOBalloonPCI *proxy = VIRTIO_BALLOON_PCI(vpci_dev);
+
+    vpci_dev->class_code = PCI_CLASS_OTHERS;
+    qdev_realize(DEVICE(&proxy->vdev), BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initializer of the virtio-balloon PCI proxy type: installs the
+ * realize hook, puts the device into the "misc" category and fills in the
+ * PCI identification fields.
+ */
+static void virtio_balloon_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* PCI identity */
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_BALLOON;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = PCI_CLASS_OTHERS;
+
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+    vpci_class->realize = virtio_balloon_pci_realize;
+}
+
+/*
+ * Instance initializer: sets up the embedded balloon device and exposes its
+ * "guest-stats" and "guest-stats-polling-interval" properties on the proxy
+ * object through aliases.
+ */
+static void virtio_balloon_pci_instance_init(Object *obj)
+{
+    VirtIOBalloonPCI *proxy = VIRTIO_BALLOON_PCI(obj);
+    Object *balloon;
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_BALLOON);
+
+    balloon = OBJECT(&proxy->vdev);
+    object_property_add_alias(obj, "guest-stats", balloon, "guest-stats");
+    object_property_add_alias(obj, "guest-stats-polling-interval", balloon,
+                              "guest-stats-polling-interval");
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(): the base type
+ * name plus the generic, transitional and non-transitional type names,
+ * together with the instance size and the init hooks defined above.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = {
+    .base_name             = TYPE_VIRTIO_BALLOON_PCI,
+    .generic_name          = "virtio-balloon-pci",
+    .transitional_name     = "virtio-balloon-pci-transitional",
+    .non_transitional_name = "virtio-balloon-pci-non-transitional",
+    .instance_size = sizeof(VirtIOBalloonPCI),
+    .instance_init = virtio_balloon_pci_instance_init,
+    .class_init    = virtio_balloon_pci_class_init,
+};
+
+/* Registers the virtio-balloon PCI types described by virtio_balloon_pci_info. */
+static void virtio_balloon_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_balloon_pci_info);
+}
+
+/* Hook the registration function into type initialization. */
+type_init(virtio_balloon_pci_register)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
new file mode 100644
index 00000000..73ac5eb6
--- /dev/null
+++ b/hw/virtio/virtio-balloon.c
@@ -0,0 +1,1079 @@
+/*
+ * Virtio Balloon Device
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright (C) 2011 Red Hat, Inc.
+ * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+#include "qemu/madvise.h"
+#include "hw/virtio/virtio.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "sysemu/balloon.h"
+#include "hw/virtio/virtio-balloon.h"
+#include "exec/address-spaces.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-machine.h"
+#include "qapi/visitor.h"
+#include "trace.h"
+#include "qemu/error-report.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+
+#define BALLOON_PAGE_SIZE  (1 << VIRTIO_BALLOON_PFN_SHIFT)
+
+/*
+ * Tracks which balloon-sized subpages of one larger host page have been
+ * inflated so far (see balloon_inflate_page()).
+ */
+typedef struct PartiallyBalloonedPage {
+    /* address identifying the host page currently being tracked */
+    ram_addr_t base_gpa;
+    /* one bit per balloon-sized subpage; NULL when nothing is tracked */
+    unsigned long *bitmap;
+} PartiallyBalloonedPage;
+
+/*
+ * Drop the subpage bitmap of @pbp, if there is one, and mark @pbp as not
+ * tracking any page.
+ */
+static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp)
+{
+    if (pbp->bitmap) {
+        g_free(pbp->bitmap);
+        pbp->bitmap = NULL;
+    }
+}
+
+/*
+ * Start tracking the host page identified by @base_gpa: allocate a fresh
+ * bitmap with @subpages bits for it.
+ */
+static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp,
+                                     ram_addr_t base_gpa,
+                                     long subpages)
+{
+    pbp->bitmap = bitmap_new(subpages);
+    pbp->base_gpa = base_gpa;
+}
+
+/* Returns true if @pbp is tracking the host page identified by @base_gpa. */
+static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp,
+                                       ram_addr_t base_gpa)
+{
+    return pbp->base_gpa == base_gpa;
+}
+
+/*
+ * Returns true when the balloon must not discard or hint memory right now:
+ * RAM block discards are disabled, an incoming postcopy migration is in
+ * progress, or a background snapshot is being taken.
+ */
+static bool virtio_balloon_inhibited(void)
+{
+    if (ram_block_discard_is_disabled()) {
+        return true;
+    }
+
+    /*
+     * Postcopy cannot deal with concurrent discards,
+     * so it's special, as well as background snapshots.
+     */
+    if (migration_in_incoming_postcopy()) {
+        return true;
+    }
+    return migration_in_bg_snapshot();
+}
+
+/*
+ * Handle the inflation of one balloon page located at @mr_offset inside @mr.
+ *
+ * If the backing RAMBlock uses pages of BALLOON_PAGE_SIZE, the page is
+ * discarded right away.  Otherwise the subpage is recorded in @pbp and the
+ * whole host page is only discarded once every one of its subpages has been
+ * inflated.  @pbp tracks a single host page at a time: switching to a
+ * different host page drops the state collected for the previous one.
+ */
+static void balloon_inflate_page(VirtIOBalloon *balloon,
+                                 MemoryRegion *mr, hwaddr mr_offset,
+                                 PartiallyBalloonedPage *pbp)
+{
+    void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
+    ram_addr_t rb_offset, rb_aligned_offset, base_gpa;
+    RAMBlock *rb;
+    size_t rb_page_size;
+    int subpages;
+
+    /* XXX is there a better way to get to the RAMBlock than via a
+     * host address? */
+    rb = qemu_ram_block_from_host(addr, false, &rb_offset);
+    rb_page_size = qemu_ram_pagesize(rb);
+
+    if (rb_page_size == BALLOON_PAGE_SIZE) {
+        /* Easy case */
+
+        ram_block_discard_range(rb, rb_offset, rb_page_size);
+        /* We ignore errors from ram_block_discard_range(), because it
+         * has already reported them, and failing to discard a balloon
+         * page is not fatal */
+        return;
+    }
+
+    /* Hard case
+     *
+     * We've put a piece of a larger host page into the balloon - we
+     * need to keep track until we have a whole host page to
+     * discard
+     */
+    warn_report_once(
+"Balloon used with backing page size > 4kiB, this may not be reliable");
+
+    /* Start of the host page within the RAMBlock, and how many balloon
+     * pages make up one host page */
+    rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size);
+    subpages = rb_page_size / BALLOON_PAGE_SIZE;
+    /* Key used to recognise the host page this subpage belongs to */
+    base_gpa = memory_region_get_ram_addr(mr) + mr_offset -
+               (rb_offset - rb_aligned_offset);
+
+    if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) {
+        /* We've partially ballooned part of a host page, but now
+         * we're trying to balloon part of a different one.  Too hard,
+         * give up on the old partial page */
+        virtio_balloon_pbp_free(pbp);
+    }
+
+    if (!pbp->bitmap) {
+        virtio_balloon_pbp_alloc(pbp, base_gpa, subpages);
+    }
+
+    /* Mark this subpage as inflated */
+    set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE,
+            pbp->bitmap);
+
+    if (bitmap_full(pbp->bitmap, subpages)) {
+        /* We've accumulated a full host page, we can actually discard
+         * it now */
+
+        ram_block_discard_range(rb, rb_aligned_offset, rb_page_size);
+        /* We ignore errors from ram_block_discard_range(), because it
+         * has already reported them, and failing to discard a balloon
+         * page is not fatal */
+        virtio_balloon_pbp_free(pbp);
+    }
+}
+
+/*
+ * Handle the deflation of one balloon page located at @mr_offset inside @mr
+ * by hinting to the host that the containing host page will be needed.
+ * A failing hint only produces a warning.
+ */
+static void balloon_deflate_page(VirtIOBalloon *balloon,
+                                 MemoryRegion *mr, hwaddr mr_offset)
+{
+    void *page = memory_region_get_ram_ptr(mr) + mr_offset;
+    ram_addr_t block_offset;
+    RAMBlock *block;
+    size_t host_page_size;
+    void *host_page;
+
+    /* XXX is there a better way to get to the RAMBlock than via a
+     * host address? */
+    block = qemu_ram_block_from_host(page, false, &block_offset);
+    host_page_size = qemu_ram_pagesize(block);
+
+    /* When a page is deflated, we hint the whole host page it lives
+     * on, since we can't do anything smaller */
+    host_page = (void *)((uintptr_t)page & ~(host_page_size - 1));
+
+    if (qemu_madvise(host_page, host_page_size, QEMU_MADV_WILLNEED) != 0) {
+        /* Failing to page hint shouldn't be fatal, so only warn */
+        warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
+                    strerror(errno));
+    }
+}
+
+/*
+ * Field names used for the individual statistics, indexed by the
+ * VIRTIO_BALLOON_S_* tag.  balloon_stats_get_all() uses them as member
+ * names of the "stats" struct it emits.
+ */
+static const char *balloon_stat_names[] = {
+   [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
+   [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
+   [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
+   [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
+   [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
+   [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
+   [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
+   [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
+   [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
+   [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
+   [VIRTIO_BALLOON_S_NR] = NULL
+};
+
+/*
+ * reset_stats - Mark every entry of the stats array as unset (-1)
+ *
+ * Call this at device initialization and right before a freshly reported
+ * set of stats is stored, so that no stale value survives when the guest
+ * reports only a subset of the supported statistics.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+    int idx;
+
+    for (idx = 0; idx < VIRTIO_BALLOON_S_NR; idx++) {
+        dev->stats[idx] = -1;
+    }
+}
+
+/* Returns true if the device has the VIRTIO_BALLOON_F_STATS_VQ feature. */
+static bool balloon_stats_supported(const VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
+}
+
+/* Stats polling counts as enabled while the poll interval is non-zero. */
+static bool balloon_stats_enabled(const VirtIOBalloon *s)
+{
+    return s->stats_poll_interval > 0;
+}
+
+/*
+ * Disable stats polling: free the polling timer and reset the poll
+ * interval to 0.  Does nothing when polling is not enabled.
+ */
+static void balloon_stats_destroy_timer(VirtIOBalloon *s)
+{
+    if (!balloon_stats_enabled(s)) {
+        return;
+    }
+
+    timer_free(s->stats_timer);
+    s->stats_timer = NULL;
+    s->stats_poll_interval = 0;
+}
+
+/* (Re-)arm the stats timer to expire @secs seconds of virtual time from now. */
+static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
+{
+    int64_t expire_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + secs * 1000;
+
+    timer_mod(s->stats_timer, expire_ms);
+}
+
+/*
+ * Stats timer callback.  If a stats element is being held and the stats
+ * queue feature is present, the element is handed back to the guest and
+ * released.  Otherwise the timer is simply re-armed with the current poll
+ * interval.
+ */
+static void balloon_stats_poll_cb(void *opaque)
+{
+    VirtIOBalloon *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    if (s->stats_vq_elem != NULL && balloon_stats_supported(s)) {
+        virtqueue_push(s->svq, s->stats_vq_elem, 0);
+        virtio_notify(vdev, s->svq);
+        g_free(s->stats_vq_elem);
+        s->stats_vq_elem = NULL;
+        return;
+    }
+
+    /* nothing to hand back right now: re-schedule */
+    balloon_stats_change_timer(s, s->stats_poll_interval);
+}
+
+/*
+ * Property getter that emits all guest statistics through visitor @v as a
+ * struct with two members: "last-update" (s->stats_last_update) and a
+ * nested struct "stats" holding one uint64 per entry of
+ * balloon_stat_names[].  Any visitor error is propagated to @errp.
+ */
+static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
+                                  void *opaque, Error **errp)
+{
+    Error *err = NULL;
+    VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+    int i;
+
+    /* outer struct */
+    if (!visit_start_struct(v, name, NULL, 0, &err)) {
+        goto out;
+    }
+    if (!visit_type_int(v, "last-update", &s->stats_last_update, &err)) {
+        goto out_end;
+    }
+
+    /* nested "stats" struct */
+    if (!visit_start_struct(v, "stats", NULL, 0, &err)) {
+        goto out_end;
+    }
+    for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
+        if (!visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err)) {
+            goto out_nested;
+        }
+    }
+    visit_check_struct(v, &err);
+out_nested:
+    /* closes the nested "stats" struct */
+    visit_end_struct(v, NULL);
+
+    if (!err) {
+        visit_check_struct(v, &err);
+    }
+out_end:
+    /* closes the outer struct */
+    visit_end_struct(v, NULL);
+out:
+    error_propagate(errp, err);
+}
+
+/* Property getter: visits the current stats poll interval as an integer. */
+static void balloon_stats_get_poll_interval(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+    visit_type_int(v, name, &s->stats_poll_interval, errp);
+}
+
+/*
+ * Property setter for the stats poll interval.
+ *
+ * Negative values and values above UINT32_MAX are rejected through @errp.
+ * Setting the current value again is a no-op, 0 disables polling, and any
+ * other value either re-arms the existing timer or creates a new one.
+ */
+static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+    int64_t interval;
+
+    if (!visit_type_int(v, name, &interval, errp)) {
+        return;
+    }
+
+    if (interval < 0) {
+        error_setg(errp, "timer value must be greater than zero");
+        return;
+    }
+    if (interval > UINT32_MAX) {
+        error_setg(errp, "timer value is too big");
+        return;
+    }
+
+    if (interval == s->stats_poll_interval) {
+        /* nothing changes */
+        return;
+    }
+
+    if (interval == 0) {
+        /* timer=0 disables the timer */
+        balloon_stats_destroy_timer(s);
+        return;
+    }
+
+    if (!balloon_stats_enabled(s)) {
+        /* polling was off so far: create a new timer and fire it right away */
+        g_assert(s->stats_timer == NULL);
+        s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                      balloon_stats_poll_cb, s);
+        s->stats_poll_interval = interval;
+        balloon_stats_change_timer(s, 0);
+    } else {
+        /* timer interval change */
+        s->stats_poll_interval = interval;
+        balloon_stats_change_timer(s, interval);
+    }
+}
+
+/*
+ * Handler for the reporting virtqueue.  For every popped element, each
+ * reported buffer that maps to a RAMBlock, is aligned to the block's page
+ * size and lies within the block's used length is discarded.  Discarding
+ * is skipped entirely while the balloon is inhibited or a poison value is
+ * set.  The element is always pushed back to the guest afterwards.
+ */
+static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+    VirtQueueElement *elem;
+
+    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
+        unsigned int i;
+
+        /*
+         * When we discard the page it has the effect of removing the page
+         * from the hypervisor itself and causing it to be zeroed when it
+         * is returned to us. So we must not discard the page if it is
+         * accessible by another device or process, or if the guest is
+         * expecting it to retain a non-zero value.
+         */
+        if (virtio_balloon_inhibited() || dev->poison_val) {
+            goto skip_element;
+        }
+
+        for (i = 0; i < elem->in_num; i++) {
+            void *addr = elem->in_sg[i].iov_base;
+            size_t size = elem->in_sg[i].iov_len;
+            ram_addr_t ram_offset;
+            RAMBlock *rb;
+
+            /*
+             * There is no need to check the memory section to see if
+             * it is ram/readonly/romd like there is for handle_output
+             * below. If the region is not meant to be written to then
+             * address_space_map will have allocated a bounce buffer
+             * and it will be freed in address_space_unmap and trigger
+             * an unassigned_mem_write before failing to copy over the
+             * buffer. If more than one bad descriptor is provided it
+             * will return NULL after the first bounce buffer and fail
+             * to map any resources.
+             */
+            rb = qemu_ram_block_from_host(addr, false, &ram_offset);
+            if (!rb) {
+                /* buffer is not backed by a RAMBlock: trace and skip it */
+                trace_virtio_balloon_bad_addr(elem->in_addr[i]);
+                continue;
+            }
+
+            /*
+             * For now we will simply ignore unaligned memory regions, or
+             * regions that overrun the end of the RAMBlock.
+             */
+            if (!QEMU_IS_ALIGNED(ram_offset | size, qemu_ram_pagesize(rb)) ||
+                (ram_offset + size) > qemu_ram_get_used_length(rb)) {
+                continue;
+            }
+
+            ram_block_discard_range(rb, ram_offset, size);
+        }
+
+skip_element:
+        /* hand the element back to the guest in every case */
+        virtqueue_push(vq, elem, 0);
+        virtio_notify(vdev, vq);
+        g_free(elem);
+    }
+}
+
+/*
+ * Shared handler for the inflate (s->ivq) and deflate (s->dvq) queues.
+ *
+ * Every popped element carries a list of 32-bit page frame numbers.  Each
+ * PFN is turned into a guest physical address and looked up in system
+ * memory; addresses that do not resolve to plain RAM are traced and
+ * skipped.  Unless the balloon is inhibited, the page is then inflated or
+ * deflated depending on which queue @vq is.  The partially-ballooned-page
+ * state lives for the duration of a single element only.
+ */
+static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    VirtQueueElement *elem;
+    MemoryRegionSection section;
+
+    for (;;) {
+        PartiallyBalloonedPage pbp = {};
+        size_t offset = 0;
+        uint32_t pfn;
+
+        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            break;
+        }
+
+        /* consume the PFN list four bytes at a time */
+        while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) {
+            unsigned int p = virtio_ldl_p(vdev, &pfn);
+            hwaddr pa;
+
+            pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT;
+            offset += 4;
+
+            section = memory_region_find(get_system_memory(), pa,
+                                         BALLOON_PAGE_SIZE);
+            if (!section.mr) {
+                trace_virtio_balloon_bad_addr(pa);
+                continue;
+            }
+            if (!memory_region_is_ram(section.mr) ||
+                memory_region_is_rom(section.mr) ||
+                memory_region_is_romd(section.mr)) {
+                trace_virtio_balloon_bad_addr(pa);
+                /* drop the region reference before skipping this PFN */
+                memory_region_unref(section.mr);
+                continue;
+            }
+
+            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
+                                               pa);
+            if (!virtio_balloon_inhibited()) {
+                if (vq == s->ivq) {
+                    balloon_inflate_page(s, section.mr,
+                                         section.offset_within_region, &pbp);
+                } else if (vq == s->dvq) {
+                    balloon_deflate_page(s, section.mr, section.offset_within_region);
+                } else {
+                    g_assert_not_reached();
+                }
+            }
+            memory_region_unref(section.mr);
+        }
+
+        virtqueue_push(vq, elem, 0);
+        virtio_notify(vdev, vq);
+        g_free(elem);
+        /* any incomplete host page tracked for this element is dropped */
+        virtio_balloon_pbp_free(&pbp);
+    }
+}
+
+/*
+ * Handler for the stats virtqueue.  Pops one element, keeps it as the
+ * pending stats element (handing back any element that was still pending),
+ * and stores the tag/value pairs it contains in s->stats[].  Finally, if
+ * polling is enabled, the stats timer is re-armed.
+ */
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+
+    if (elem) {
+        VirtIOBalloonStat stat;
+        size_t offset = 0;
+
+        if (s->stats_vq_elem != NULL) {
+            /* This should never happen if the driver follows the spec. */
+            virtqueue_push(vq, s->stats_vq_elem, 0);
+            virtio_notify(vdev, vq);
+            g_free(s->stats_vq_elem);
+        }
+        s->stats_vq_elem = elem;
+
+        /*
+         * Start from a clean slate so that no stale value is left behind
+         * when the guest supports fewer stats than it used to (ie. it has
+         * booted into an old kernel).
+         */
+        reset_stats(s);
+
+        while (iov_to_buf(elem->out_sg, elem->out_num, offset,
+                          &stat, sizeof(stat)) == sizeof(stat)) {
+            uint16_t tag = virtio_tswap16(vdev, stat.tag);
+            uint64_t val = virtio_tswap64(vdev, stat.val);
+
+            offset += sizeof(stat);
+            /* tags outside the known range are ignored */
+            if (tag < VIRTIO_BALLOON_S_NR) {
+                s->stats[tag] = val;
+            }
+        }
+        s->stats_vq_offset = offset;
+        s->stats_last_update = g_get_real_time() / G_USEC_PER_SEC;
+    }
+
+    if (balloon_stats_enabled(s)) {
+        balloon_stats_change_timer(s, s->stats_poll_interval);
+    }
+}
+
+/*
+ * Handler for the free page hinting virtqueue: defers the actual work to
+ * the free_page_bh bottom half.
+ */
+static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
+                                               VirtQueue *vq)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+    qemu_bh_schedule(s->free_page_bh);
+}
+
+/*
+ * Process a single element of the free page hinting virtqueue.
+ *
+ * The caller holds dev->free_page_lock; it is also the lock passed to
+ * qemu_cond_wait() while dev->block_iothread is set.
+ *
+ * An element with an out buffer carries a command id: a matching id moves
+ * the state from S_REQUESTED to S_START, any id received while in S_START
+ * moves it to S_STOP.  An element with in buffers carries free page hints,
+ * which are only forwarded while in S_START.
+ *
+ * Returns true if an element was processed successfully, false if the
+ * queue was empty or the command id buffer was malformed.
+ */
+static bool get_free_page_hints(VirtIOBalloon *dev)
+{
+    VirtQueueElement *elem;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+    bool ret = true;
+    unsigned int i;
+
+    while (dev->block_iothread) {
+        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
+    }
+
+    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+    if (!elem) {
+        return false;
+    }
+
+    if (elem->out_num) {
+        uint32_t id;
+        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
+                                 &id, sizeof(id));
+
+        if (unlikely(size != sizeof(id))) {
+            virtio_error(vdev, "received an incorrect cmd id");
+            ret = false;
+            goto out;
+        }
+        /* Only touch id once we know it has been filled in completely */
+        virtio_tswap32s(vdev, &id);
+
+        if (dev->free_page_hint_status == FREE_PAGE_HINT_S_REQUESTED &&
+            id == dev->free_page_hint_cmd_id) {
+            dev->free_page_hint_status = FREE_PAGE_HINT_S_START;
+        } else if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) {
+            /*
+             * Stop the optimization only when it has started. This
+             * avoids a stale stop sign for the previous command.
+             */
+            dev->free_page_hint_status = FREE_PAGE_HINT_S_STOP;
+        }
+    }
+
+    if (elem->in_num && dev->free_page_hint_status == FREE_PAGE_HINT_S_START) {
+        for (i = 0; i < elem->in_num; i++) {
+            qemu_guest_free_page_hint(elem->in_sg[i].iov_base,
+                                      elem->in_sg[i].iov_len);
+        }
+    }
+
+out:
+    /* The element goes back to the guest on success and on error alike */
+    virtqueue_push(vq, elem, 0);
+    g_free(elem);
+    return ret;
+}
+
+/*
+ * Bottom half that drains the free page hinting virtqueue.  Each iteration
+ * processes one element under free_page_lock with queue notifications
+ * turned off, then notifies the guest.  Notifications are turned back on
+ * once the loop ends.
+ */
+static void virtio_ballloon_get_free_page_hints(void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtQueue *vq = dev->free_page_vq;
+    bool continue_to_get_hints;
+
+    do {
+        qemu_mutex_lock(&dev->free_page_lock);
+        virtio_queue_set_notification(vq, 0);
+        continue_to_get_hints = get_free_page_hints(dev);
+        qemu_mutex_unlock(&dev->free_page_lock);
+        virtio_notify(vdev, vq);
+      /*
+       * Start to poll the vq once the hinting started. Otherwise, continue
+       * only when there are entries on the vq, which need to be given back.
+       */
+    } while (continue_to_get_hints ||
+             dev->free_page_hint_status == FREE_PAGE_HINT_S_START);
+    virtio_queue_set_notification(vq, 1);
+}
+
+/*
+ * Returns true if the device has the VIRTIO_BALLOON_F_FREE_PAGE_HINT
+ * feature.  Takes a void pointer so it can also serve as the .needed
+ * callback of the free page hint vmstate subsection.
+ */
+static bool virtio_balloon_free_page_support(void *opaque)
+{
+    VirtIOBalloon *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
+}
+
+/*
+ * Request a new round of free page hinting: pick the next command id
+ * (wrapping from UINT_MAX back to the minimum id), switch to S_REQUESTED
+ * under free_page_lock, and notify the guest of the config change.
+ */
+static void virtio_balloon_free_page_start(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    qemu_mutex_lock(&s->free_page_lock);
+    if (s->free_page_hint_cmd_id != UINT_MAX) {
+        s->free_page_hint_cmd_id++;
+    } else {
+        s->free_page_hint_cmd_id = VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN;
+    }
+    s->free_page_hint_status = FREE_PAGE_HINT_S_REQUESTED;
+    qemu_mutex_unlock(&s->free_page_lock);
+
+    virtio_notify_config(vdev);
+}
+
+/*
+ * Move free page hinting to S_STOP and tell the guest about it.  Does
+ * nothing if the state already is S_STOP.
+ */
+static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    if (s->free_page_hint_status == FREE_PAGE_HINT_S_STOP) {
+        return;
+    }
+
+    /*
+     * The lock also guarantees us that the
+     * virtio_ballloon_get_free_page_hints exits after the
+     * free_page_hint_status is set to S_STOP.
+     */
+    qemu_mutex_lock(&s->free_page_lock);
+    s->free_page_hint_status = FREE_PAGE_HINT_S_STOP;
+    qemu_mutex_unlock(&s->free_page_lock);
+
+    /*
+     * The guest isn't done hinting, so send a notification
+     * to the guest to actively stop the hinting.
+     */
+    virtio_notify_config(vdev);
+}
+
+/*
+ * Move free page hinting to S_DONE and tell the guest about it.  Does
+ * nothing if the state already is S_DONE.
+ */
+static void virtio_balloon_free_page_done(VirtIOBalloon *s)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    if (s->free_page_hint_status == FREE_PAGE_HINT_S_DONE) {
+        return;
+    }
+
+    /* See virtio_balloon_free_page_stop() */
+    qemu_mutex_lock(&s->free_page_lock);
+    s->free_page_hint_status = FREE_PAGE_HINT_S_DONE;
+    qemu_mutex_unlock(&s->free_page_lock);
+
+    virtio_notify_config(vdev);
+}
+
+/*
+ * Precopy notifier driving free page hinting during migration.
+ *
+ * Nothing is done when the device lacks the free page hint feature or when
+ * postcopy RAM migration is enabled.  Otherwise hinting is stopped before
+ * a bitmap sync, (re)started after a bitmap sync while the VM is running,
+ * and marked done after a bitmap sync with the VM stopped or on cleanup.
+ *
+ * Always returns 0.
+ */
+static int
+virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data)
+{
+    VirtIOBalloon *dev = container_of(n, VirtIOBalloon, free_page_hint_notify);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    PrecopyNotifyData *pnd = data;
+
+    if (!virtio_balloon_free_page_support(dev)) {
+        /*
+         * This is an optimization provided to migration, so just return 0 to
+         * have the normal migration process not affected when this feature is
+         * not supported.
+         */
+        return 0;
+    }
+
+    /*
+     * Pages hinted via qemu_guest_free_page_hint() are cleared from the dirty
+     * bitmap and will not get migrated, especially also not when the postcopy
+     * destination starts using them and requests migration from the source; the
+     * faulting thread will stall until postcopy migration finishes and
+     * all threads are woken up. Let's not start free page hinting if postcopy
+     * is possible.
+     */
+    if (migrate_postcopy_ram()) {
+        return 0;
+    }
+
+    switch (pnd->reason) {
+    case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
+        virtio_balloon_free_page_stop(dev);
+        break;
+    case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
+        if (vdev->vm_running) {
+            virtio_balloon_free_page_start(dev);
+            break;
+        }
+        /*
+         * Set S_DONE before migrating the vmstate, so the guest will reuse
+         * all hinted pages once running on the destination. Fall through.
+         */
+    case PRECOPY_NOTIFY_CLEANUP:
+        /*
+         * Especially, if something goes wrong during precopy or if migration
+         * is canceled, we have to properly communicate S_DONE to the VM.
+         */
+        virtio_balloon_free_page_done(dev);
+        break;
+    case PRECOPY_NOTIFY_SETUP:
+    case PRECOPY_NOTIFY_COMPLETE:
+        /* nothing to do for these stages */
+        break;
+    default:
+        virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
+    }
+
+    return 0;
+}
+
+/*
+ * Returns the size of the config space exposed to the guest.  The full
+ * struct is exposed when qemu_4_0_config_size is set or the page poison
+ * feature is offered; otherwise the struct is cut off before the first
+ * field that belongs to a feature not offered by the host.
+ */
+static size_t virtio_balloon_config_size(VirtIOBalloon *s)
+{
+    uint64_t host_features = s->host_features;
+
+    if (s->qemu_4_0_config_size ||
+        virtio_has_feature(host_features, VIRTIO_BALLOON_F_PAGE_POISON)) {
+        return sizeof(struct virtio_balloon_config);
+    }
+
+    if (!virtio_has_feature(host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+        return offsetof(struct virtio_balloon_config, free_page_hint_cmd_id);
+    }
+    return offsetof(struct virtio_balloon_config, poison_val);
+}
+
+/*
+ * Fill @config_data with the little-endian config space of the device.
+ * The command id field depends on the free page hinting state: the current
+ * command id while a request is pending, or the STOP / DONE marker ids in
+ * the respective states; it stays 0 otherwise.  Only the first
+ * virtio_balloon_config_size() bytes are copied out.
+ */
+static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+    struct virtio_balloon_config cfg = {};
+
+    cfg.num_pages = cpu_to_le32(dev->num_pages);
+    cfg.actual = cpu_to_le32(dev->actual);
+    cfg.poison_val = cpu_to_le32(dev->poison_val);
+
+    switch (dev->free_page_hint_status) {
+    case FREE_PAGE_HINT_S_REQUESTED:
+        cfg.free_page_hint_cmd_id = cpu_to_le32(dev->free_page_hint_cmd_id);
+        break;
+    case FREE_PAGE_HINT_S_STOP:
+        cfg.free_page_hint_cmd_id = cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
+        break;
+    case FREE_PAGE_HINT_S_DONE:
+        cfg.free_page_hint_cmd_id = cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
+        break;
+    default:
+        /* no command id is reported in any other state */
+        break;
+    }
+
+    trace_virtio_balloon_get_config(cfg.num_pages, cfg.actual);
+    memcpy(config_data, &cfg, virtio_balloon_config_size(dev));
+}
+
+/*
+ * object_child_foreach() callback that recursively collects every realized
+ * PC-DIMM device below @obj into the GSList that @opaque points to.
+ * Always returns 0 so that the traversal continues.
+ */
+static int build_dimm_list(Object *obj, void *opaque)
+{
+    GSList **dimms = opaque;
+
+    /* only realized DIMMs matter */
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM) && DEVICE(obj)->realized) {
+        *dimms = g_slist_prepend(*dimms, DEVICE(obj));
+    }
+
+    object_child_foreach(obj, build_dimm_list, opaque);
+    return 0;
+}
+
+/*
+ * Returns the machine's RAM size plus the size of every realized device
+ * whose type name is exactly TYPE_PC_DIMM.
+ */
+static ram_addr_t get_current_ram_size(void)
+{
+    GSList *dimms = NULL;
+    GSList *it;
+    ram_addr_t total = current_machine->ram_size;
+
+    build_dimm_list(qdev_get_machine(), &dimms);
+    for (it = dimms; it; it = g_slist_next(it)) {
+        Object *dimm = OBJECT(it->data);
+
+        /* subtypes of PC-DIMM are collected too, but not counted here */
+        if (strcmp(object_get_typename(dimm), TYPE_PC_DIMM) != 0) {
+            continue;
+        }
+        total += object_property_get_int(dimm, PC_DIMM_SIZE_PROP,
+                                         &error_abort);
+    }
+    g_slist_free(dimms);
+
+    return total;
+}
+
+/*
+ * Returns true if the device has the VIRTIO_BALLOON_F_PAGE_POISON feature.
+ * Takes a void pointer so it can also serve as the .needed callback of the
+ * page poison vmstate subsection.
+ */
+static bool virtio_balloon_page_poison_support(void *opaque)
+{
+    VirtIOBalloon *s = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+}
+
+/*
+ * Apply a config space write from the guest.  Updates the "actual" balloon
+ * size (emitting a balloon change event when it differs from the previous
+ * value) and the poison value, which is forced to 0 unless the page poison
+ * feature is present.
+ */
+static void virtio_balloon_set_config(VirtIODevice *vdev,
+                                      const uint8_t *config_data)
+{
+    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+    struct virtio_balloon_config cfg;
+    uint32_t prev_actual = dev->actual;
+    ram_addr_t total_ram = get_current_ram_size();
+
+    /* only the exposed part of the config space is copied in */
+    memcpy(&cfg, config_data, virtio_balloon_config_size(dev));
+
+    dev->actual = le32_to_cpu(cfg.actual);
+    if (dev->actual != prev_actual) {
+        ram_addr_t ballooned =
+            (ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT;
+
+        qapi_event_send_balloon_change(total_ram - ballooned);
+    }
+
+    if (virtio_balloon_page_poison_support(dev)) {
+        dev->poison_val = le32_to_cpu(cfg.poison_val);
+    } else {
+        dev->poison_val = 0;
+    }
+    trace_virtio_balloon_set_config(dev->actual, prev_actual);
+}
+
+/*
+ * Returns the feature set offered to the guest: the incoming features @f
+ * combined with the configured host features, plus the stats queue feature
+ * which is always offered.
+ */
+static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
+                                            Error **errp)
+{
+    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+    uint64_t features = f | dev->host_features;
+
+    virtio_add_feature(&features, VIRTIO_BALLOON_F_STATS_VQ);
+    return features;
+}
+
+/*
+ * Balloon stat handler: reports in @info the current RAM size minus the
+ * amount of memory currently held by the balloon.
+ */
+static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
+{
+    VirtIOBalloon *dev = opaque;
+    uint64_t ballooned = (uint64_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT;
+
+    info->actual = get_current_ram_size() - ballooned;
+}
+
+/*
+ * Balloon handler: ask the guest to resize itself to @target bytes of RAM.
+ * @target is capped at the current RAM size.  A @target of 0 leaves
+ * num_pages untouched and sends no config notification.
+ */
+static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
+{
+    VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    ram_addr_t vm_ram_size = get_current_ram_size();
+
+    if (target > vm_ram_size) {
+        target = vm_ram_size;
+    }
+    if (target) {
+        /* number of balloon pages the guest should give up */
+        dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
+        virtio_notify_config(vdev);
+    }
+    trace_virtio_balloon_to_target(target, dev->num_pages);
+}
+
+/*
+ * post_load hook of the device vmstate: re-arms the stats timer if stats
+ * polling is enabled.  Always returns 0.
+ */
+static int virtio_balloon_post_load_device(void *opaque, int version_id)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
+
+    if (balloon_stats_enabled(s)) {
+        balloon_stats_change_timer(s, s->stats_poll_interval);
+    }
+    return 0;
+}
+
+/*
+ * Migration subsection carrying the free page hinting command id and
+ * state.  Only needed when virtio_balloon_free_page_support() returns
+ * true.
+ */
+static const VMStateDescription vmstate_virtio_balloon_free_page_hint = {
+    .name = "virtio-balloon-device/free-page-report",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = virtio_balloon_free_page_support,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(free_page_hint_cmd_id, VirtIOBalloon),
+        VMSTATE_UINT32(free_page_hint_status, VirtIOBalloon),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Migration subsection carrying the poison value.  Only needed when
+ * virtio_balloon_page_poison_support() returns true.
+ */
+static const VMStateDescription vmstate_virtio_balloon_page_poison = {
+    .name = "virtio-balloon-device/page-poison",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = virtio_balloon_page_poison_support,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(poison_val, VirtIOBalloon),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Migration description of the balloon device itself: the requested
+ * (num_pages) and actual balloon size, plus the optional free page hint
+ * and page poison subsections.  post_load re-arms the stats timer.
+ */
+static const VMStateDescription vmstate_virtio_balloon_device = {
+    .name = "virtio-balloon-device",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = virtio_balloon_post_load_device,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(num_pages, VirtIOBalloon),
+        VMSTATE_UINT32(actual, VirtIOBalloon),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_balloon_free_page_hint,
+        &vmstate_virtio_balloon_page_poison,
+        NULL
+    }
+};
+
+/*
+ * Realize the virtio-balloon device.
+ *
+ * Initializes the virtio device with the config size derived from the host
+ * features, registers the balloon handlers and creates the inflate,
+ * deflate and stats virtqueues.  The free page hinting queue (with its
+ * precopy notifier and iothread bottom half) and the reporting queue are
+ * only created when the corresponding host feature is set.
+ *
+ * On failure an error is stored in @errp and everything that was set up
+ * before the failure is undone again.
+ */
+static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
+    int ret;
+
+    virtio_init(vdev, VIRTIO_ID_BALLOON, virtio_balloon_config_size(s));
+
+    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
+                                   virtio_balloon_stat, s);
+
+    if (ret < 0) {
+        error_setg(errp, "Only one balloon device is supported");
+        virtio_cleanup(vdev);
+        return;
+    }
+
+    if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) &&
+        !s->iothread) {
+        error_setg(errp, "'free-page-hint' requires 'iothread' to be set");
+        /*
+         * The balloon handler was registered above with this device as its
+         * opaque pointer; drop it again so that it does not outlive a
+         * device that failed to realize.
+         */
+        qemu_remove_balloon_handler(s);
+        virtio_cleanup(vdev);
+        return;
+    }
+
+    /* inflate, deflate and stats queues always exist */
+    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
+    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
+    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
+
+    if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
+                                           virtio_balloon_handle_free_page_vq);
+        precopy_add_notifier(&s->free_page_hint_notify);
+
+        /* hints are processed by a bottom half in the iothread's context */
+        object_ref(OBJECT(s->iothread));
+        s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
+                                     virtio_ballloon_get_free_page_hints, s);
+    }
+
+    if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
+        s->reporting_vq = virtio_add_queue(vdev, 32,
+                                           virtio_balloon_handle_report);
+    }
+
+    reset_stats(s);
+}
+
+/*
+ * Unrealize callback: tear down what virtio_balloon_device_realize() set up.
+ *
+ * The free page hint resources and the optional virtqueues are only released
+ * when they exist, i.e. when the corresponding pointer is non-NULL.
+ */
+static void virtio_balloon_device_unrealize(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
+
+    /* free_page_bh is only created by realize for the free page hint case. */
+    if (s->free_page_bh) {
+        qemu_bh_delete(s->free_page_bh);
+        /* Drops the reference taken on the iothread in realize. */
+        object_unref(OBJECT(s->iothread));
+        virtio_balloon_free_page_stop(s);
+        precopy_remove_notifier(&s->free_page_hint_notify);
+    }
+    balloon_stats_destroy_timer(s);
+    qemu_remove_balloon_handler(s);
+
+    virtio_delete_queue(s->ivq);
+    virtio_delete_queue(s->dvq);
+    virtio_delete_queue(s->svq);
+    if (s->free_page_vq) {
+        virtio_delete_queue(s->free_page_vq);
+    }
+    if (s->reporting_vq) {
+        virtio_delete_queue(s->reporting_vq);
+    }
+    virtio_cleanup(vdev);
+}
+
+/*
+ * Device reset callback: stop free page hinting when it is supported, give
+ * back any stats virtqueue element the device is still holding, and clear
+ * the poison value.
+ */
+static void virtio_balloon_device_reset(VirtIODevice *vdev)
+{
+    VirtIOBalloon *balloon = VIRTIO_BALLOON(vdev);
+
+    if (virtio_balloon_free_page_support(balloon)) {
+        virtio_balloon_free_page_stop(balloon);
+    }
+
+    if (balloon->stats_vq_elem) {
+        /* Unpop the held element from the stats queue, then release it. */
+        virtqueue_unpop(balloon->svq, balloon->stats_vq_elem, 0);
+        g_free(balloon->stats_vq_elem);
+        balloon->stats_vq_elem = NULL;
+    }
+
+    balloon->poison_val = 0;
+}
+
+/*
+ * set_status callback.
+ *
+ * Does two things:
+ *  - if no stats element is currently held, the VM is running, the driver
+ *    has set DRIVER_OK and virtqueue_rewind() on the stats queue succeeds,
+ *    the stats queue is polled again;
+ *  - when free page hinting is supported, the block_iothread flag is updated
+ *    under free_page_lock to follow the VM run state.
+ */
+static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+
+    if (!s->stats_vq_elem && vdev->vm_running &&
+        (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) {
+        /* poll stats queue for the element we have discarded when the VM
+         * was stopped */
+        virtio_balloon_receive_stats(vdev, s->svq);
+    }
+
+    if (virtio_balloon_free_page_support(s)) {
+        /*
+         * The VM is woken up and the iothread was blocked, so signal it to
+         * continue.
+         */
+        if (vdev->vm_running && s->block_iothread) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = false;
+            qemu_cond_signal(&s->free_page_cond);
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+
+        /* The VM is stopped, block the iothread. */
+        if (!vdev->vm_running) {
+            qemu_mutex_lock(&s->free_page_lock);
+            s->block_iothread = true;
+            qemu_mutex_unlock(&s->free_page_lock);
+        }
+    }
+}
+
+/*
+ * Per-instance initialisation: set up the free page lock and condition
+ * variable, seed the free page hint command id and notifier callback, and
+ * add the "guest-stats" and "guest-stats-polling-interval" properties.
+ */
+static void virtio_balloon_instance_init(Object *obj)
+{
+    VirtIOBalloon *balloon = VIRTIO_BALLOON(obj);
+
+    qemu_mutex_init(&balloon->free_page_lock);
+    qemu_cond_init(&balloon->free_page_cond);
+    balloon->free_page_hint_cmd_id = VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN;
+    balloon->free_page_hint_notify.notify =
+        virtio_balloon_free_page_hint_notify;
+
+    /* Only a getter is supplied for "guest-stats". */
+    object_property_add(obj, "guest-stats", "guest statistics",
+                        balloon_stats_get_all, NULL, NULL, NULL);
+
+    /* The polling interval has both a getter and a setter. */
+    object_property_add(obj, "guest-stats-polling-interval", "int",
+                        balloon_stats_get_poll_interval,
+                        balloon_stats_set_poll_interval,
+                        NULL, NULL);
+}
+
+/*
+ * Top-level migration description installed as the DeviceClass vmsd in
+ * virtio_balloon_class_init(); its only field is VMSTATE_VIRTIO_DEVICE.
+ */
+static const VMStateDescription vmstate_virtio_balloon = {
+    .name = "virtio-balloon",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/*
+ * User-visible properties of the balloon device.
+ *
+ * The four feature switches map to bits in host_features; "page-poison"
+ * defaults to on, the other three default to off.  "iothread" is a link to
+ * an IOThread object.
+ */
+static Property virtio_balloon_properties[] = {
+    DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
+    DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+    DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_PAGE_POISON, true),
+    DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features,
+                    VIRTIO_BALLOON_F_REPORTING, false),
+    /* QEMU 4.0 accidentally changed the config size even when free-page-hint
+     * is disabled, resulting in QEMU 3.1 migration incompatibility.  This
+     * property retains this quirk for QEMU 4.1 machine types.
+     */
+    DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
+                     qemu_4_0_config_size, false),
+    DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
+                     IOThread *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initialisation: install the property list, the migration
+ * descriptions, the device category and the virtio device callbacks.
+ */
+static void virtio_balloon_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
+
+    /* Generic device class setup. */
+    device_class_set_props(dev_class, virtio_balloon_properties);
+    dev_class->vmsd = &vmstate_virtio_balloon;
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+
+    /* Virtio device callbacks. */
+    virtio_class->realize = virtio_balloon_device_realize;
+    virtio_class->unrealize = virtio_balloon_device_unrealize;
+    virtio_class->reset = virtio_balloon_device_reset;
+    virtio_class->get_config = virtio_balloon_get_config;
+    virtio_class->set_config = virtio_balloon_set_config;
+    virtio_class->get_features = virtio_balloon_get_features;
+    virtio_class->set_status = virtio_balloon_set_status;
+    virtio_class->vmsd = &vmstate_virtio_balloon_device;
+}
+
+/* Type description of TYPE_VIRTIO_BALLOON, a child of TYPE_VIRTIO_DEVICE. */
+static const TypeInfo virtio_balloon_info = {
+    .name = TYPE_VIRTIO_BALLOON,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOBalloon),
+    .instance_init = virtio_balloon_instance_init,
+    .class_init = virtio_balloon_class_init,
+};
+
+/* Register the balloon type; hooked up through type_init() below. */
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_balloon_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c
new file mode 100644
index 00000000..9743bee9
--- /dev/null
+++ b/hw/virtio/virtio-blk-pci.c
@@ -0,0 +1,107 @@
+/*
+ * Virtio blk PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-blk.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+typedef struct VirtIOBlkPCI VirtIOBlkPCI;
+
+/*
+ * virtio-blk-pci: This extends VirtioPCIProxy.
+ *
+ * TYPE_VIRTIO_BLK_PCI is the base type name; VIRTIO_BLK_PCI() is the
+ * instance checker/cast generated for it.
+ */
+#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIOBlkPCI, VIRTIO_BLK_PCI,
+                         TYPE_VIRTIO_BLK_PCI)
+
+/* PCI proxy object with the virtio-blk device embedded in it. */
+struct VirtIOBlkPCI {
+    VirtIOPCIProxy parent_obj;
+    VirtIOBlock vdev;
+};
+
+/*
+ * Properties of the virtio-blk PCI proxy: the PCI class code (default 0),
+ * the ioeventfd flag (default on) and the number of vectors, which defaults
+ * to DEV_NVECTORS_UNSPECIFIED and is then resolved in
+ * virtio_blk_pci_realize().
+ */
+static Property virtio_blk_pci_properties[] = {
+    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Realize the virtio-blk device embedded in the PCI proxy.
+ *
+ * An automatic queue count is resolved first, then an unspecified vector
+ * count is derived from it, and finally the embedded device is realized on
+ * the proxy's bus.  Errors from qdev_realize() are reported through @errp.
+ */
+static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOBlkPCI *blk_pci = VIRTIO_BLK_PCI(vpci_dev);
+    VirtIOBlkConf *blk_conf = &blk_pci->vdev.conf;
+    DeviceState *blk_dev = DEVICE(&blk_pci->vdev);
+
+    if (blk_conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) {
+        blk_conf->num_queues = virtio_pci_optimal_num_queues(0);
+    }
+
+    /* Default: one vector per queue plus one more. */
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = blk_conf->num_queues + 1;
+    }
+
+    qdev_realize(blk_dev, BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initialisation: storage category, property list, realize hook and
+ * the PCI identification of the virtio-blk proxy.
+ */
+static void virtio_blk_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    device_class_set_props(dev_class, virtio_blk_pci_properties);
+    vpci_class->realize = virtio_blk_pci_realize;
+
+    /* PCI identification. */
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = PCI_CLASS_STORAGE_SCSI;
+}
+
+/*
+ * Instance initialisation: initialise the embedded virtio-blk device and
+ * alias its "bootindex" property onto the proxy object.
+ */
+static void virtio_blk_pci_instance_init(Object *obj)
+{
+    VirtIOBlkPCI *blk_pci = VIRTIO_BLK_PCI(obj);
+
+    virtio_instance_init_common(obj, &blk_pci->vdev, sizeof(blk_pci->vdev),
+                                TYPE_VIRTIO_BLK);
+    object_property_add_alias(obj, "bootindex", OBJECT(&blk_pci->vdev),
+                              "bootindex");
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(): base type name
+ * plus the generic, transitional and non-transitional names.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = {
+    .base_name              = TYPE_VIRTIO_BLK_PCI,
+    .generic_name           = "virtio-blk-pci",
+    .transitional_name      = "virtio-blk-pci-transitional",
+    .non_transitional_name  = "virtio-blk-pci-non-transitional",
+    .instance_size = sizeof(VirtIOBlkPCI),
+    .instance_init = virtio_blk_pci_instance_init,
+    .class_init    = virtio_blk_pci_class_init,
+};
+
+/* Register the virtio-blk PCI types; hooked up through type_init() below. */
+static void virtio_blk_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_blk_pci_info);
+}
+
+type_init(virtio_blk_pci_register)
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
new file mode 100644
index 00000000..896feb37
--- /dev/null
+++ b/hw/virtio/virtio-bus.c
@@ -0,0 +1,372 @@
+/*
+ * VirtioBus
+ *
+ *  Copyright (C) 2012 : GreenSocs Ltd
+ *      http://www.greensocs.com/ , email: info@greensocs.com
+ *
+ *  Developed by :
+ *  Frederic Konrad   <fred.konrad@greensocs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio.h"
+#include "exec/address-spaces.h"
+
+/* #define DEBUG_VIRTIO_BUS */
+
+/*
+ * Debug tracing helper: prints with a "virtio_bus: " prefix when
+ * DEBUG_VIRTIO_BUS is defined (uncomment the line above), and expands to an
+ * empty statement otherwise.
+ */
+#ifdef DEBUG_VIRTIO_BUS
+#define DPRINTF(fmt, ...) \
+do { printf("virtio_bus: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+/*
+ * A VirtIODevice is being plugged.
+ *
+ * Runs the bus class's optional pre_plugged hook, lets the device compute
+ * its host features, runs the optional device_plugged hook and finally
+ * selects the DMA address space.  Any failure is reported through @errp and
+ * ends the function early.
+ */
+void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
+{
+    DeviceState *qdev = DEVICE(vdev);
+    BusState *qbus = BUS(qdev_get_parent_bus(qdev));
+    VirtioBusState *bus = VIRTIO_BUS(qbus);
+    VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    /* Sampled on entry, before get_features() below rewrites host_features. */
+    bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+    bool vdev_has_iommu;
+    Error *local_err = NULL;
+
+    DPRINTF("%s: plug device.\n", qbus->name);
+
+    if (klass->pre_plugged != NULL) {
+        klass->pre_plugged(qbus->parent, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    /* Get the features of the plugged device. */
+    assert(vdc->get_features != NULL);
+    vdev->host_features = vdc->get_features(vdev, vdev->host_features,
+                                            &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (klass->device_plugged != NULL) {
+        klass->device_plugged(qbus->parent, &local_err);
+    }
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Default DMA address space; may be replaced by the bus below. */
+    vdev->dma_as = &address_space_memory;
+    if (has_iommu) {
+        /* Sampled again now that get_features() and device_plugged() ran. */
+        vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+        /*
+         * Present IOMMU_PLATFORM to the driver iff iommu_platform=on and
+         * device operational. If the driver does not accept IOMMU_PLATFORM
+         * we fail the device.
+         */
+        virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM);
+        if (klass->get_dma_as) {
+            vdev->dma_as = klass->get_dma_as(qbus->parent);
+            /*
+             * Refuse a non-default address space when the device itself no
+             * longer advertised the feature.
+             */
+            if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
+                error_setg(errp,
+                       "iommu_platform=true is not supported by the device");
+                return;
+            }
+        }
+    }
+}
+
+/*
+ * Reset the virtio_bus: stop ioeventfd handling, then reset the plugged
+ * device if there is one.
+ */
+void virtio_bus_reset(VirtioBusState *bus)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+
+    DPRINTF("%s: reset device.\n", BUS(bus)->name);
+    virtio_bus_stop_ioeventfd(bus);
+
+    if (!plugged) {
+        return;
+    }
+    virtio_reset(plugged);
+}
+
+/*
+ * A VirtIODevice is being unplugged: call the bus class's optional
+ * device_unplugged hook with the bus's parent device.
+ */
+void virtio_bus_device_unplugged(VirtIODevice *vdev)
+{
+    DeviceState *qdev = DEVICE(vdev);
+    BusState *parent_bus = BUS(qdev_get_parent_bus(qdev));
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(parent_bus);
+
+    DPRINTF("%s: remove device.\n", parent_bus->name);
+
+    if (vdev == NULL || bus_class->device_unplugged == NULL) {
+        return;
+    }
+    bus_class->device_unplugged(parent_bus->parent);
+}
+
+/* Return the device_id of the plugged device; a device must be plugged. */
+uint16_t virtio_bus_get_vdev_id(VirtioBusState *bus)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+
+    assert(plugged != NULL);
+    return plugged->device_id;
+}
+
+/* Return the config_len of the plugged device; a device must be plugged. */
+size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+
+    assert(plugged != NULL);
+    return plugged->config_len;
+}
+
+/*
+ * Return the bad features reported by the plugged device's class, or 0 when
+ * the class has no bad_features callback.  A device must be plugged.
+ */
+uint32_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+    VirtioDeviceClass *vdc;
+
+    assert(plugged != NULL);
+    vdc = VIRTIO_DEVICE_GET_CLASS(plugged);
+
+    if (vdc->bad_features == NULL) {
+        return 0;
+    }
+    return vdc->bad_features(plugged);
+}
+
+/*
+ * Forward @config to the plugged device's get_config callback, if the class
+ * provides one.  A device must be plugged.
+ */
+void virtio_bus_get_vdev_config(VirtioBusState *bus, uint8_t *config)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+    VirtioDeviceClass *vdc;
+
+    assert(plugged != NULL);
+    vdc = VIRTIO_DEVICE_GET_CLASS(plugged);
+
+    if (vdc->get_config == NULL) {
+        return;
+    }
+    vdc->get_config(plugged, config);
+}
+
+/*
+ * Forward @config to the plugged device's set_config callback, if the class
+ * provides one.  A device must be plugged.
+ */
+void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+    VirtioDeviceClass *vdc;
+
+    assert(plugged != NULL);
+    vdc = VIRTIO_DEVICE_GET_CLASS(plugged);
+
+    if (vdc->set_config == NULL) {
+        return;
+    }
+    vdc->set_config(plugged, config);
+}
+
+/*
+ * On success, ioeventfd ownership belongs to the caller.
+ *
+ * Returns 0 on success, or -ENOSYS when the bus class has no
+ * ioeventfd_assign callback.  Each successful call increments
+ * ioeventfd_grabbed and is balanced by virtio_bus_release_ioeventfd().
+ */
+int virtio_bus_grab_ioeventfd(VirtioBusState *bus)
+{
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+
+    /* vhost can be used even if ioeventfd=off in the proxy device,
+     * so do not check k->ioeventfd_enabled.
+     */
+    if (!k->ioeventfd_assign) {
+        return -ENOSYS;
+    }
+
+    /* First grab while the bus's own ioeventfd handling is active. */
+    if (bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) {
+        virtio_bus_stop_ioeventfd(bus);
+        /* Remember that we need to restart ioeventfd
+         * when ioeventfd_grabbed becomes zero.
+         */
+        bus->ioeventfd_started = true;
+    }
+    bus->ioeventfd_grabbed++;
+    return 0;
+}
+
+/*
+ * Drop one ioeventfd grab.  When the last grab goes away and ioeventfd was
+ * marked as started, the bus's own ioeventfd handling is started again.
+ */
+void virtio_bus_release_ioeventfd(VirtioBusState *bus)
+{
+    assert(bus->ioeventfd_grabbed != 0);
+
+    bus->ioeventfd_grabbed--;
+    if (bus->ioeventfd_grabbed != 0 || !bus->ioeventfd_started) {
+        return;
+    }
+
+    /* Force virtio_bus_start_ioeventfd to act.  */
+    bus->ioeventfd_started = false;
+    virtio_bus_start_ioeventfd(bus);
+}
+
+/*
+ * Start ioeventfd handling on the bus.
+ *
+ * Returns 0 on success or when it is already started, -ENOSYS when the bus
+ * class cannot assign ioeventfds or they are disabled for the proxy, or the
+ * negative value returned by the device's start_ioeventfd callback.
+ */
+int virtio_bus_start_ioeventfd(VirtioBusState *bus)
+{
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+    DeviceState *proxy = DEVICE(BUS(bus)->parent);
+    VirtIODevice *vdev = virtio_bus_get_device(bus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    int r;
+
+    if (!k->ioeventfd_assign || !k->ioeventfd_enabled(proxy)) {
+        return -ENOSYS;
+    }
+    if (bus->ioeventfd_started) {
+        return 0;
+    }
+
+    /* Only set our notifier if we have ownership.  */
+    if (!bus->ioeventfd_grabbed) {
+        r = vdc->start_ioeventfd(vdev);
+        if (r < 0) {
+            error_report("%s: failed. Fallback to userspace (slower).", __func__);
+            return r;
+        }
+    }
+    /* Also set while grabbed, so that release knows to restart it. */
+    bus->ioeventfd_started = true;
+    return 0;
+}
+
+/*
+ * Stop ioeventfd handling on the bus.  Does nothing when it is not started;
+ * the device's stop_ioeventfd callback is only invoked when nobody has
+ * grabbed the ioeventfd.
+ */
+void virtio_bus_stop_ioeventfd(VirtioBusState *bus)
+{
+    if (!bus->ioeventfd_started) {
+        return;
+    }
+
+    /* Only remove our notifier if we have ownership.  */
+    if (!bus->ioeventfd_grabbed) {
+        VirtIODevice *plugged = virtio_bus_get_device(bus);
+        VirtioDeviceClass *dev_class = VIRTIO_DEVICE_GET_CLASS(plugged);
+
+        dev_class->stop_ioeventfd(plugged);
+    }
+    bus->ioeventfd_started = false;
+}
+
+/*
+ * Tell whether ioeventfd can be used on this bus: the bus class must be
+ * able to assign ioeventfds and report them enabled for the proxy device.
+ */
+bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus)
+{
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
+    DeviceState *proxy = DEVICE(BUS(bus)->parent);
+
+    if (!bus_class->ioeventfd_assign) {
+        return false;
+    }
+    return bus_class->ioeventfd_enabled(proxy);
+}
+
+/*
+ * This function switches ioeventfd on/off in the device.
+ * The caller must set or clear the handlers for the EventNotifier.
+ *
+ * @n is the virtqueue index, @assign selects on (true) or off (false).
+ * Returns 0 on success, -ENOSYS when the bus class has no ioeventfd_assign
+ * callback, or the negative value from event_notifier_init() or
+ * ioeventfd_assign().  When switching off, the callback's return value is
+ * ignored and 0 is returned.
+ */
+int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
+{
+    VirtIODevice *vdev = virtio_bus_get_device(bus);
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+    DeviceState *proxy = DEVICE(BUS(bus)->parent);
+    VirtQueue *vq = virtio_get_queue(vdev, n);
+    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+    int r = 0;
+
+    if (!k->ioeventfd_assign) {
+        return -ENOSYS;
+    }
+
+    if (assign) {
+        r = event_notifier_init(notifier, 1);
+        if (r < 0) {
+            error_report("%s: unable to init event notifier: %s (%d)",
+                         __func__, strerror(-r), r);
+            return r;
+        }
+        r = k->ioeventfd_assign(proxy, notifier, n, true);
+        if (r < 0) {
+            error_report("%s: unable to assign ioeventfd: %d", __func__, r);
+            /* Undo event_notifier_init(); r stays negative and is returned. */
+            virtio_bus_cleanup_host_notifier(bus, n);
+        }
+    } else {
+        k->ioeventfd_assign(proxy, notifier, n, false);
+    }
+
+    /* Only record the new state in the queue when nothing failed. */
+    if (r == 0) {
+        virtio_queue_set_host_notifier_enabled(vq, assign);
+    }
+
+    return r;
+}
+
+/*
+ * Release the host notifier of virtqueue @n: read it one last time, then
+ * clean up the EventNotifier.
+ */
+void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
+{
+    VirtIODevice *plugged = virtio_bus_get_device(bus);
+    VirtQueue *queue = virtio_get_queue(plugged, n);
+    EventNotifier *host_notifier = virtio_queue_get_host_notifier(queue);
+
+    /* Test and clear notifier after disabling event,
+     * in case poll callback didn't have time to run.
+     */
+    virtio_queue_host_notifier_read(host_notifier);
+    event_notifier_cleanup(host_notifier);
+}
+
+/*
+ * get_dev_path callback: a device on a virtio bus reports the device path of
+ * the proxy device that owns the bus.
+ */
+static char *virtio_bus_get_dev_path(DeviceState *dev)
+{
+    BusState *parent_bus = qdev_get_parent_bus(dev);
+
+    return qdev_get_dev_path(DEVICE(parent_bus->parent));
+}
+
+/* get_fw_dev_path callback: always returns NULL; @dev is not used. */
+static char *virtio_bus_get_fw_dev_path(DeviceState *dev)
+{
+    return NULL;
+}
+
+/*
+ * Ask the bus class whether the IOMMU is enabled for the bus's parent
+ * device.  Returns false when the class has no iommu_enabled callback.
+ */
+bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev)
+{
+    DeviceState *qdev = DEVICE(vdev);
+    BusState *parent_bus = BUS(qdev_get_parent_bus(qdev));
+    VirtioBusState *vbus = VIRTIO_BUS(parent_bus);
+    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(vbus);
+
+    if (bus_class->iommu_enabled) {
+        return bus_class->iommu_enabled(parent_bus->parent);
+    }
+    return false;
+}
+
+/* Class initialisation: install the device path callbacks on the bus. */
+static void virtio_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *bc = BUS_CLASS(klass);
+
+    bc->get_dev_path = virtio_bus_get_dev_path;
+    bc->get_fw_dev_path = virtio_bus_get_fw_dev_path;
+}
+
+/* Type description of TYPE_VIRTIO_BUS, an abstract child of TYPE_BUS. */
+static const TypeInfo virtio_bus_info = {
+    .name = TYPE_VIRTIO_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = sizeof(VirtioBusState),
+    .abstract = true,
+    .class_size = sizeof(VirtioBusClass),
+    .class_init = virtio_bus_class_init
+};
+
+/* Register the virtio bus type; hooked up through type_init() below. */
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_bus_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c
new file mode 100644
index 00000000..0783dc2f
--- /dev/null
+++ b/hw/virtio/virtio-crypto-pci.c
@@ -0,0 +1,94 @@
+/*
+ * Virtio crypto device
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Gonglei <arei.gonglei@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-crypto.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+typedef struct VirtIOCryptoPCI VirtIOCryptoPCI;
+
+/*
+ * virtio-crypto-pci: This extends VirtioPCIProxy.
+ *
+ * VIRTIO_CRYPTO_PCI() is the instance checker/cast generated for
+ * TYPE_VIRTIO_CRYPTO_PCI.
+ */
+#define TYPE_VIRTIO_CRYPTO_PCI "virtio-crypto-pci"
+DECLARE_INSTANCE_CHECKER(VirtIOCryptoPCI, VIRTIO_CRYPTO_PCI,
+                         TYPE_VIRTIO_CRYPTO_PCI)
+
+/* PCI proxy object with the virtio-crypto device embedded in it. */
+struct VirtIOCryptoPCI {
+    VirtIOPCIProxy parent_obj;
+    VirtIOCrypto vdev;
+};
+
+/*
+ * Properties of the virtio-crypto PCI proxy: the ioeventfd flag (default on)
+ * and the number of vectors (default 2).
+ */
+static Property virtio_crypto_pci_properties[] = {
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Realize the virtio-crypto device embedded in the PCI proxy.
+ *
+ * Fails with an error in @errp when no cryptodev backend is configured.
+ * Otherwise the proxy is forced to virtio 1 and the embedded device is
+ * realized on the proxy's bus; qdev_realize() reports failures via @errp.
+ */
+static void virtio_crypto_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOCryptoPCI *crypto_pci = VIRTIO_CRYPTO_PCI(vpci_dev);
+    DeviceState *crypto_dev = DEVICE(&crypto_pci->vdev);
+
+    if (!crypto_pci->vdev.conf.cryptodev) {
+        error_setg(errp, "'cryptodev' parameter expects a valid object");
+        return;
+    }
+
+    virtio_pci_force_virtio_1(vpci_dev);
+    qdev_realize(crypto_dev, BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initialisation: realize hook, device category, property list and
+ * PCI class id of the virtio-crypto proxy.
+ */
+static void virtio_crypto_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+
+    vpci_class->realize = virtio_crypto_pci_realize;
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+    device_class_set_props(dev_class, virtio_crypto_pci_properties);
+    pci_class->class_id = PCI_CLASS_OTHERS;
+}
+
+/* Instance initialisation: initialise the embedded virtio-crypto device. */
+static void virtio_crypto_initfn(Object *obj)
+{
+    VirtIOCryptoPCI *crypto_pci = VIRTIO_CRYPTO_PCI(obj);
+
+    virtio_instance_init_common(obj, &crypto_pci->vdev,
+                                sizeof(crypto_pci->vdev),
+                                TYPE_VIRTIO_CRYPTO);
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(); only a generic
+ * name is given.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = {
+    .generic_name  = TYPE_VIRTIO_CRYPTO_PCI,
+    .instance_size = sizeof(VirtIOCryptoPCI),
+    .instance_init = virtio_crypto_initfn,
+    .class_init    = virtio_crypto_pci_class_init,
+};
+
+/* Register the virtio-crypto PCI type; hooked up through type_init() below. */
+static void virtio_crypto_pci_register_types(void)
+{
+    virtio_pci_types_register(&virtio_crypto_pci_info);
+}
+type_init(virtio_crypto_pci_register_types)
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
new file mode 100644
index 00000000..97da74e7
--- /dev/null
+++ b/hw/virtio/virtio-crypto.c
@@ -0,0 +1,1250 @@
+/*
+ * Virtio crypto Support
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Gonglei <arei.gonglei@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-crypto.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-access.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "sysemu/cryptodev-vhost.h"
+
+#define VIRTIO_CRYPTO_VM_VERSION 1
+
+typedef struct VirtIOCryptoSessionReq {
+    VirtIODevice *vdev;               /* device that received the request */
+    VirtQueue *vq;                    /* queue the element was popped from */
+    VirtQueueElement *elem;           /* guest element, freed on completion */
+    CryptoDevBackendSessionInfo info; /* parsed session parameters and keys */
+    CryptoDevCompletionFunc cb;       /* completion handed to the backend */
+} VirtIOCryptoSessionReq;
+
+/* Release a control request plus any key buffers its opcode may own. */
+static void virtio_crypto_free_create_session_req(VirtIOCryptoSessionReq *sreq)
+{
+    CryptoDevBackendSessionInfo *sess = &sreq->info;
+    switch (sess->op_code) {
+    case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
+        g_free(sess->u.asym_sess_info.key);
+        break;
+    case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
+        g_free(sess->u.sym_sess_info.cipher_key);
+        g_free(sess->u.sym_sess_info.auth_key);
+        break;
+    case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION:
+    case VIRTIO_CRYPTO_HASH_DESTROY_SESSION:
+    case VIRTIO_CRYPTO_MAC_DESTROY_SESSION:
+    case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION:
+    case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION:
+        break; /* nothing besides the request itself is freed for these */
+    default:
+        error_report("Unknown opcode: %u", sess->op_code);
+        break;
+    }
+    g_free(sreq);
+}
+
+/*
+ * Translate a virtqueue index into a crypto queue index.
+ * The control virtqueue comes after the data virtqueues, so the two
+ * numberings coincide and the input is returned unchanged.
+ */
+static inline int virtio_crypto_vq2q(int queue_index)
+{
+    return queue_index;
+}
+
+static int
+virtio_crypto_cipher_session_helper(VirtIODevice *vdev,
+           CryptoDevBackendSymSessionInfo *info,
+           struct virtio_crypto_cipher_session_para *cipher_para,
+           struct iovec **iov, unsigned int *out_num)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    unsigned int num = *out_num;
+    /* Decode the cipher parameters from the request with ldl_le_p(). */
+    info->cipher_alg = ldl_le_p(&cipher_para->algo);
+    info->key_len = ldl_le_p(&cipher_para->keylen);
+    info->direction = ldl_le_p(&cipher_para->op);
+    DPRINTF("cipher_alg=%" PRIu32 ", info->direction=%" PRIu32 "\n",
+             info->cipher_alg, info->direction);
+    /* Refuse keys above the configured limit before allocating anything. */
+    if (info->key_len > vcrypto->conf.max_cipher_key_len) {
+        error_report("virtio-crypto length of cipher key is too big: %u",
+                     info->key_len);
+        return -VIRTIO_CRYPTO_ERR;
+    }
+    /* Get cipher key */
+    if (info->key_len > 0) {
+        size_t s;
+        DPRINTF("keylen=%" PRIu32 "\n", info->key_len);
+
+        info->cipher_key = g_malloc(info->key_len);
+        s = iov_to_buf(*iov, num, 0, info->cipher_key, info->key_len);
+        if (unlikely(s != info->key_len)) {
+            virtio_error(vdev, "virtio-crypto cipher key incorrect");
+            return -EFAULT; /* cipher_key is left in info, not freed here */
+        }
+        iov_discard_front(iov, &num, info->key_len);
+        *out_num = num;
+    }
+    /* 0 on success; out_num is updated only when a key was consumed. */
+    return 0;
+}
+
+static int
+virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
+               struct virtio_crypto_sym_create_session_req *sess_req,
+               uint32_t queue_id,
+               uint32_t opcode,
+               struct iovec *iov, unsigned int out_num,
+               VirtIOCryptoSessionReq *sreq)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+    CryptoDevBackendSymSessionInfo *sym_info = &sreq->info.u.sym_sess_info;
+    int queue_index;
+    uint32_t op_type;
+    int ret;
+    /* The opcode is stored before parsing; the free helper switches on it. */
+    op_type = ldl_le_p(&sess_req->op_type);
+    sreq->info.op_code = opcode;
+    /* Fill in the symmetric-session description passed to the backend. */
+    sym_info = &sreq->info.u.sym_sess_info;
+    sym_info->op_type = op_type;
+    /* Plain cipher parses only the cipher part; chaining adds a hash part. */
+    if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) {
+        ret = virtio_crypto_cipher_session_helper(vdev, sym_info,
+                           &sess_req->u.cipher.para,
+                           &iov, &out_num);
+        if (ret < 0) {
+            return ret;
+        }
+    } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) {
+        size_t s;
+        /* cipher part */
+        ret = virtio_crypto_cipher_session_helper(vdev, sym_info,
+                           &sess_req->u.chain.para.cipher_param,
+                           &iov, &out_num);
+        if (ret < 0) {
+            return ret;
+        }
+        /* hash part */
+        sym_info->alg_chain_order = ldl_le_p(
+                                     &sess_req->u.chain.para.alg_chain_order);
+        sym_info->add_len = ldl_le_p(&sess_req->u.chain.para.aad_len);
+        sym_info->hash_mode = ldl_le_p(&sess_req->u.chain.para.hash_mode);
+        if (sym_info->hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH) {
+            sym_info->hash_alg =
+                ldl_le_p(&sess_req->u.chain.para.u.mac_param.algo);
+            sym_info->auth_key_len = ldl_le_p(
+                             &sess_req->u.chain.para.u.mac_param.auth_key_len);
+            sym_info->hash_result_len = ldl_le_p(
+                           &sess_req->u.chain.para.u.mac_param.hash_result_len);
+            if (sym_info->auth_key_len > vcrypto->conf.max_auth_key_len) {
+                error_report("virtio-crypto length of auth key is too big: %u",
+                             sym_info->auth_key_len);
+                return -VIRTIO_CRYPTO_ERR;
+            }
+            /* get auth key */
+            if (sym_info->auth_key_len > 0) {
+                sym_info->auth_key = g_malloc(sym_info->auth_key_len);
+                s = iov_to_buf(iov, out_num, 0, sym_info->auth_key,
+                               sym_info->auth_key_len);
+                if (unlikely(s != sym_info->auth_key_len)) {
+                    virtio_error(vdev,
+                          "virtio-crypto authenticated key incorrect");
+                    return -EFAULT;
+                }
+                iov_discard_front(&iov, &out_num, sym_info->auth_key_len);
+            }
+        } else if (sym_info->hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN) {
+            sym_info->hash_alg = ldl_le_p(
+                             &sess_req->u.chain.para.u.hash_param.algo);
+            sym_info->hash_result_len = ldl_le_p(
+                        &sess_req->u.chain.para.u.hash_param.hash_result_len);
+        } else {
+            /* VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED */
+            error_report("unsupported hash mode");
+            return -VIRTIO_CRYPTO_NOTSUPP;
+        }
+    } else {
+        /* VIRTIO_CRYPTO_SYM_OP_NONE */
+        error_report("unsupported cipher op_type: VIRTIO_CRYPTO_SYM_OP_NONE");
+        return -VIRTIO_CRYPTO_NOTSUPP;
+    }
+    /* Parsing succeeded: submit to the backend with sreq->cb as completion. */
+    queue_index = virtio_crypto_vq2q(queue_id);
+    return cryptodev_backend_create_session(vcrypto->cryptodev, &sreq->info,
+                                            queue_index, sreq->cb, sreq);
+}
+
+/*
+ * Parse an akcipher CREATE_SESSION request and hand it to the backend.
+ * Returns the backend's result, or a negative code if parsing fails; a key
+ * buffer, once allocated, stays in sreq and is released together with it.
+ */
+static int
+virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
+               struct virtio_crypto_akcipher_create_session_req *sess_req,
+               uint32_t queue_id, uint32_t opcode,
+               struct iovec *iov, unsigned int out_num,
+               VirtIOCryptoSessionReq *sreq)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+    CryptoDevBackendAsymSessionInfo *asym_info = &sreq->info.u.asym_sess_info;
+    uint32_t algo = ldl_le_p(&sess_req->para.algo);
+    uint32_t keytype = ldl_le_p(&sess_req->para.keytype);
+    uint32_t keylen = ldl_le_p(&sess_req->para.keylen);
+
+    /* Record the opcode up front: the request's free helper switches on it
+     * to find the key, so it must be set before any return below. */
+    sreq->info.op_code = opcode;
+
+    if ((keytype != VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC)
+         && (keytype != VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE)) {
+        error_report("unsupported asym keytype: %u", keytype);
+        return -VIRTIO_CRYPTO_NOTSUPP;
+    }
+
+    if (keylen) {
+        asym_info->key = g_malloc(keylen);
+        if (iov_to_buf(iov, out_num, 0, asym_info->key, keylen) != keylen) {
+            virtio_error(vdev, "virtio-crypto asym key incorrect");
+            return -EFAULT;
+        }
+        iov_discard_front(&iov, &out_num, keylen);
+    }
+
+    asym_info->algo = algo;
+    asym_info->keytype = keytype;
+    asym_info->keylen = keylen;
+    switch (asym_info->algo) {
+    case VIRTIO_CRYPTO_AKCIPHER_RSA:
+        asym_info->u.rsa.padding_algo =
+            ldl_le_p(&sess_req->para.u.rsa.padding_algo);
+        asym_info->u.rsa.hash_algo =
+            ldl_le_p(&sess_req->para.u.rsa.hash_algo);
+        break;
+    default: /* TODO DSA&ECDSA handling */
+        return -VIRTIO_CRYPTO_ERR;
+    }
+
+    return cryptodev_backend_create_session(vcrypto->cryptodev, &sreq->info,
+                             virtio_crypto_vq2q(queue_id), sreq->cb, sreq);
+}
+
+/* Forward a DESTROY_SESSION request for the given session id to the backend. */
+static int
+virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto,
+         struct virtio_crypto_destroy_session_req *close_sess_req,
+         uint32_t queue_id,
+         VirtIOCryptoSessionReq *sreq)
+{
+    /* The id is read from the request as a 64-bit value via ldq_le_p(). */
+    uint64_t sid = ldq_le_p(&close_sess_req->session_id);
+
+    DPRINTF("close session, id=%" PRIu64 "\n", sid);
+    return cryptodev_backend_close_session(vcrypto->cryptodev, sid, queue_id,
+                                           sreq->cb, sreq);
+}
+
+static void virtio_crypto_create_session_completion(void *opaque, int ret)
+{
+    VirtIOCryptoSessionReq *sreq = (VirtIOCryptoSessionReq *)opaque;
+    VirtQueue *vq = sreq->vq;
+    VirtQueueElement *elem = sreq->elem;
+    VirtIODevice *vdev = sreq->vdev;
+    struct virtio_crypto_session_input input;
+    struct iovec *in_iov = elem->in_sg;
+    unsigned in_num = elem->in_num;
+    size_t s;
+    /* Build the reply in a zeroed struct; ret selects the status code. */
+    memset(&input, 0, sizeof(input));
+    /* Serious errors, need to reset virtio crypto device */
+    if (ret == -EFAULT) {
+        virtqueue_detach_element(vq, elem, 0);
+        goto out;
+    } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) {
+        stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP);
+    } else if (ret == -VIRTIO_CRYPTO_KEY_REJECTED) {
+        stl_le_p(&input.status, VIRTIO_CRYPTO_KEY_REJECTED);
+    } else if (ret != VIRTIO_CRYPTO_OK) {
+        stl_le_p(&input.status, VIRTIO_CRYPTO_ERR);
+    } else {
+        /* Set the session id */
+        stq_le_p(&input.session_id, sreq->info.session_id);
+        stl_le_p(&input.status, VIRTIO_CRYPTO_OK);
+    }
+    /* Copy the reply into the guest's in-buffers and complete the element. */
+    s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input));
+    if (unlikely(s != sizeof(input))) {
+        virtio_error(vdev, "virtio-crypto input incorrect");
+        virtqueue_detach_element(vq, elem, 0);
+        goto out;
+    }
+    virtqueue_push(vq, elem, sizeof(input));
+    virtio_notify(vdev, vq);
+    /* Every path ends by freeing the element and the request. */
+out:
+    g_free(elem);
+    virtio_crypto_free_create_session_req(sreq);
+}
+
+/*
+ * Completion callback for DESTROY_SESSION control requests.
+ *
+ * @opaque: the VirtIOCryptoSessionReq created for the request
+ * @ret: backend result; any negative value is reported as an error
+ *
+ * Writes a one-byte status to the guest, completes (or detaches) the
+ * element, and frees both the element and the request.
+ */
+static void virtio_crypto_destroy_session_completion(void *opaque, int ret)
+{
+    VirtIOCryptoSessionReq *sreq = opaque;
+    VirtQueueElement *elem = sreq->elem;
+    uint8_t status = ret < 0 ? VIRTIO_CRYPTO_ERR : VIRTIO_CRYPTO_OK;
+    size_t written;
+
+    written = iov_from_buf(elem->in_sg, elem->in_num, 0,
+                           &status, sizeof(status));
+    if (unlikely(written != sizeof(status))) {
+        virtio_error(sreq->vdev, "virtio-crypto status incorrect");
+        virtqueue_detach_element(sreq->vq, elem, 0);
+    } else {
+        virtqueue_push(sreq->vq, elem, sizeof(status));
+        virtio_notify(sreq->vdev, sreq->vq);
+    }
+
+    g_free(elem);
+    g_free(sreq);
+}
+
+static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    struct virtio_crypto_op_ctrl_req ctrl;
+    VirtQueueElement *elem;
+    VirtIOCryptoSessionReq *sreq;
+    unsigned out_num;
+    unsigned in_num;
+    uint32_t queue_id;
+    uint32_t opcode;
+    struct virtio_crypto_session_input input;
+    size_t s;
+    int ret;
+    struct iovec *out_iov;
+    struct iovec *in_iov;
+    /* Pop control elements until the queue is empty or one is malformed. */
+    for (;;) {
+        g_autofree struct iovec *out_iov_copy = NULL;
+
+        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            break;
+        }
+        if (elem->out_num < 1 || elem->in_num < 1) {
+            virtio_error(vdev, "virtio-crypto ctrl missing headers");
+            virtqueue_detach_element(vq, elem, 0);
+            g_free(elem);
+            break;
+        }
+        /* Parse from a copy of out_sg; iov_discard_front() runs on the copy. */
+        out_num = elem->out_num;
+        out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
+        out_iov = out_iov_copy;
+
+        in_num = elem->in_num;
+        in_iov = elem->in_sg;
+
+        if (unlikely(iov_to_buf(out_iov, out_num, 0, &ctrl, sizeof(ctrl))
+                    != sizeof(ctrl))) {
+            virtio_error(vdev, "virtio-crypto request ctrl_hdr too short");
+            virtqueue_detach_element(vq, elem, 0);
+            g_free(elem);
+            break;
+        }
+        iov_discard_front(&out_iov, &out_num, sizeof(ctrl));
+
+        opcode = ldl_le_p(&ctrl.header.opcode);
+        queue_id = ldl_le_p(&ctrl.header.queue_id);
+        /* sreq carries what the completion callbacks need to reply. */
+        sreq = g_new0(VirtIOCryptoSessionReq, 1);
+        sreq->vdev = vdev;
+        sreq->vq = vq;
+        sreq->elem = elem;
+
+        switch (opcode) {
+        case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
+            sreq->cb = virtio_crypto_create_session_completion;
+            ret = virtio_crypto_create_sym_session(vcrypto,
+                            &ctrl.u.sym_create_session,
+                            queue_id, opcode,
+                            out_iov, out_num,
+                            sreq);
+            if (ret < 0) {
+                virtio_crypto_create_session_completion(sreq, ret);
+            }
+            break;
+
+        case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
+            sreq->cb = virtio_crypto_create_session_completion;
+            ret = virtio_crypto_create_asym_session(vcrypto,
+                             &ctrl.u.akcipher_create_session,
+                             queue_id, opcode,
+                             out_iov, out_num,
+                             sreq);
+            if (ret < 0) {
+                virtio_crypto_create_session_completion(sreq, ret);
+            }
+            break;
+
+        case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION:
+        case VIRTIO_CRYPTO_HASH_DESTROY_SESSION:
+        case VIRTIO_CRYPTO_MAC_DESTROY_SESSION:
+        case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION:
+        case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION:
+            sreq->cb = virtio_crypto_destroy_session_completion;
+            ret = virtio_crypto_handle_close_session(vcrypto,
+                   &ctrl.u.destroy_session, queue_id,
+                   sreq);
+            if (ret < 0) {
+                virtio_crypto_destroy_session_completion(sreq, ret);
+            }
+            break;
+
+        case VIRTIO_CRYPTO_HASH_CREATE_SESSION:
+        case VIRTIO_CRYPTO_MAC_CREATE_SESSION:
+        case VIRTIO_CRYPTO_AEAD_CREATE_SESSION:
+        default:
+            memset(&input, 0, sizeof(input));
+            error_report("virtio-crypto unsupported ctrl opcode: %d", opcode);
+            stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP);
+            s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input));
+            if (unlikely(s != sizeof(input))) {
+                virtio_error(vdev, "virtio-crypto input incorrect");
+                virtqueue_detach_element(vq, elem, 0);
+            } else {
+                virtqueue_push(vq, elem, sizeof(input));
+                virtio_notify(vdev, vq);
+            }
+            g_free(sreq); /* answered inline, so no completion frees these */
+            g_free(elem);
+
+            break;
+        } /* end switch case */
+
+    } /* end for loop */
+}
+
+static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq,
+                                VirtIOCryptoReq *req)
+{
+    /* Clean slate: zeroed op info, no reply buffers, flags at __MAX. */
+    memset(&req->op_info, 0x00, sizeof(req->op_info));
+    req->flags = CRYPTODEV_BACKEND_ALG__MAX;
+    req->in_iov = NULL;
+    req->in_num = req->in_len = 0;
+    req->in = NULL;
+    req->vq = vq;
+    req->vcrypto = vcrypto;
+}
+
+/* Free a data request with its op info and in_iov copy; NULL is ignored. */
+static void virtio_crypto_free_request(VirtIOCryptoReq *req)
+{
+    if (!req) {
+        return;
+    }
+
+    if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
+        CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info;
+        /* flags is set before parsing, so op_info is NULL if that failed */
+        if (op_info) {
+            size_t max_len = (size_t)op_info->iv_len + op_info->aad_len +
+                             op_info->src_len + op_info->dst_len +
+                             op_info->digest_result_len;
+
+            /* Zeroize and free request data structure */
+            memset(op_info, 0, sizeof(*op_info) + max_len);
+            g_free(op_info);
+        }
+    } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) {
+        CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info;
+        if (op_info) {
+            g_free(op_info->src);
+            g_free(op_info->dst);
+            memset(op_info, 0, sizeof(*op_info));
+            g_free(op_info);
+        }
+    }
+
+    g_free(req->in_iov);
+    g_free(req);
+}
+
+static void
+virtio_crypto_sym_input_data_helper(VirtIODevice *vdev,
+                VirtIOCryptoReq *req,
+                uint32_t status,
+                CryptoDevBackendSymOpInfo *sym_op_info)
+{
+    size_t s, len;
+    struct iovec *in_iov = req->in_iov;
+    /* Nothing is copied back unless the operation succeeded. */
+    if (status != VIRTIO_CRYPTO_OK) {
+        return;
+    }
+    /* NOTE(review): reads src_len bytes of dst - confirm it fits dst_len. */
+    len = sym_op_info->src_len;
+    /* Save the cipher result */
+    s = iov_from_buf(in_iov, req->in_num, 0, sym_op_info->dst, len);
+    if (s != len) {
+        virtio_error(vdev, "virtio-crypto dest data incorrect");
+        return;
+    }
+    /* Skip the bytes just written so the digest is placed after them. */
+    iov_discard_front(&in_iov, &req->in_num, len);
+
+    if (sym_op_info->op_type ==
+                      VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) {
+        /* Save the digest result */
+        s = iov_from_buf(in_iov, req->in_num, 0,
+                         sym_op_info->digest_result,
+                         sym_op_info->digest_result_len);
+        if (s != sym_op_info->digest_result_len) {
+            virtio_error(vdev, "virtio-crypto digest result incorrect");
+        }
+    }
+}
+
+/*
+ * Copy the result of a successful akcipher operation into the guest's
+ * in-buffers and recompute the byte count reported for the request.
+ * Does nothing on failure or when there is no output.
+ */
+static void
+virtio_crypto_akcipher_input_data_helper(VirtIODevice *vdev,
+        VirtIOCryptoReq *req, int32_t status,
+        CryptoDevBackendAsymOpInfo *asym_op_info)
+{
+    struct iovec *reply_iov = req->in_iov;
+    size_t out_len, copied;
+
+    if (status != VIRTIO_CRYPTO_OK || asym_op_info->dst_len == 0) {
+        return;
+    }
+    out_len = asym_op_info->dst_len;
+    copied = iov_from_buf(reply_iov, req->in_num, 0,
+                          asym_op_info->dst, out_len);
+    if (copied != out_len) {
+        virtio_error(vdev, "virtio-crypto asym dest data incorrect");
+        return;
+    }
+    iov_discard_front(&reply_iov, &req->in_num, out_len);
+
+    /* For akcipher, dst_len may be changed after operation */
+    req->in_len = sizeof(struct virtio_crypto_inhdr) + out_len;
+}
+
+static void virtio_crypto_req_complete(void *opaque, int ret)
+{
+    VirtIOCryptoReq *req = (VirtIOCryptoReq *)opaque;
+    VirtIOCrypto *vcrypto = req->vcrypto;
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+    uint8_t status = -ret; /* presumably ret is 0 or -VIRTIO_CRYPTO_* */
+    /* Copy result data back first; both helpers return early on failure. */
+    if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
+        virtio_crypto_sym_input_data_helper(vdev, req, status,
+                                            req->op_info.u.sym_op_info);
+    } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) {
+        virtio_crypto_akcipher_input_data_helper(vdev, req, status,
+                                             req->op_info.u.asym_op_info);
+    }
+    stb_p(&req->in->status, status); /* status byte of the reply header */
+    virtqueue_push(req->vq, &req->elem, req->in_len);
+    virtio_notify(vdev, req->vq);
+    virtio_crypto_free_request(req); /* req must not be used after this */
+}
+
+/* Pop the next element as a VirtIOCryptoReq and initialise it; NULL if none. */
+static VirtIOCryptoReq *
+virtio_crypto_get_request(VirtIOCrypto *s, VirtQueue *vq)
+{
+    VirtIOCryptoReq *popped = virtqueue_pop(vq, sizeof(*popped));
+    if (popped) {
+        virtio_crypto_init_request(s, vq, popped);
+    }
+    return popped;
+}
+
+/*
+ * Parse a symmetric data request into one allocation holding iv, aad, src,
+ * dst and digest back to back.  Returns NULL on any parse or size failure.
+ */
+static CryptoDevBackendSymOpInfo *
+virtio_crypto_sym_op_helper(VirtIODevice *vdev,
+           struct virtio_crypto_cipher_para *cipher_para,
+           struct virtio_crypto_alg_chain_data_para *alg_chain_para,
+           struct iovec *iov, unsigned int out_num)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    CryptoDevBackendSymOpInfo *op_info;
+    uint32_t src_len = 0, dst_len = 0, iv_len = 0;
+    uint32_t aad_len = 0, hash_result_len = 0;
+    uint32_t hash_start_src_offset = 0, len_to_hash = 0;
+    uint32_t cipher_start_src_offset = 0, len_to_cipher = 0;
+    uint64_t max_len, curr_size = 0;
+    size_t s;
+
+    /* Plain cipher */
+    if (cipher_para) {
+        iv_len = ldl_le_p(&cipher_para->iv_len);
+        src_len = ldl_le_p(&cipher_para->src_data_len);
+        dst_len = ldl_le_p(&cipher_para->dst_data_len);
+    } else if (alg_chain_para) { /* Algorithm chain */
+        iv_len = ldl_le_p(&alg_chain_para->iv_len);
+        src_len = ldl_le_p(&alg_chain_para->src_data_len);
+        dst_len = ldl_le_p(&alg_chain_para->dst_data_len);
+        aad_len = ldl_le_p(&alg_chain_para->aad_len);
+        hash_result_len = ldl_le_p(&alg_chain_para->hash_result_len);
+        hash_start_src_offset = ldl_le_p(
+                         &alg_chain_para->hash_start_src_offset);
+        cipher_start_src_offset = ldl_le_p(
+                         &alg_chain_para->cipher_start_src_offset);
+        len_to_cipher = ldl_le_p(&alg_chain_para->len_to_cipher);
+        len_to_hash = ldl_le_p(&alg_chain_para->len_to_hash);
+    } else {
+        return NULL;
+    }
+
+    /* dst is later copied out using src_len, so the two must match */
+    if (unlikely(src_len != dst_len)) {
+        virtio_error(vdev, "sym request src len is different from dst len");
+        return NULL;
+    }
+
+    max_len = (uint64_t)iv_len + aad_len + src_len + dst_len + hash_result_len;
+    if (unlikely(max_len > vcrypto->conf.max_size)) {
+        virtio_error(vdev, "virtio-crypto too big length");
+        return NULL;
+    }
+
+    op_info = g_malloc0(sizeof(CryptoDevBackendSymOpInfo) + max_len);
+    op_info->iv_len = iv_len;
+    op_info->src_len = src_len;
+    op_info->dst_len = dst_len;
+    op_info->aad_len = aad_len;
+    op_info->digest_result_len = hash_result_len;
+    op_info->hash_start_src_offset = hash_start_src_offset;
+    op_info->len_to_hash = len_to_hash;
+    op_info->cipher_start_src_offset = cipher_start_src_offset;
+    op_info->len_to_cipher = len_to_cipher;
+    /* Handle the initialization vector */
+    if (op_info->iv_len > 0) {
+        DPRINTF("iv_len=%" PRIu32 "\n", op_info->iv_len);
+        op_info->iv = op_info->data + curr_size;
+        s = iov_to_buf(iov, out_num, 0, op_info->iv, op_info->iv_len);
+        if (unlikely(s != op_info->iv_len)) {
+            virtio_error(vdev, "virtio-crypto iv incorrect");
+            goto err;
+        }
+        iov_discard_front(&iov, &out_num, op_info->iv_len);
+        curr_size += op_info->iv_len;
+    }
+
+    /* Handle additional authentication data if exists */
+    if (op_info->aad_len > 0) {
+        DPRINTF("aad_len=%" PRIu32 "\n", op_info->aad_len);
+        op_info->aad_data = op_info->data + curr_size;
+        s = iov_to_buf(iov, out_num, 0, op_info->aad_data, op_info->aad_len);
+        if (unlikely(s != op_info->aad_len)) {
+            virtio_error(vdev, "virtio-crypto additional auth data incorrect");
+            goto err;
+        }
+        iov_discard_front(&iov, &out_num, op_info->aad_len);
+        curr_size += op_info->aad_len;
+    }
+
+    /* Handle the source data */
+    if (op_info->src_len > 0) {
+        DPRINTF("src_len=%" PRIu32 "\n", op_info->src_len);
+        op_info->src = op_info->data + curr_size;
+        s = iov_to_buf(iov, out_num, 0, op_info->src, op_info->src_len);
+        if (unlikely(s != op_info->src_len)) {
+            virtio_error(vdev, "virtio-crypto source data incorrect");
+            goto err;
+        }
+        iov_discard_front(&iov, &out_num, op_info->src_len);
+        curr_size += op_info->src_len;
+    }
+
+    /* Handle the destination data */
+    op_info->dst = op_info->data + curr_size;
+    curr_size += op_info->dst_len;
+    DPRINTF("dst_len=%" PRIu32 "\n", op_info->dst_len);
+
+    /* Handle the hash digest result */
+    if (hash_result_len > 0) {
+        DPRINTF("hash_result_len=%" PRIu32 "\n", hash_result_len);
+        op_info->digest_result = op_info->data + curr_size;
+    }
+    return op_info;
+
+err:
+    g_free(op_info);
+    return NULL;
+}
+
+/* Parse a symmetric data request into op_info->u.sym_op_info; returns 0,
+ * -EFAULT (unparsable payload) or -VIRTIO_CRYPTO_NOTSUPP (bad op_type). */
+static int
+virtio_crypto_handle_sym_req(VirtIOCrypto *vcrypto,
+               struct virtio_crypto_sym_data_req *req,
+               CryptoDevBackendOpInfo *op_info,
+               struct iovec *iov, unsigned int out_num)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+    struct virtio_crypto_cipher_para *plain = NULL;
+    struct virtio_crypto_alg_chain_data_para *chained = NULL;
+    CryptoDevBackendSymOpInfo *parsed;
+    uint32_t op_type = ldl_le_p(&req->op_type);
+
+    switch (op_type) {
+    case VIRTIO_CRYPTO_SYM_OP_CIPHER:
+        plain = &req->u.cipher.para;
+        break;
+    case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING:
+        chained = &req->u.chain.para;
+        break;
+    default:
+        /* VIRTIO_CRYPTO_SYM_OP_NONE */
+        error_report("virtio-crypto unsupported cipher type");
+        return -VIRTIO_CRYPTO_NOTSUPP;
+    }
+
+    parsed = virtio_crypto_sym_op_helper(vdev, plain, chained, iov, out_num);
+    if (!parsed) {
+        return -EFAULT;
+    }
+    parsed->op_type = op_type;
+    op_info->u.sym_op_info = parsed;
+    return 0;
+}
+
+/*
+ * Parse an akcipher data request into op_info->u.asym_op_info.
+ * src is always copied from iov; dst is only pre-filled for VERIFY.
+ * Returns 0 on success or -EFAULT if iov is shorter than announced.
+ */
+static int
+virtio_crypto_handle_asym_req(VirtIOCrypto *vcrypto,
+               struct virtio_crypto_akcipher_data_req *req,
+               CryptoDevBackendOpInfo *op_info,
+               struct iovec *iov, unsigned int out_num)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+    CryptoDevBackendAsymOpInfo *asym_op_info;
+    uint32_t src_len;
+    uint32_t dst_len;
+    uint32_t len;
+    uint8_t *src = NULL;
+    uint8_t *dst = NULL;
+
+    asym_op_info = g_new0(CryptoDevBackendAsymOpInfo, 1);
+    src_len = ldl_le_p(&req->para.src_data_len);
+    dst_len = ldl_le_p(&req->para.dst_data_len);
+
+    if (src_len > 0) {
+        src = g_malloc0(src_len);
+        len = iov_to_buf(iov, out_num, 0, src, src_len);
+        if (unlikely(len != src_len)) {
+            virtio_error(vdev, "virtio-crypto asym src data incorrect, "
+                         "expected %u, actual %u", src_len, len);
+            goto err;
+        }
+        iov_discard_front(&iov, &out_num, src_len);
+    }
+
+    if (dst_len > 0) {
+        dst = g_malloc0(dst_len);
+        if (op_info->op_code == VIRTIO_CRYPTO_AKCIPHER_VERIFY) {
+            len = iov_to_buf(iov, out_num, 0, dst, dst_len);
+            if (unlikely(len != dst_len)) {
+                virtio_error(vdev, "virtio-crypto asym dst data incorrect, "
+                             "expected %u, actual %u", dst_len, len);
+                goto err;
+            }
+            iov_discard_front(&iov, &out_num, dst_len);
+        }
+    }
+
+    asym_op_info->src_len = src_len;
+    asym_op_info->dst_len = dst_len;
+    asym_op_info->src = src;
+    asym_op_info->dst = dst;
+    op_info->u.asym_op_info = asym_op_info;
+    return 0;
+
+ err:
+    g_free(asym_op_info);
+    g_free(src);
+    g_free(dst);
+    return -EFAULT;
+}
+
+static int
+virtio_crypto_handle_request(VirtIOCryptoReq *request)
+{
+    VirtIOCrypto *vcrypto = request->vcrypto;
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+    VirtQueueElement *elem = &request->elem;
+    int queue_index = virtio_crypto_vq2q(virtio_get_queue_index(request->vq));
+    struct virtio_crypto_op_data_req req;
+    int ret;
+    g_autofree struct iovec *in_iov_copy = NULL;
+    g_autofree struct iovec *out_iov_copy = NULL;
+    struct iovec *in_iov;
+    struct iovec *out_iov;
+    unsigned in_num;
+    unsigned out_num;
+    uint32_t opcode;
+    CryptoDevBackendOpInfo *op_info = &request->op_info;
+    /* At least one out (request) and one in (reply) buffer are required. */
+    if (elem->out_num < 1 || elem->in_num < 1) {
+        virtio_error(vdev, "virtio-crypto dataq missing headers");
+        return -1;
+    }
+    /* Duplicate both vectors so the parsing below works on private copies. */
+    out_num = elem->out_num;
+    out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
+    out_iov = out_iov_copy;
+
+    in_num = elem->in_num;
+    in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num);
+    in_iov = in_iov_copy;
+
+    if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req))
+                != sizeof(req))) {
+        virtio_error(vdev, "virtio-crypto request outhdr too short");
+        return -1;
+    }
+    iov_discard_front(&out_iov, &out_num, sizeof(req));
+    /* The reply header is taken from the tail of the last in-buffer. */
+    if (in_iov[in_num - 1].iov_len <
+            sizeof(struct virtio_crypto_inhdr)) {
+        virtio_error(vdev, "virtio-crypto request inhdr too short");
+        return -1;
+    }
+    /* We always touch the last byte, so just see how big in_iov is. */
+    request->in_len = iov_size(in_iov, in_num);
+    request->in = (void *)in_iov[in_num - 1].iov_base
+              + in_iov[in_num - 1].iov_len
+              - sizeof(struct virtio_crypto_inhdr);
+    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_crypto_inhdr));
+
+    /*
+     * The length of operation result, including dest_data
+     * and digest_result if exists.
+     */
+    request->in_num = in_num;
+    request->in_iov = in_iov;
+    /* now, we free the in_iov_copy inside virtio_crypto_free_request */
+    in_iov_copy = NULL;
+
+    opcode = ldl_le_p(&req.header.opcode);
+    op_info->session_id = ldq_le_p(&req.header.session_id);
+    op_info->op_code = opcode;
+
+    switch (opcode) {
+    case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
+    case VIRTIO_CRYPTO_CIPHER_DECRYPT:
+        op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_SYM;
+        ret = virtio_crypto_handle_sym_req(vcrypto,
+                         &req.u.sym_req, op_info,
+                         out_iov, out_num);
+        goto check_result;
+
+    case VIRTIO_CRYPTO_AKCIPHER_ENCRYPT:
+    case VIRTIO_CRYPTO_AKCIPHER_DECRYPT:
+    case VIRTIO_CRYPTO_AKCIPHER_SIGN:
+    case VIRTIO_CRYPTO_AKCIPHER_VERIFY:
+        op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_ASYM;
+        ret = virtio_crypto_handle_asym_req(vcrypto,
+                         &req.u.akcipher_req, op_info,
+                         out_iov, out_num);
+        /* Shared tail: the symmetric case jumps here, this one falls in. */
+check_result:
+        /* Serious errors, need to reset virtio crypto device */
+        if (ret == -EFAULT) {
+            return -1;
+        } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) {
+            virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP);
+        } else {
+            ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev,
+                                    request, queue_index,
+                                    virtio_crypto_req_complete,
+                                    request);
+            if (ret < 0) {
+                virtio_crypto_req_complete(request, ret);
+            }
+        }
+        break;
+
+    case VIRTIO_CRYPTO_HASH:
+    case VIRTIO_CRYPTO_MAC:
+    case VIRTIO_CRYPTO_AEAD_ENCRYPT:
+    case VIRTIO_CRYPTO_AEAD_DECRYPT:
+    default:
+        error_report("virtio-crypto unsupported dataq opcode: %u",
+                     opcode);
+        virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP);
+    }
+    /* 0: the request was completed or submitted; -1 paths return earlier. */
+    return 0;
+}
+
+static void virtio_crypto_handle_dataq(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    VirtIOCryptoReq *cur;
+    /* Keep popping until the queue is drained or a request fails hard. */
+    while ((cur = virtio_crypto_get_request(vcrypto, vq)) != NULL) {
+        if (virtio_crypto_handle_request(cur) < 0) {
+            virtqueue_detach_element(cur->vq, &cur->elem, 0);
+            virtio_crypto_free_request(cur);
+            return;
+        }
+    }
+}
+
+/*
+ * Bottom-half handler for one data queue; @opaque is the VirtIOCryptoQueue.
+ * Processes the queue repeatedly until it is observed empty after guest
+ * notifications have been switched back on.
+ */
+static void virtio_crypto_dataq_bh(void *opaque)
+{
+    VirtIOCryptoQueue *q = opaque;
+    VirtIOCrypto *vcrypto = q->vcrypto;
+    VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+
+    /* This happens when device was stopped but BH wasn't. */
+    if (!vdev->vm_running) {
+        return;
+    }
+
+    /* Just in case the driver is not ready any more */
+    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
+        return;
+    }
+
+    for (;;) {
+        virtio_crypto_handle_dataq(vdev, q->dataq);
+        /* Re-enable notifications before the emptiness check below. */
+        virtio_queue_set_notification(q->dataq, 1);
+
+        /* Are we done or did the guest add more buffers? */
+        if (virtio_queue_empty(q->dataq)) {
+            break;
+        }
+
+        /* More buffers showed up: disable notifications and loop again. */
+        virtio_queue_set_notification(q->dataq, 0);
+    }
+}
+
+/*
+ * Data virtqueue notification handler: while the VM is running, turn off
+ * further notifications for @vq and schedule the bottom half of the
+ * matching data queue.
+ */
+static void
+virtio_crypto_handle_dataq_bh(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    int vq_index = virtio_get_queue_index(vq);
+    VirtIOCryptoQueue *q = &vcrypto->vqs[virtio_crypto_vq2q(vq_index)];
+
+    /* Nothing to do when the device was stopped but the VCPU wasn't. */
+    if (vdev->vm_running) {
+        virtio_queue_set_notification(vq, 0);
+        qemu_bh_schedule(q->dataq_bh);
+    }
+}
+
+/*
+ * get_features callback: returns the offered @features unchanged.
+ * @vdev and @errp are not used.
+ */
+static uint64_t virtio_crypto_get_features(VirtIODevice *vdev,
+                                           uint64_t features,
+                                           Error **errp)
+{
+    return features;
+}
+
+/*
+ * Reset callback: go back to a single active queue and refresh the
+ * HW_READY status bit from the current state of the cryptodev backend.
+ */
+static void virtio_crypto_reset(VirtIODevice *vdev)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+
+    /* multiqueue is disabled by default */
+    vcrypto->curr_queues = 1;
+
+    if (cryptodev_backend_is_ready(vcrypto->cryptodev)) {
+        vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY;
+    } else {
+        vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY;
+    }
+}
+
+/*
+ * Copy the capability fields (services, algorithm masks, key length limits
+ * and maximum size) from the cryptodev backend configuration into the
+ * device's own configuration.
+ */
+static void virtio_crypto_init_config(VirtIODevice *vdev)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    CryptoDevBackend *backend = vcrypto->conf.cryptodev;
+
+    vcrypto->conf.crypto_services = backend->conf.crypto_services;
+    vcrypto->conf.cipher_algo_l = backend->conf.cipher_algo_l;
+    vcrypto->conf.cipher_algo_h = backend->conf.cipher_algo_h;
+    vcrypto->conf.hash_algo = backend->conf.hash_algo;
+    vcrypto->conf.mac_algo_l = backend->conf.mac_algo_l;
+    vcrypto->conf.mac_algo_h = backend->conf.mac_algo_h;
+    vcrypto->conf.aead_algo = backend->conf.aead_algo;
+    vcrypto->conf.akcipher_algo = backend->conf.akcipher_algo;
+    vcrypto->conf.max_cipher_key_len = backend->conf.max_cipher_key_len;
+    vcrypto->conf.max_auth_key_len = backend->conf.max_auth_key_len;
+    vcrypto->conf.max_size = backend->conf.max_size;
+}
+
+/*
+ * Realize the virtio-crypto device.
+ *
+ * Validates the "cryptodev" link (must be set and not already in use),
+ * derives the number of data queues from the backend, then creates one
+ * data virtqueue plus bottom half per queue and a single control queue.
+ * On any validation failure an error is stored in @errp and nothing is
+ * initialised.
+ */
+static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev);
+    int i;
+
+    vcrypto->cryptodev = vcrypto->conf.cryptodev;
+    if (vcrypto->cryptodev == NULL) {
+        error_setg(errp, "'cryptodev' parameter expects a valid object");
+        return;
+    } else if (cryptodev_backend_is_used(vcrypto->cryptodev)) {
+        error_setg(errp, "can't use already used cryptodev backend: %s",
+                   object_get_canonical_path_component(OBJECT(vcrypto->conf.cryptodev)));
+        return;
+    }
+
+    /* At least one data queue, even if the backend reports zero. */
+    vcrypto->max_queues = MAX(vcrypto->cryptodev->conf.peers.queues, 1);
+    /* The "+ 1" accounts for the control queue added below. */
+    if (vcrypto->max_queues + 1 > VIRTIO_QUEUE_MAX) {
+        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
+                   "must be a positive integer less than %d.",
+                   vcrypto->max_queues, VIRTIO_QUEUE_MAX);
+        return;
+    }
+
+    virtio_init(vdev, VIRTIO_ID_CRYPTO, vcrypto->config_size);
+    vcrypto->curr_queues = 1;
+    vcrypto->vqs = g_new0(VirtIOCryptoQueue, vcrypto->max_queues);
+    /* Data queues are added first, one bottom half per queue. */
+    for (i = 0; i < vcrypto->max_queues; i++) {
+        vcrypto->vqs[i].dataq =
+                 virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
+        vcrypto->vqs[i].dataq_bh =
+                 qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
+        vcrypto->vqs[i].vcrypto = vcrypto;
+    }
+
+    /* The control queue is added after all data queues. */
+    vcrypto->ctrl_vq = virtio_add_queue(vdev, 1024, virtio_crypto_handle_ctrl);
+    if (!cryptodev_backend_is_ready(vcrypto->cryptodev)) {
+        vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY;
+    } else {
+        vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY;
+    }
+
+    virtio_crypto_init_config(vdev);
+    /* Mark the backend as taken so no other device can attach to it. */
+    cryptodev_backend_set_used(vcrypto->cryptodev, true);
+}
+
+/*
+ * Undo virtio_crypto_device_realize(): delete every data virtqueue and its
+ * bottom half, free the queue array, delete the control queue, clean up
+ * the virtio device and release the cryptodev backend for reuse.
+ */
+static void virtio_crypto_device_unrealize(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev);
+    VirtIOCryptoQueue *q;
+    int i;
+
+    /*
+     * realize creates max_queues data queues and bottom halves no matter
+     * whether multiqueue is in use, so all of them must be released here.
+     * Freeing only the first one would leak the rest and leave bottom
+     * halves pointing into the vqs array freed below.
+     */
+    for (i = 0; i < vcrypto->max_queues; i++) {
+        q = &vcrypto->vqs[i];
+        virtio_delete_queue(q->dataq);
+        qemu_bh_delete(q->dataq_bh);
+    }
+
+    g_free(vcrypto->vqs);
+    virtio_delete_queue(vcrypto->ctrl_vq);
+
+    virtio_cleanup(vdev);
+    cryptodev_backend_set_used(vcrypto->cryptodev, false);
+}
+
+/*
+ * VMState description for virtio-crypto. The description is flagged
+ * unmigratable; its only field is the generic virtio device state.
+ */
+static const VMStateDescription vmstate_virtio_crypto = {
+    .name = "virtio-crypto",
+    .unmigratable = 1,
+    .minimum_version_id = VIRTIO_CRYPTO_VM_VERSION,
+    .version_id = VIRTIO_CRYPTO_VM_VERSION,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/*
+ * Device properties: "cryptodev" is a link to the cryptodev backend object
+ * and is stored in conf.cryptodev.
+ */
+static Property virtio_crypto_properties[] = {
+    DEFINE_PROP_LINK("cryptodev", VirtIOCrypto, conf.cryptodev,
+                     TYPE_CRYPTODEV_BACKEND, CryptoDevBackend *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * get_config callback: build a little-endian struct virtio_crypto_config
+ * from the device state and copy config_size bytes of it into @config.
+ */
+static void virtio_crypto_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    struct virtio_crypto_config cfg = {};
+
+    /*
+     * The device conforms to VIRTIO 1.0, whose config space is always
+     * little-endian, hence the direct use of the LE store helpers.
+     */
+
+    /* Device status and queue count */
+    stl_le_p(&cfg.status, vcrypto->status);
+    stl_le_p(&cfg.max_dataqueues, vcrypto->max_queues);
+
+    /* Supported services and algorithm masks */
+    stl_le_p(&cfg.crypto_services, vcrypto->conf.crypto_services);
+    stl_le_p(&cfg.cipher_algo_l, vcrypto->conf.cipher_algo_l);
+    stl_le_p(&cfg.cipher_algo_h, vcrypto->conf.cipher_algo_h);
+    stl_le_p(&cfg.hash_algo, vcrypto->conf.hash_algo);
+    stl_le_p(&cfg.mac_algo_l, vcrypto->conf.mac_algo_l);
+    stl_le_p(&cfg.mac_algo_h, vcrypto->conf.mac_algo_h);
+    stl_le_p(&cfg.aead_algo, vcrypto->conf.aead_algo);
+    stl_le_p(&cfg.akcipher_algo, vcrypto->conf.akcipher_algo);
+
+    /* Limits */
+    stl_le_p(&cfg.max_cipher_key_len, vcrypto->conf.max_cipher_key_len);
+    stl_le_p(&cfg.max_auth_key_len, vcrypto->conf.max_auth_key_len);
+    stq_le_p(&cfg.max_size, vcrypto->conf.max_size);
+
+    memcpy(config, &cfg, vcrypto->config_size);
+}
+
+/*
+ * Return true when, for the given driver @status, the device counts as
+ * started: DRIVER_OK is set, the crypto hardware is ready and the VM is
+ * running.
+ */
+static bool virtio_crypto_started(VirtIOCrypto *c, uint8_t status)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(c);
+
+    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return false;
+    }
+    if (!(c->status & VIRTIO_CRYPTO_S_HW_READY)) {
+        return false;
+    }
+    return vdev->vm_running;
+}
+
+/*
+ * Start or stop the vhost backend so that it matches the started state
+ * implied by @status. Does nothing when the backend has no vhost instance
+ * or when the vhost state already matches.
+ */
+static void virtio_crypto_vhost_status(VirtIOCrypto *c, uint8_t status)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(c);
+    int queues = c->multiqueue ? c->max_queues : 1;
+    CryptoDevBackend *b = c->cryptodev;
+    CryptoDevBackendClient *cc = b->conf.peers.ccs[0];
+
+    /* No vhost instance behind this backend: nothing to start or stop. */
+    if (!cryptodev_get_vhost(cc, b, 0)) {
+        return;
+    }
+
+    /* Desired state equals current state: nothing to change. */
+    if ((virtio_crypto_started(c, status)) == !!c->vhost_started) {
+        return;
+    }
+
+    if (!c->vhost_started) {
+        int r;
+
+        /* Set the flag before starting; it is rolled back on failure. */
+        c->vhost_started = 1;
+        r = cryptodev_vhost_start(vdev, queues);
+        if (r < 0) {
+            error_report("unable to start vhost crypto: %d: "
+                         "falling back on userspace virtio", -r);
+            c->vhost_started = 0;
+        }
+    } else {
+        cryptodev_vhost_stop(vdev, queues);
+        c->vhost_started = 0;
+    }
+}
+
+/* set_status callback: forward the new @status to the vhost state logic. */
+static void virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    virtio_crypto_vhost_status(VIRTIO_CRYPTO(vdev), status);
+}
+
+/*
+ * guest_notifier_mask callback: translate virtqueue index @idx to its data
+ * queue index and pass the (un)mask request on to the vhost layer.
+ * Asserts that vhost has been started.
+ */
+static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                           bool mask)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    int dataq_index = virtio_crypto_vq2q(idx);
+
+    assert(vcrypto->vhost_started);
+    cryptodev_vhost_virtqueue_mask(vdev, dataq_index, idx, mask);
+}
+
+/*
+ * guest_notifier_pending callback: translate virtqueue index @idx to its
+ * data queue index and return what the vhost layer reports as pending.
+ * Asserts that vhost has been started.
+ */
+static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    int dataq_index = virtio_crypto_vq2q(idx);
+
+    assert(vcrypto->vhost_started);
+    return cryptodev_vhost_virtqueue_pending(vdev, dataq_index, idx);
+}
+
+/*
+ * get_vhost callback: return the vhost_dev embedded in the vhost instance
+ * of the backend's first client, or NULL when the backend has no vhost
+ * instance.
+ */
+static struct vhost_dev *virtio_crypto_get_vhost(VirtIODevice *vdev)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+    CryptoDevBackend *b = vcrypto->cryptodev;
+    CryptoDevBackendClient *cc = b->conf.peers.ccs[0];
+    CryptoDevBackendVhost *vhost_crypto = cryptodev_get_vhost(cc, b, 0);
+
+    /*
+     * cryptodev_get_vhost() may return NULL (virtio_crypto_vhost_status()
+     * checks for that); do not form a member address from a NULL pointer.
+     */
+    if (!vhost_crypto) {
+        return NULL;
+    }
+    return &vhost_crypto->dev;
+}
+
+/*
+ * Class initialiser: installs the property list, the VMState description,
+ * the device category and the virtio device callbacks defined in this file.
+ */
+static void virtio_crypto_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, virtio_crypto_properties);
+    dc->vmsd = &vmstate_virtio_crypto;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    vdc->realize = virtio_crypto_device_realize;
+    vdc->unrealize = virtio_crypto_device_unrealize;
+    vdc->get_config = virtio_crypto_get_config;
+    vdc->get_features = virtio_crypto_get_features;
+    vdc->reset = virtio_crypto_reset;
+    vdc->set_status = virtio_crypto_set_status;
+    /* vhost-related hooks */
+    vdc->guest_notifier_mask = virtio_crypto_guest_notifier_mask;
+    vdc->guest_notifier_pending = virtio_crypto_guest_notifier_pending;
+    vdc->get_vhost = virtio_crypto_get_vhost;
+}
+
+/* Instance initialiser: sets the default size of the config space. */
+static void virtio_crypto_instance_init(Object *obj)
+{
+    VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(obj);
+
+    /*
+     * The default config_size is sizeof(struct virtio_crypto_config).
+     * Can be overridden with virtio_crypto_set_config_size.
+     */
+    vcrypto->config_size = sizeof(struct virtio_crypto_config);
+}
+
+/* QOM type description of the virtio-crypto device (child of virtio-device). */
+static const TypeInfo virtio_crypto_info = {
+    .name = TYPE_VIRTIO_CRYPTO,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOCrypto),
+    .instance_init = virtio_crypto_instance_init,
+    .class_init = virtio_crypto_class_init,
+};
+
+/* Register the virtio-crypto type; hooked up through type_init() below. */
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_crypto_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-input-host-pci.c b/hw/virtio/virtio-input-host-pci.c
new file mode 100644
index 00000000..cf8a9cf9
--- /dev/null
+++ b/hw/virtio/virtio-input-host-pci.c
@@ -0,0 +1,47 @@
+/*
+ * Virtio input host PCI Bindings
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-input.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+/* Forward declaration; the struct is defined right after the checker. */
+typedef struct VirtIOInputHostPCI VirtIOInputHostPCI;
+
+/* QOM type name and instance cast macro for virtio-input-host-pci. */
+#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci"
+DECLARE_INSTANCE_CHECKER(VirtIOInputHostPCI, VIRTIO_INPUT_HOST_PCI,
+                         TYPE_VIRTIO_INPUT_HOST_PCI)
+
+/* PCI proxy object with the VirtIOInputHost device embedded in it. */
+struct VirtIOInputHostPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy base object */
+    VirtIOInputHost vdev;      /* embedded virtio input host device */
+};
+
+/* Instance initialiser: set up the embedded virtio-input-host device. */
+static void virtio_host_initfn(Object *obj)
+{
+    VirtIOInputHostPCI *proxy = VIRTIO_INPUT_HOST_PCI(obj);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_INPUT_HOST);
+}
+
+/* Type description for virtio-input-host-pci, a child of virtio-input-pci. */
+static const VirtioPCIDeviceTypeInfo virtio_input_host_pci_info = {
+    .generic_name  = TYPE_VIRTIO_INPUT_HOST_PCI,
+    .parent        = TYPE_VIRTIO_INPUT_PCI,
+    .instance_size = sizeof(VirtIOInputHostPCI),
+    .instance_init = virtio_host_initfn,
+};
+
+/* Register the virtio-input-host-pci type; hooked up through type_init(). */
+static void virtio_input_host_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_input_host_pci_info);
+}
+
+type_init(virtio_input_host_pci_register)
diff --git a/hw/virtio/virtio-input-pci.c b/hw/virtio/virtio-input-pci.c
new file mode 100644
index 00000000..a9d09923
--- /dev/null
+++ b/hw/virtio/virtio-input-pci.c
@@ -0,0 +1,155 @@
+/*
+ * Virtio input PCI Bindings
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-input.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+
+/*
+ * virtio-input-pci: This extends VirtIOPCIProxy.
+ */
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOInputPCI, VIRTIO_INPUT_PCI)
+
+/* PCI proxy object with a VirtIOInput device embedded in it. */
+struct VirtIOInputPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy base object */
+    VirtIOInput vdev;          /* embedded virtio input device */
+};
+
+/* QOM type names of the HID base type and its keyboard/mouse/tablet types. */
+#define TYPE_VIRTIO_INPUT_HID_PCI "virtio-input-hid-pci"
+#define TYPE_VIRTIO_KEYBOARD_PCI  "virtio-keyboard-pci"
+#define TYPE_VIRTIO_MOUSE_PCI     "virtio-mouse-pci"
+#define TYPE_VIRTIO_TABLET_PCI    "virtio-tablet-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VirtIOInputHIDPCI, VIRTIO_INPUT_HID_PCI)
+
+/* PCI proxy object with a VirtIOInputHID device embedded in it. */
+struct VirtIOInputHIDPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy base object */
+    VirtIOInputHID vdev;       /* embedded virtio HID input device */
+};
+
+/* Properties: "vectors" is stored in the proxy's nvectors, default 2. */
+static Property virtio_input_pci_properties[] = {
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Realize callback of the PCI proxy: force virtio 1 mode on the proxy,
+ * then realize the embedded input device on the proxy's bus. Errors from
+ * the device realize are reported through @errp.
+ */
+static void virtio_input_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOInputPCI *input_pci = VIRTIO_INPUT_PCI(vpci_dev);
+    DeviceState *input_dev = DEVICE(&input_pci->vdev);
+
+    virtio_pci_force_virtio_1(vpci_dev);
+    qdev_realize(input_dev, BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initialiser of the virtio-input-pci base type: installs the
+ * properties and realize hook, puts the device in the input category and
+ * sets the PCI class id to "other input".
+ */
+static void virtio_input_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, virtio_input_pci_properties);
+    k->realize = virtio_input_pci_realize;
+    set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+
+    pcidev_k->class_id = PCI_CLASS_INPUT_OTHER;
+}
+
+/* Class initialiser for the keyboard type: use the keyboard PCI class id. */
+static void virtio_input_hid_kbd_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCI_DEVICE_CLASS(klass)->class_id = PCI_CLASS_INPUT_KEYBOARD;
+}
+
+/* Class initialiser for the mouse type: use the mouse PCI class id. */
+static void virtio_input_hid_mouse_pci_class_init(ObjectClass *klass,
+                                                  void *data)
+{
+    PCI_DEVICE_CLASS(klass)->class_id = PCI_CLASS_INPUT_MOUSE;
+}
+
+/* Instance initialiser: set up the embedded device as a virtio keyboard. */
+static void virtio_keyboard_initfn(Object *obj)
+{
+    VirtIOInputHIDPCI *proxy = VIRTIO_INPUT_HID_PCI(obj);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_KEYBOARD);
+}
+
+/* Instance initialiser: set up the embedded device as a virtio mouse. */
+static void virtio_mouse_initfn(Object *obj)
+{
+    VirtIOInputHIDPCI *proxy = VIRTIO_INPUT_HID_PCI(obj);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_MOUSE);
+}
+
+/* Instance initialiser: set up the embedded device as a virtio tablet. */
+static void virtio_tablet_initfn(Object *obj)
+{
+    VirtIOInputHIDPCI *proxy = VIRTIO_INPUT_HID_PCI(obj);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_TABLET);
+}
+
+/* Abstract base type for all virtio input PCI devices. */
+static const TypeInfo virtio_input_pci_info = {
+    .name          = TYPE_VIRTIO_INPUT_PCI,
+    .parent        = TYPE_VIRTIO_PCI,
+    .instance_size = sizeof(VirtIOInputPCI),
+    .class_init    = virtio_input_pci_class_init,
+    .abstract      = true,
+};
+
+/* Abstract base type shared by the keyboard, mouse and tablet types. */
+static const TypeInfo virtio_input_hid_pci_info = {
+    .name          = TYPE_VIRTIO_INPUT_HID_PCI,
+    .parent        = TYPE_VIRTIO_INPUT_PCI,
+    .instance_size = sizeof(VirtIOInputHIDPCI),
+    .abstract      = true,
+};
+
+/* virtio-keyboard-pci: HID child type with its own class and instance init. */
+static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = {
+    .generic_name  = TYPE_VIRTIO_KEYBOARD_PCI,
+    .parent        = TYPE_VIRTIO_INPUT_HID_PCI,
+    .class_init    = virtio_input_hid_kbd_pci_class_init,
+    .instance_size = sizeof(VirtIOInputHIDPCI),
+    .instance_init = virtio_keyboard_initfn,
+};
+
+/* virtio-mouse-pci: HID child type with its own class and instance init. */
+static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = {
+    .generic_name  = TYPE_VIRTIO_MOUSE_PCI,
+    .parent        = TYPE_VIRTIO_INPUT_HID_PCI,
+    .class_init    = virtio_input_hid_mouse_pci_class_init,
+    .instance_size = sizeof(VirtIOInputHIDPCI),
+    .instance_init = virtio_mouse_initfn,
+};
+
+/*
+ * virtio-tablet-pci: HID child type. No class_init is set here, so the
+ * PCI class id is not overridden by this type.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = {
+    .generic_name  = TYPE_VIRTIO_TABLET_PCI,
+    .parent        = TYPE_VIRTIO_INPUT_HID_PCI,
+    .instance_size = sizeof(VirtIOInputHIDPCI),
+    .instance_init = virtio_tablet_initfn,
+};
+
+/*
+ * Register the two abstract base types first, then the concrete keyboard,
+ * mouse and tablet PCI types. Hooked up through type_init() below.
+ */
+static void virtio_pci_input_register(void)
+{
+    /* Base types: */
+    type_register_static(&virtio_input_pci_info);
+    type_register_static(&virtio_input_hid_pci_info);
+
+    /* Implementations: */
+    virtio_pci_types_register(&virtio_keyboard_pci_info);
+    virtio_pci_types_register(&virtio_mouse_pci_info);
+    virtio_pci_types_register(&virtio_tablet_pci_info);
+}
+
+type_init(virtio_pci_input_register)
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
new file mode 100644
index 00000000..7ef2f9dc
--- /dev/null
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -0,0 +1,112 @@
+/*
+ * Virtio IOMMU PCI Bindings
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ * Written by Eric Auger
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-iommu.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "hw/pci/pci_bus.h"
+#include "qom/object.h"
+
+/* Forward declaration; the struct is defined right after the checker. */
+typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI;
+
+/*
+ * virtio-iommu-pci: This extends VirtIOPCIProxy.
+ *
+ */
+DECLARE_INSTANCE_CHECKER(VirtIOIOMMUPCI, VIRTIO_IOMMU_PCI,
+                         TYPE_VIRTIO_IOMMU_PCI)
+
+/* PCI proxy object with the VirtIOIOMMU device embedded in it. */
+struct VirtIOIOMMUPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy base object */
+    VirtIOIOMMU vdev;          /* embedded virtio-iommu device */
+};
+
+/*
+ * Properties: "class" is stored in the proxy's class_code (default 0);
+ * "reserved-regions" is an array property backed by the embedded device's
+ * reserved_regions / nb_reserved_regions fields.
+ */
+static Property virtio_iommu_pci_properties[] = {
+    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+    DEFINE_PROP_ARRAY("reserved-regions", VirtIOIOMMUPCI,
+                      vdev.nb_reserved_regions, vdev.reserved_regions,
+                      qdev_prop_reserved_region, ReservedRegion),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Realize callback of the virtio-iommu PCI proxy.
+ *
+ * Rejects the device (error in @errp) when the machine provides no hotplug
+ * handler for it, when a reserved region has a type other than RESERVED or
+ * MSI, or when it is not plugged on the root bus. Otherwise links the
+ * device to its PCI bus through "primary-bus", forces virtio 1 mode and
+ * realizes the embedded device.
+ */
+static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOIOMMUPCI *iommu_pci = VIRTIO_IOMMU_PCI(vpci_dev);
+    PCIBus *bus = pci_get_bus(&vpci_dev->pci_dev);
+    DeviceState *iommu_dev = DEVICE(&iommu_pci->vdev);
+    VirtIOIOMMU *viommu = VIRTIO_IOMMU(iommu_dev);
+    int i;
+
+    if (!qdev_get_machine_hotplug_handler(DEVICE(vpci_dev))) {
+        error_setg(errp, "Check your machine implements a hotplug handler "
+                         "for the virtio-iommu-pci device");
+        return;
+    }
+
+    for (i = 0; i < viommu->nb_reserved_regions; i++) {
+        switch (viommu->reserved_regions[i].type) {
+        case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
+        case VIRTIO_IOMMU_RESV_MEM_T_MSI:
+            /* Accepted type, check the next region. */
+            continue;
+        default:
+            break;
+        }
+
+        error_setg(errp, "reserved region %d has an invalid type", i);
+        error_append_hint(errp, "Valid values are 0 and 1\n");
+        return;
+    }
+
+    if (!pci_bus_is_root(bus)) {
+        error_setg(errp, "virtio-iommu-pci must be plugged on the root bus");
+        return;
+    }
+
+    object_property_set_link(OBJECT(iommu_pci), "primary-bus",
+                             OBJECT(bus), &error_abort);
+
+    virtio_pci_force_virtio_1(vpci_dev);
+    qdev_realize(iommu_dev, BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initialiser: installs the realize hook and properties, sets the
+ * PCI revision and class id, and marks the device as not hotpluggable.
+ */
+static void virtio_iommu_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    k->realize = virtio_iommu_pci_realize;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    device_class_set_props(dc, virtio_iommu_pci_properties);
+    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+    pcidev_k->class_id = PCI_CLASS_OTHERS;
+    dc->hotpluggable = false;
+}
+
+/* Instance initialiser: set up the embedded virtio-iommu device. */
+static void virtio_iommu_pci_instance_init(Object *obj)
+{
+    VirtIOIOMMUPCI *proxy = VIRTIO_IOMMU_PCI(obj);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_IOMMU);
+}
+
+/* Type description for virtio-iommu-pci. */
+static const VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = {
+    .generic_name  = TYPE_VIRTIO_IOMMU_PCI,
+    .instance_size = sizeof(VirtIOIOMMUPCI),
+    .instance_init = virtio_iommu_pci_instance_init,
+    .class_init    = virtio_iommu_pci_class_init,
+};
+
+/* Register the virtio-iommu-pci type; hooked up through type_init(). */
+static void virtio_iommu_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_iommu_pci_info);
+}
+
+type_init(virtio_iommu_pci_register)
+
+
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
new file mode 100644
index 00000000..62e07ec2
--- /dev/null
+++ b/hw/virtio/virtio-iommu.c
@@ -0,0 +1,1425 @@
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/iov.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio.h"
+#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+#include "standard-headers/linux/virtio_ids.h"
+
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-iommu.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci.h"
+
+/*
+ * Max size
+ * NOTE(review): presumably the virtqueue size and the size in bytes of the
+ * probe buffer; the users are outside this excerpt, confirm there.
+ */
+#define VIOMMU_DEFAULT_QUEUE_SIZE 256
+#define VIOMMU_PROBE_SIZE 512
+
+/* A translation domain and the endpoints currently attached to it. */
+typedef struct VirtIOIOMMUDomain {
+    uint32_t id;      /* domain id, also the key in the domains tree */
+    bool bypass;      /* reported as the bypass state of attached endpoints */
+    GTree *mappings;  /* VirtIOIOMMUInterval -> VirtIOIOMMUMapping */
+    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
+} VirtIOIOMMUDomain;
+
+/* An endpoint known to the IOMMU, optionally attached to a domain. */
+typedef struct VirtIOIOMMUEndpoint {
+    uint32_t id;                 /* endpoint id, key in the endpoints tree */
+    VirtIOIOMMUDomain *domain;   /* NULL while not attached to any domain */
+    IOMMUMemoryRegion *iommu_mr; /* IOMMU memory region of the endpoint */
+    QLIST_ENTRY(VirtIOIOMMUEndpoint) next; /* link in domain->endpoint_list */
+} VirtIOIOMMUEndpoint;
+
+/* Address interval [low, high]; both bounds are treated as inclusive. */
+typedef struct VirtIOIOMMUInterval {
+    uint64_t low;
+    uint64_t high;
+} VirtIOIOMMUInterval;
+
+/* Value stored per mapped interval: target address and map flags. */
+typedef struct VirtIOIOMMUMapping {
+    uint64_t phys_addr; /* address the start of the interval maps to */
+    uint32_t flags;     /* VIRTIO_IOMMU_MAP_F_* flags of the mapping */
+} VirtIOIOMMUMapping;
+
+/* Build the bus/device/function id of @dev from its bus number and devfn. */
+static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
+{
+    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
+}
+
+/*
+ * Tell whether @sdev currently bypasses translation.
+ *
+ * The global config.bypass value applies unless the device has an endpoint
+ * that is attached to a domain, in which case the domain's bypass setting
+ * wins. The lookup runs under the IOMMU mutex.
+ */
+static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
+{
+    VirtIOIOMMU *s = sdev->viommu;
+    uint32_t sid = virtio_iommu_get_bdf(sdev);
+    VirtIOIOMMUEndpoint *ep;
+    bool bypassed;
+
+    qemu_rec_mutex_lock(&s->mutex);
+
+    /* Default; also covers the check made before system reset. */
+    bypassed = s->config.bypass;
+
+    if (s->endpoints) {
+        ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+        if (ep && ep->domain) {
+            bypassed = ep->domain->bypass;
+        }
+    }
+
+    qemu_rec_mutex_unlock(&s->mutex);
+    return bypassed;
+}
+
+/*
+ * Enable either the IOMMU memory region or the bypass memory region of
+ * @sdev, depending on whether the device is currently bypassed.
+ *
+ * Return whether the device is using IOMMU translation.
+ */
+static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
+{
+    bool use_remapping;
+
+    assert(sdev);
+
+    use_remapping = !virtio_iommu_device_bypassed(sdev);
+
+    trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
+                                            PCI_SLOT(sdev->devfn),
+                                            PCI_FUNC(sdev->devfn),
+                                            use_remapping);
+
+    /* Turn off first then on the other */
+    if (use_remapping) {
+        memory_region_set_enabled(&sdev->bypass_mr, false);
+        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
+    } else {
+        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
+        memory_region_set_enabled(&sdev->bypass_mr, true);
+    }
+
+    return use_remapping;
+}
+
+/*
+ * Re-evaluate the address space selection of every device registered on
+ * every bus tracked in the as_by_busptr table.
+ */
+static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
+{
+    GHashTableIter bus_iter;
+    IOMMUPciBus *pci_bus;
+    int devfn;
+
+    g_hash_table_iter_init(&bus_iter, s->as_by_busptr);
+    while (g_hash_table_iter_next(&bus_iter, NULL, (void **)&pci_bus)) {
+        for (devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
+            /* Slots without a registered device are skipped. */
+            if (pci_bus->pbdev[devfn]) {
+                virtio_iommu_switch_address_space(pci_bus->pbdev[devfn]);
+            }
+        }
+    }
+}
+
+/**
+ * Look up the IOMMUPciBus for @bus_num.
+ *
+ * SID based operations identify the bus by number, so a per-number cache
+ * is kept. It is filled lazily from the bus hash table because bus numbers
+ * may not be initialized yet when the IOMMUPciBus is created
+ * (iommu_find_add_as). Returns NULL when no tracked bus has that number.
+ */
+static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
+{
+    IOMMUPciBus *cached = s->iommu_pcibus_by_bus_num[bus_num];
+    IOMMUPciBus *candidate;
+    GHashTableIter iter;
+
+    if (cached) {
+        return cached;
+    }
+
+    g_hash_table_iter_init(&iter, s->as_by_busptr);
+    while (g_hash_table_iter_next(&iter, NULL, (void **)&candidate)) {
+        if (pci_bus_num(candidate->bus) != bus_num) {
+            continue;
+        }
+        /* Remember the match so the next lookup hits the cache. */
+        s->iommu_pcibus_by_bus_num[bus_num] = candidate;
+        return candidate;
+    }
+
+    return NULL;
+}
+
+/*
+ * Return the IOMMU memory region of the device identified by @sid, or NULL
+ * when the bus is unknown or no device is registered at that devfn.
+ */
+static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
+{
+    uint8_t bus_n = PCI_BUS_NUM(sid);
+    uint8_t devfn;
+    IOMMUPciBus *pci_bus = iommu_find_iommu_pcibus(s, bus_n);
+    IOMMUDevice *dev;
+
+    if (!pci_bus) {
+        return NULL;
+    }
+
+    devfn = sid & (PCI_DEVFN_MAX - 1);
+    dev = pci_bus->pbdev[devfn];
+
+    return dev ? &dev->iommu_mr : NULL;
+}
+
+/*
+ * Tree comparison function for VirtIOIOMMUInterval keys.
+ *
+ * Returns -1 when @a lies entirely below @b, 1 when it lies entirely above,
+ * and 0 when the two intervals overlap. @user_data is unused.
+ */
+static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    const VirtIOIOMMUInterval *lhs = a;
+    const VirtIOIOMMUInterval *rhs = b;
+
+    if (lhs->high < rhs->low) {
+        return -1;
+    }
+    return (rhs->high < lhs->low) ? 1 : 0;
+}
+
+/*
+ * Send @event to the notifiers of @mr for the range [virt_start, virt_end],
+ * split into chunks whose mask comes from dma_aligned_pow2_mask().
+ *
+ * The caller fills in the event type, target address space, permission and
+ * starting translated address; iova and addr_mask are set here for every
+ * chunk.
+ */
+static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
+                                          IOMMUTLBEvent *event,
+                                          hwaddr virt_start, hwaddr virt_end)
+{
+    uint64_t delta = virt_end - virt_start;
+
+    event->entry.iova = virt_start;
+    event->entry.addr_mask = delta;
+
+    /*
+     * Whole 64-bit range: a single notification is sent here. The loop
+     * below then does not run, because virt_end + 1 wraps around to
+     * virt_start.
+     */
+    if (delta == UINT64_MAX) {
+        memory_region_notify_iommu(mr, 0, *event);
+    }
+
+    while (virt_start != virt_end + 1) {
+        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
+
+        event->entry.addr_mask = mask;
+        event->entry.iova = virt_start;
+        memory_region_notify_iommu(mr, 0, *event);
+        virt_start += mask + 1;
+        /* Only map events advance the translated address with the iova. */
+        if (event->entry.perm != IOMMU_NONE) {
+            event->entry.translated_addr += mask + 1;
+        }
+    }
+}
+
+/*
+ * Notify MAP listeners of @mr that [virt_start, virt_end] now maps to
+ * @paddr with the access rights derived from @flags.
+ *
+ * Nothing is sent when the region has no MAP notifier, when the mapping is
+ * flagged MMIO, or when the flags grant neither read nor write access.
+ */
+static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
+                                    hwaddr virt_end, hwaddr paddr,
+                                    uint32_t flags)
+{
+    IOMMUTLBEvent event;
+    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
+                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);
+
+    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
+        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
+        return;
+    }
+
+    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
+                                  paddr, perm);
+
+    event.type = IOMMU_NOTIFIER_MAP;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = perm;
+    event.entry.translated_addr = paddr;
+
+    /* iova and addr_mask are filled in per chunk by the helper. */
+    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
+}
+
+/*
+ * Notify UNMAP listeners of @mr that [virt_start, virt_end] is no longer
+ * mapped. Nothing is sent when the region has no UNMAP notifier.
+ */
+static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
+                                      hwaddr virt_end)
+{
+    IOMMUTLBEvent event;
+
+    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
+        return;
+    }
+
+    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);
+
+    event.type = IOMMU_NOTIFIER_UNMAP;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = IOMMU_NONE;
+    event.entry.translated_addr = 0;
+
+    /* iova and addr_mask are filled in per chunk by the helper. */
+    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
+}
+
+/*
+ * Tree traversal callback: send an unmap notification for the interval in
+ * @key to the IOMMU memory region passed as @data. @value is not used.
+ * Always returns false so the traversal continues.
+ */
+static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
+                                             gpointer data)
+{
+    VirtIOIOMMUInterval *range = key;
+    IOMMUMemoryRegion *region = data;
+
+    virtio_iommu_notify_unmap(region, range->low, range->high);
+    return false;
+}
+
+/*
+ * Tree traversal callback: send a map notification for the interval in
+ * @key, using the mapping in @value, to the IOMMU memory region passed as
+ * @data. Always returns false so the traversal continues.
+ */
+static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
+                                           gpointer data)
+{
+    VirtIOIOMMUInterval *range = key;
+    VirtIOIOMMUMapping *target = value;
+    IOMMUMemoryRegion *region = data;
+
+    virtio_iommu_notify_map(region, range->low, range->high,
+                            target->phys_addr, target->flags);
+    return false;
+}
+
+/*
+ * Detach @ep from its domain, if it has one: send unmap notifications for
+ * every mapping of the domain on the endpoint's memory region, remove the
+ * endpoint from the domain's list, clear ep->domain and re-evaluate the
+ * device's address space.
+ */
+static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
+{
+    VirtIOIOMMUDomain *domain = ep->domain;
+    /* The endpoint's iommu_mr is embedded in its IOMMUDevice. */
+    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
+
+    if (!ep->domain) {
+        return;
+    }
+    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
+                   ep->iommu_mr);
+    QLIST_REMOVE(ep, next);
+    ep->domain = NULL;
+    /* Must run after ep->domain is cleared so the new state is seen. */
+    virtio_iommu_switch_address_space(sdev);
+}
+
+/*
+ * Return the endpoint with id @ep_id, creating and registering it on first
+ * use. Returns NULL when no IOMMU memory region exists for that id.
+ */
+static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
+                                                      uint32_t ep_id)
+{
+    VirtIOIOMMUEndpoint *endpoint;
+    IOMMUMemoryRegion *region;
+
+    endpoint = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
+    if (endpoint) {
+        return endpoint;
+    }
+
+    region = virtio_iommu_mr(s, ep_id);
+    if (!region) {
+        return NULL;
+    }
+
+    /* First reference: allocate a zeroed endpoint, not attached yet. */
+    endpoint = g_malloc0(sizeof(*endpoint));
+    endpoint->id = ep_id;
+    endpoint->iommu_mr = region;
+    trace_virtio_iommu_get_endpoint(ep_id);
+    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), endpoint);
+
+    return endpoint;
+}
+
+/*
+ * Release the endpoint passed as @data: detach it from its domain when it
+ * has one, then free it.
+ */
+static void virtio_iommu_put_endpoint(gpointer data)
+{
+    VirtIOIOMMUEndpoint *endpoint = data;
+
+    if (endpoint->domain) {
+        virtio_iommu_detach_endpoint_from_domain(endpoint);
+    }
+
+    trace_virtio_iommu_put_endpoint(endpoint->id);
+    g_free(endpoint);
+}
+
+/*
+ * Look up the domain @domain_id, creating it with the requested @bypass
+ * attribute when it does not exist yet. Returns NULL when the domain already
+ * exists with a different bypass attribute.
+ */
+static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
+                                                  uint32_t domain_id,
+                                                  bool bypass)
+{
+    gpointer key = GUINT_TO_POINTER(domain_id);
+    VirtIOIOMMUDomain *domain = g_tree_lookup(s->domains, key);
+
+    if (domain) {
+        return domain->bypass == bypass ? domain : NULL;
+    }
+
+    domain = g_new0(VirtIOIOMMUDomain, 1);
+    domain->id = domain_id;
+    domain->bypass = bypass;
+    /* Both the interval keys and the mapping values are owned by the tree */
+    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                       NULL, (GDestroyNotify)g_free,
+                                       (GDestroyNotify)g_free);
+    QLIST_INIT(&domain->endpoint_list);
+    g_tree_insert(s->domains, key, domain);
+    trace_virtio_iommu_get_domain(domain_id);
+
+    return domain;
+}
+
+/*
+ * Value destroy notifier of the domains tree: detach every endpoint still
+ * attached to the domain, drop its mappings and release it.
+ */
+static void virtio_iommu_put_domain(gpointer data)
+{
+    VirtIOIOMMUDomain *dom = data;
+    VirtIOIOMMUEndpoint *ep, *next_ep;
+
+    /* Detaching unlinks the endpoint, hence the _SAFE iterator */
+    QLIST_FOREACH_SAFE(ep, &dom->endpoint_list, next, next_ep) {
+        virtio_iommu_detach_endpoint_from_domain(ep);
+    }
+
+    g_tree_destroy(dom->mappings);
+    trace_virtio_iommu_put_domain(dom->id);
+    g_free(dom);
+}
+
+/*
+ * PCI IOMMU hook (registered through pci_setup_iommu()): return the address
+ * space used by the device @devfn on @bus, building the per-bus and
+ * per-device bookkeeping on first use. @opaque is the VirtIOIOMMU instance.
+ */
+static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
+                                              int devfn)
+{
+    static uint32_t mr_index;
+    VirtIOIOMMU *s = opaque;
+    IOMMUPciBus *sbus;
+    IOMMUDevice *sdev;
+    char *name;
+
+    sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+    if (!sbus) {
+        /* One IOMMUDevice pointer slot per devfn follows the bus header */
+        sbus = g_malloc0(sizeof(IOMMUPciBus) +
+                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
+        sbus->bus = bus;
+        g_hash_table_insert(s->as_by_busptr, bus, sbus);
+    }
+
+    sdev = sbus->pbdev[devfn];
+    if (sdev) {
+        /* Already set up by a previous call */
+        return &sdev->as;
+    }
+
+    name = g_strdup_printf("%s-%d-%d",
+                           TYPE_VIRTIO_IOMMU_MEMORY_REGION,
+                           mr_index++, devfn);
+
+    sdev = g_new0(IOMMUDevice, 1);
+    sbus->pbdev[devfn] = sdev;
+
+    sdev->viommu = s;
+    sdev->bus = bus;
+    sdev->devfn = devfn;
+
+    trace_virtio_iommu_init_iommu_mr(name);
+
+    memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
+    address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
+
+    /*
+     * The "IOMMU disabled" container is an alias to the shared system
+     * memory region. Aliasing a shared region could help the memory API
+     * detect identical FlatViews, so devices in bypass mode (virtio-iommu
+     * driver not configured, or "iommu=pt") can share one FlatView. This
+     * greatly reduces the number of FlatViews in the system, hence the VM
+     * runs faster.
+     */
+    memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
+                             "system", get_system_memory(), 0,
+                             memory_region_size(get_system_memory()));
+
+    memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
+                             TYPE_VIRTIO_IOMMU_MEMORY_REGION,
+                             OBJECT(s), name,
+                             UINT64_MAX);
+
+    /*
+     * Both containers are hooked under the root; switching between the
+     * IOMMU and bypass regions is done by enabling/disabling the
+     * corresponding sub-container.
+     */
+    memory_region_add_subregion_overlap(&sdev->root, 0,
+                                        MEMORY_REGION(&sdev->iommu_mr),
+                                        0);
+    memory_region_add_subregion_overlap(&sdev->root, 0,
+                                        &sdev->bypass_mr, 0);
+
+    virtio_iommu_switch_address_space(sdev);
+    g_free(name);
+
+    return &sdev->as;
+}
+
+/*
+ * Handle an ATTACH request: attach the endpoint to the domain named in @req,
+ * creating the domain when needed, then replay the domain mappings on the
+ * endpoint's memory region.
+ *
+ * Returns a VIRTIO_IOMMU_S_* status: INVAL on unknown flags or on a bypass
+ * attribute mismatch with an existing domain, NOENT when the endpoint cannot
+ * be resolved, OK otherwise.
+ */
+static int virtio_iommu_attach(VirtIOIOMMU *s,
+                               struct virtio_iommu_req_attach *req)
+{
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    uint32_t ep_id = le32_to_cpu(req->endpoint);
+    uint32_t flags = le32_to_cpu(req->flags);
+    VirtIOIOMMUDomain *domain, *old_domain;
+    VirtIOIOMMUEndpoint *ep;
+    IOMMUDevice *sdev;
+
+    trace_virtio_iommu_attach(domain_id, ep_id);
+
+    if ((flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) != 0) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    ep = virtio_iommu_get_endpoint(s, ep_id);
+    if (ep == NULL) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    old_domain = ep->domain;
+    if (old_domain) {
+        /*
+         * The endpoint is already attached: detach it first and drop the
+         * previous domain if that leaves it without any endpoint.
+         */
+        virtio_iommu_detach_endpoint_from_domain(ep);
+        if (QLIST_EMPTY(&old_domain->endpoint_list)) {
+            g_tree_remove(s->domains, GUINT_TO_POINTER(old_domain->id));
+        }
+    }
+
+    domain = virtio_iommu_get_domain(s, domain_id,
+                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
+    if (domain == NULL) {
+        /* Incompatible bypass flag */
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
+    ep->domain = domain;
+
+    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
+    virtio_iommu_switch_address_space(sdev);
+
+    /* Replay domain mappings on the associated memory region */
+    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
+                   ep->iommu_mr);
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Handle a DETACH request: detach the endpoint from the domain named in
+ * @req and remove the domain once no endpoint is attached to it anymore.
+ *
+ * Returns VIRTIO_IOMMU_S_NOENT for an unknown endpoint, VIRTIO_IOMMU_S_INVAL
+ * when the endpoint is not attached to that domain, VIRTIO_IOMMU_S_OK
+ * otherwise.
+ */
+static int virtio_iommu_detach(VirtIOIOMMU *s,
+                               struct virtio_iommu_req_detach *req)
+{
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    uint32_t ep_id = le32_to_cpu(req->endpoint);
+    VirtIOIOMMUEndpoint *ep;
+    VirtIOIOMMUDomain *dom;
+
+    trace_virtio_iommu_detach(domain_id, ep_id);
+
+    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
+    if (ep == NULL) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    dom = ep->domain;
+    if (dom == NULL || dom->id != domain_id) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    virtio_iommu_detach_endpoint_from_domain(ep);
+
+    /* The last endpoint leaving the domain takes the domain with it */
+    if (QLIST_EMPTY(&dom->endpoint_list)) {
+        g_tree_remove(s->domains, GUINT_TO_POINTER(dom->id));
+    }
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Handle a MAP request: record the [virt_start, virt_end] -> phys_start
+ * mapping in the domain and notify every endpoint attached to it.
+ *
+ * Returns VIRTIO_IOMMU_S_INVAL on unknown flags, on a bypass domain or when
+ * the tree lookup finds an existing mapping for the interval,
+ * VIRTIO_IOMMU_S_NOENT for an unknown domain, VIRTIO_IOMMU_S_OK otherwise.
+ */
+static int virtio_iommu_map(VirtIOIOMMU *s,
+                            struct virtio_iommu_req_map *req)
+{
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    uint64_t phys_start = le64_to_cpu(req->phys_start);
+    uint64_t virt_start = le64_to_cpu(req->virt_start);
+    uint64_t virt_end = le64_to_cpu(req->virt_end);
+    uint32_t flags = le32_to_cpu(req->flags);
+    VirtIOIOMMUInterval probe = { .low = virt_start, .high = virt_end };
+    VirtIOIOMMUInterval *key;
+    VirtIOIOMMUMapping *val;
+    VirtIOIOMMUDomain *domain;
+    VirtIOIOMMUEndpoint *ep;
+
+    if ((flags & ~VIRTIO_IOMMU_MAP_F_MASK) != 0) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
+    if (domain == NULL) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+    if (domain->bypass) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    /* Refuse the request when the lookup already finds a mapping */
+    if (g_tree_lookup(domain->mappings, &probe)) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);
+
+    /* Key and value are handed over to the tree, which frees them */
+    key = g_new0(VirtIOIOMMUInterval, 1);
+    key->low = virt_start;
+    key->high = virt_end;
+
+    val = g_new0(VirtIOIOMMUMapping, 1);
+    val->phys_addr = phys_start;
+    val->flags = flags;
+
+    g_tree_insert(domain->mappings, key, val);
+
+    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
+        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
+                                flags);
+    }
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Handle an UNMAP request: remove every mapping of the domain found by
+ * looking up [virt_start, virt_end], notifying the attached endpoints.
+ *
+ * A mapping returned by the lookup that is not fully covered by the
+ * requested range stops the processing with VIRTIO_IOMMU_S_RANGE; mappings
+ * removed before that point stay removed. Returns VIRTIO_IOMMU_S_NOENT for
+ * an unknown domain and VIRTIO_IOMMU_S_INVAL for a bypass domain.
+ */
+static int virtio_iommu_unmap(VirtIOIOMMU *s,
+                              struct virtio_iommu_req_unmap *req)
+{
+    uint32_t domain_id = le32_to_cpu(req->domain);
+    uint64_t virt_start = le64_to_cpu(req->virt_start);
+    uint64_t virt_end = le64_to_cpu(req->virt_end);
+    VirtIOIOMMUInterval range = { .low = virt_start, .high = virt_end };
+    VirtIOIOMMUInterval *hit_key;
+    VirtIOIOMMUMapping *hit_val;
+    VirtIOIOMMUDomain *domain;
+    VirtIOIOMMUEndpoint *ep;
+
+    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);
+
+    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
+    if (domain == NULL) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+    if (domain->bypass) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    while (g_tree_lookup_extended(domain->mappings, &range,
+                                  (void **)&hit_key, (void **)&hit_val)) {
+        /* Copy the bounds: the key is freed by g_tree_remove() below */
+        uint64_t low = hit_key->low;
+        uint64_t high = hit_key->high;
+
+        if (low < range.low || high > range.high) {
+            /* The mapping sticks out of the requested range */
+            return VIRTIO_IOMMU_S_RANGE;
+        }
+
+        QLIST_FOREACH(ep, &domain->endpoint_list, next) {
+            virtio_iommu_notify_unmap(ep->iommu_mr, low, high);
+        }
+        g_tree_remove(domain->mappings, hit_key);
+        trace_virtio_iommu_unmap_done(domain_id, low, high);
+    }
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Write one RESV_MEM probe property per reserved region of @s into @buf.
+ *
+ * @ep is only used for tracing and @free is the room left in @buf.
+ * Returns the number of bytes written, or -ENOSPC when the properties do
+ * not fit, in which case nothing is written.
+ */
+static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
+                                               uint8_t *buf, size_t free)
+{
+    struct virtio_iommu_probe_resv_mem prop = {};
+    const size_t prop_size = sizeof(prop);
+    /* The length advertised in the property head excludes the head itself */
+    const size_t payload_len = prop_size - sizeof(prop.head);
+    const size_t total = prop_size * s->nb_reserved_regions;
+    uint8_t *out = buf;
+    int i;
+
+    if (free < total) {
+        return -ENOSPC;
+    }
+
+    for (i = 0; i < s->nb_reserved_regions; i++) {
+        ReservedRegion *reg = &s->reserved_regions[i];
+        unsigned subtype = reg->type;
+
+        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
+               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
+
+        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
+        prop.head.length = cpu_to_le16(payload_len);
+        prop.subtype = subtype;
+        prop.start = cpu_to_le64(reg->low);
+        prop.end = cpu_to_le64(reg->high);
+
+        memcpy(out, &prop, prop_size);
+        out += prop_size;
+
+        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
+                                              prop.start, prop.end);
+    }
+
+    return total;
+}
+
+/**
+ * virtio_iommu_probe - Fill the probe request buffer with
+ * the properties the device is able to return
+ *
+ * @buf is treated as VIOMMU_PROBE_SIZE bytes long. Returns
+ * VIRTIO_IOMMU_S_NOENT when no memory region matches the endpoint,
+ * VIRTIO_IOMMU_S_INVAL when the properties do not fit in the buffer and
+ * VIRTIO_IOMMU_S_OK otherwise.
+ */
+static int virtio_iommu_probe(VirtIOIOMMU *s,
+                              struct virtio_iommu_req_probe *req,
+                              uint8_t *buf)
+{
+    uint32_t ep_id = le32_to_cpu(req->endpoint);
+    size_t room = VIOMMU_PROBE_SIZE;
+    ssize_t written;
+
+    if (virtio_iommu_mr(s, ep_id) == NULL) {
+        return VIRTIO_IOMMU_S_NOENT;
+    }
+
+    written = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, room);
+    if (written < 0) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    /* Keep the cursor and the remaining room up to date */
+    buf += written;
+    room -= written;
+
+    return VIRTIO_IOMMU_S_OK;
+}
+
+/*
+ * Copy @payload_sz bytes of request from the iovec into @req.
+ * Returns 0 on success, VIRTIO_IOMMU_S_INVAL when the iovec holds fewer
+ * bytes than requested.
+ */
+static int virtio_iommu_iov_to_req(struct iovec *iov,
+                                   unsigned int iov_cnt,
+                                   void *req, size_t payload_sz)
+{
+    size_t copied = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
+
+    return likely(copied == payload_sz) ? 0 : VIRTIO_IOMMU_S_INVAL;
+}
+
+/*
+ * Generate virtio_iommu_handle_<req>(): copy the request from the iovec,
+ * tail excluded, and hand it over to virtio_iommu_<req>(). When the copy
+ * fails, the status of virtio_iommu_iov_to_req() is returned instead.
+ */
+#define virtio_iommu_handle_req(__req)                                  \
+static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
+                                         struct iovec *iov,             \
+                                         unsigned int iov_cnt)          \
+{                                                                       \
+    struct virtio_iommu_req_ ## __req req;                              \
+    size_t payload_sz = sizeof(req) -                                   \
+                        sizeof(struct virtio_iommu_req_tail);           \
+    int status = virtio_iommu_iov_to_req(iov, iov_cnt, &req,            \
+                                         payload_sz);                   \
+                                                                        \
+    if (status) {                                                       \
+        return status;                                                  \
+    }                                                                   \
+    return virtio_iommu_ ## __req(s, &req);                             \
+}
+
+virtio_iommu_handle_req(attach)
+virtio_iommu_handle_req(detach)
+virtio_iommu_handle_req(map)
+virtio_iommu_handle_req(unmap)
+
+/*
+ * Copy a PROBE request out of the iovec and let virtio_iommu_probe() fill
+ * @buf with the probe properties. Unlike the macro-generated handlers, the
+ * whole request structure is copied. Returns a VIRTIO_IOMMU_S_* status.
+ */
+static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
+                                     struct iovec *iov,
+                                     unsigned int iov_cnt,
+                                     uint8_t *buf)
+{
+    struct virtio_iommu_req_probe req;
+    int status;
+
+    status = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));
+    if (status) {
+        return status;
+    }
+
+    return virtio_iommu_probe(s, &req, buf);
+}
+
+/*
+ * Request virtqueue handler: pop every available element, decode the request
+ * head, dispatch on the request type under the device mutex and push the
+ * reply back to the guest.
+ *
+ * The reply of every request but PROBE is the request tail alone. A PROBE
+ * reply is a buffer of config.probe_size bytes followed by the tail. An
+ * element whose buffers cannot even hold a head and a tail marks the device
+ * as broken and stops the processing.
+ */
+static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    struct virtio_iommu_req_head head;
+    struct virtio_iommu_req_tail tail = {};
+    VirtQueueElement *elem;
+    unsigned int iov_cnt;
+    struct iovec *iov;
+    void *buf = NULL;
+    size_t sz;
+
+    for (;;) {
+        /*
+         * The reply size must be reset for each element: a PROBE request
+         * enlarges it, and reusing that value for a later non-PROBE request
+         * would make iov_from_buf() read past the end of @tail.
+         */
+        size_t output_size = sizeof(tail);
+
+        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            return;
+        }
+
+        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
+            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
+            virtio_error(vdev, "virtio-iommu bad head/tail size");
+            virtqueue_detach_element(vq, elem, 0);
+            g_free(elem);
+            break;
+        }
+
+        iov_cnt = elem->out_num;
+        iov = elem->out_sg;
+        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
+        if (unlikely(sz != sizeof(head))) {
+            tail.status = VIRTIO_IOMMU_S_DEVERR;
+            goto out;
+        }
+        qemu_rec_mutex_lock(&s->mutex);
+        switch (head.type) {
+        case VIRTIO_IOMMU_T_ATTACH:
+            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_DETACH:
+            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_MAP:
+            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_UNMAP:
+            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_PROBE:
+        {
+            struct virtio_iommu_req_tail *ptail;
+
+            /* The tail lives right after the probe properties */
+            output_size = s->config.probe_size + sizeof(tail);
+            buf = g_malloc0(output_size);
+
+            ptail = (struct virtio_iommu_req_tail *)
+                        (buf + s->config.probe_size);
+            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
+            break;
+        }
+        default:
+            tail.status = VIRTIO_IOMMU_S_UNSUPP;
+        }
+        qemu_rec_mutex_unlock(&s->mutex);
+
+out:
+        /* @buf is only set for PROBE; everything else replies with @tail */
+        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
+                          buf ? buf : &tail, output_size);
+        assert(sz == output_size);
+
+        virtqueue_push(vq, elem, sz);
+        virtio_notify(vdev, vq);
+        g_free(elem);
+        g_free(buf);
+        buf = NULL;
+    }
+}
+
+/*
+ * Report a fault to the guest through the event virtqueue.
+ *
+ * The event is dropped, with a one-time error message, when the guest has
+ * not made any event buffer available. An event buffer too small to hold
+ * the fault record marks the device as broken.
+ */
+static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
+                                      int flags, uint32_t endpoint,
+                                      uint64_t address)
+{
+    VirtIODevice *vdev = &viommu->parent_obj;
+    VirtQueue *evq = viommu->event_vq;
+    struct virtio_iommu_fault record;
+    VirtQueueElement *elem;
+    size_t copied;
+
+    /* Zero the whole record first so that no stack content is exposed */
+    memset(&record, 0, sizeof(record));
+    record.reason = reason;
+    record.flags = cpu_to_le32(flags);
+    record.endpoint = cpu_to_le32(endpoint);
+    record.address = cpu_to_le64(address);
+
+    elem = virtqueue_pop(evq, sizeof(VirtQueueElement));
+    if (elem == NULL) {
+        error_report_once(
+            "no buffer available in event queue to report event");
+        return;
+    }
+
+    if (iov_size(elem->in_sg, elem->in_num) < sizeof(record)) {
+        virtio_error(vdev, "error buffer of wrong size");
+        virtqueue_detach_element(evq, elem, 0);
+        g_free(elem);
+        return;
+    }
+
+    copied = iov_from_buf(elem->in_sg, elem->in_num, 0,
+                          &record, sizeof(record));
+    assert(copied == sizeof(record));
+
+    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
+    virtqueue_push(evq, elem, copied);
+    virtio_notify(vdev, evq);
+    g_free(elem);
+}
+
+/*
+ * IOMMU memory region translate callback.
+ *
+ * Look up the endpoint matching the device behind @mr and translate @addr
+ * through the mappings of the domain it is attached to. The returned entry
+ * keeps perm == IOMMU_NONE whenever the access is rejected, in which case a
+ * fault is also reported on the event queue. Accesses hitting an MSI
+ * reserved region or going through a bypass domain are returned untranslated
+ * with the requested permission.
+ */
+static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
+                                            IOMMUAccessFlags flag,
+                                            int iommu_idx)
+{
+    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMUInterval interval, *mapping_key;
+    VirtIOIOMMUMapping *mapping_value;
+    VirtIOIOMMU *s = sdev->viommu;
+    bool read_fault, write_fault;
+    VirtIOIOMMUEndpoint *ep;
+    uint32_t sid, flags;
+    bool bypass_allowed;
+    bool found;
+    int i;
+
+    interval.low = addr;
+    interval.high = addr + 1;
+
+    IOMMUTLBEntry entry = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = addr,
+        /*
+         * page_size_mask is a 64-bit value: count its trailing zeroes and
+         * shift in 64 bits so that no granule gets truncated.
+         */
+        .addr_mask = (1ULL << ctz64(s->config.page_size_mask)) - 1,
+        .perm = IOMMU_NONE,
+    };
+
+    bypass_allowed = s->config.bypass;
+
+    sid = virtio_iommu_get_bdf(sdev);
+
+    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
+    qemu_rec_mutex_lock(&s->mutex);
+
+    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+
+    if (bypass_allowed) {
+        assert(ep && ep->domain && !ep->domain->bypass);
+    }
+
+    if (!ep) {
+        if (!bypass_allowed) {
+            error_report_once("%s sid=%d is not known!!", __func__, sid);
+            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
+                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                      sid, addr);
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    /* Reserved regions take precedence over the domain mappings */
+    for (i = 0; i < s->nb_reserved_regions; i++) {
+        ReservedRegion *reg = &s->reserved_regions[i];
+
+        if (addr >= reg->low && addr <= reg->high) {
+            switch (reg->type) {
+            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
+                entry.perm = flag;
+                break;
+            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
+            default:
+                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
+                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                          sid, addr);
+                break;
+            }
+            goto unlock;
+        }
+    }
+
+    if (!ep->domain) {
+        if (!bypass_allowed) {
+            error_report_once("%s %02x:%02x.%01x not attached to any domain",
+                              __func__, PCI_BUS_NUM(sid),
+                              PCI_SLOT(sid), PCI_FUNC(sid));
+            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
+                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                      sid, addr);
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    } else if (ep->domain->bypass) {
+        entry.perm = flag;
+        goto unlock;
+    }
+
+    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
+                                   (void **)&mapping_key,
+                                   (void **)&mapping_value);
+    if (!found) {
+        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
+                          __func__, addr, sid);
+        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
+                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
+                                  sid, addr);
+        goto unlock;
+    }
+
+    /* Check the requested access against the rights of the mapping */
+    read_fault = (flag & IOMMU_RO) &&
+                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
+    write_fault = (flag & IOMMU_WO) &&
+                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);
+
+    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
+    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
+    if (flags) {
+        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
+                          __func__, addr, flag, mapping_value->flags);
+        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
+        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
+                                  flags, sid, addr);
+        goto unlock;
+    }
+    /* Keep the offset of @addr within the mapped interval */
+    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
+    entry.perm = flag;
+    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
+
+unlock:
+    qemu_rec_mutex_unlock(&s->mutex);
+    return entry;
+}
+
+/*
+ * Copy the device configuration into the guest-visible config space
+ * @config_data. Multi-byte fields are converted to little endian.
+ */
+static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    struct virtio_iommu_config *cfg = &s->config;
+    struct virtio_iommu_config *out = (void *)config_data;
+
+    out->page_size_mask = cpu_to_le64(cfg->page_size_mask);
+    out->input_range.start = cpu_to_le64(cfg->input_range.start);
+    out->input_range.end = cpu_to_le64(cfg->input_range.end);
+    out->domain_range.start = cpu_to_le32(cfg->domain_range.start);
+    out->domain_range.end = cpu_to_le32(cfg->domain_range.end);
+    out->probe_size = cpu_to_le32(cfg->probe_size);
+    /* Copied as is, without any endianness conversion */
+    out->bypass = cfg->bypass;
+
+    trace_virtio_iommu_get_config(cfg->page_size_mask,
+                                  cfg->input_range.start,
+                                  cfg->input_range.end,
+                                  cfg->domain_range.start,
+                                  cfg->domain_range.end,
+                                  cfg->probe_size,
+                                  cfg->bypass);
+}
+
+/*
+ * Config space write handler. Only a change of the bypass field is acted
+ * upon: it requires VIRTIO_IOMMU_F_BYPASS_CONFIG and a value of 0 or 1,
+ * otherwise the device is marked as broken. An accepted change triggers a
+ * switch of all the address spaces.
+ */
+static void virtio_iommu_set_config(VirtIODevice *vdev,
+                                    const uint8_t *config_data)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    struct virtio_iommu_config *cfg = &s->config;
+    const struct virtio_iommu_config *in = (void *)config_data;
+
+    if (in->bypass != cfg->bypass) {
+        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
+            virtio_error(vdev, "cannot set config.bypass");
+            return;
+        }
+        if (in->bypass != 0 && in->bypass != 1) {
+            virtio_error(vdev, "invalid config.bypass value '%u'",
+                         in->bypass);
+            return;
+        }
+
+        cfg->bypass = in->bypass;
+        virtio_iommu_switch_address_space_all(s);
+    }
+
+    trace_virtio_iommu_set_config(in->bypass);
+}
+
+/*
+ * Return the feature set @f extended with the features of the device.
+ * @errp is left untouched.
+ */
+static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
+                                          Error **errp)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    uint64_t offered = f | s->features;
+
+    trace_virtio_iommu_get_features(offered);
+
+    return offered;
+}
+
+/*
+ * Three-way comparison of two unsigned integers stored in pointers (see
+ * GUINT_TO_POINTER): negative, zero or positive when @a is respectively
+ * lower than, equal to or greater than @b. @user_data is unused.
+ */
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    guint lhs = GPOINTER_TO_UINT(a);
+    guint rhs = GPOINTER_TO_UINT(b);
+
+    if (lhs < rhs) {
+        return -1;
+    }
+    return lhs > rhs ? 1 : 0;
+}
+
+/*
+ * GTree traversal callback used on replay: trace the mapping, then notify
+ * it on the IOMMU memory region passed in @data. Returns false so that the
+ * traversal carries on with the next node.
+ */
+static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
+{
+    VirtIOIOMMUInterval *range = key;
+    VirtIOIOMMUMapping *map = value;
+    IOMMUMemoryRegion *iommu_mr = data;
+
+    trace_virtio_iommu_remap(iommu_mr->parent_obj.name,
+                             range->low, range->high, map->phys_addr);
+    virtio_iommu_notify_map(iommu_mr, range->low, range->high,
+                            map->phys_addr, map->flags);
+
+    return false;
+}
+
+/*
+ * IOMMU memory region replay callback: notify again, on @mr, every mapping
+ * of the domain the matching endpoint is attached to. Nothing happens when
+ * the endpoints tree does not exist yet, or when the endpoint is unknown or
+ * not attached. The notifier @n itself is not used.
+ */
+static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
+{
+    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMU *s = sdev->viommu;
+    uint32_t sid = virtio_iommu_get_bdf(sdev);
+    VirtIOIOMMUEndpoint *ep;
+
+    qemu_rec_mutex_lock(&s->mutex);
+
+    if (s->endpoints) {
+        ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+        if (ep && ep->domain) {
+            g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);
+        }
+    }
+
+    qemu_rec_mutex_unlock(&s->mutex);
+}
+
+/*
+ * IOMMU memory region callback run when the notifier flags go from @old to
+ * @new. Device-IOTLB unmap notifiers are refused with -EINVAL and @errp
+ * set; otherwise the addition of the first notifier or the removal of the
+ * last one is traced and 0 is returned.
+ */
+static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
+                                            IOMMUNotifierFlag old,
+                                            IOMMUNotifierFlag new,
+                                            Error **errp)
+{
+    const char *mr_name;
+
+    if ((new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) != 0) {
+        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
+        return -EINVAL;
+    }
+
+    mr_name = iommu_mr->parent_obj.name;
+    if (old == IOMMU_NOTIFIER_NONE) {
+        trace_virtio_iommu_notify_flag_add(mr_name);
+    } else if (new == IOMMU_NOTIFIER_NONE) {
+        trace_virtio_iommu_notify_flag_del(mr_name);
+    }
+
+    return 0;
+}
+
+/*
+ * The default mask (TARGET_PAGE_MASK) is the smallest supported guest
+ * granule, for example 0xfffffffffffff000. When an assigned device has page
+ * size restrictions due to the hardware IOMMU configuration, this
+ * restriction is applied to the mask.
+ *
+ * Returns 0 on success, -1 with @errp set when @new_mask cannot be honoured.
+ */
+static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
+                                           uint64_t new_mask,
+                                           Error **errp)
+{
+    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMU *s = sdev->viommu;
+    uint64_t cur_mask = s->config.page_size_mask;
+
+    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
+                                          new_mask);
+
+    /* The two masks must share at least one page size */
+    if (!(cur_mask & new_mask)) {
+        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
+                   " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
+        return -1;
+    }
+
+    if (!phase_check(PHASE_MACHINE_READY)) {
+        /* The machine is not finalized yet: the mask can still be narrowed */
+        s->config.page_size_mask &= new_mask;
+        return 0;
+    }
+
+    /*
+     * After the machine is finalized, the mask can't change anymore. If by
+     * chance the hotplugged device supports the same granule, it can still
+     * be accepted. Different masks are possible but the guest will use
+     * sub-optimal block sizes, so warn about it.
+     */
+    if (ctz64(new_mask) != ctz64(cur_mask)) {
+        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
+                   " is incompatible with mask 0x%"PRIx64, cur_mask,
+                   new_mask);
+        return -1;
+    }
+    if (new_mask != cur_mask) {
+        warn_report("virtio-iommu page mask 0x%"PRIx64
+                    " does not match 0x%"PRIx64, cur_mask, new_mask);
+    }
+
+    return 0;
+}
+
+/*
+ * System reset handler; @opaque is the VirtIOIOMMU instance.
+ *
+ * config.bypass is sticky across device reset, but is restored to its boot
+ * value on system reset, after which all the address spaces are switched
+ * accordingly.
+ */
+static void virtio_iommu_system_reset(void *opaque)
+{
+    VirtIOIOMMU *viommu = opaque;
+
+    trace_virtio_iommu_system_reset();
+
+    viommu->config.bypass = viommu->boot_bypass;
+    virtio_iommu_switch_address_space_all(viommu);
+}
+
+/*
+ * Realize the device: create the request and event virtqueues, initialise
+ * the config space and the offered features, and register the IOMMU hook
+ * on the primary PCI bus. @errp is set when no primary bus was provided.
+ */
+static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
+{
+    /* Feature bits offered by the device, in the order they are added */
+    static const unsigned int feature_bits[] = {
+        VIRTIO_RING_F_EVENT_IDX,
+        VIRTIO_RING_F_INDIRECT_DESC,
+        VIRTIO_F_VERSION_1,
+        VIRTIO_IOMMU_F_INPUT_RANGE,
+        VIRTIO_IOMMU_F_DOMAIN_RANGE,
+        VIRTIO_IOMMU_F_MAP_UNMAP,
+        VIRTIO_IOMMU_F_MMIO,
+        VIRTIO_IOMMU_F_PROBE,
+        VIRTIO_IOMMU_F_BYPASS_CONFIG,
+    };
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+    size_t i;
+
+    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));
+
+    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
+
+    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
+                                 virtio_iommu_handle_command);
+    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
+
+    /*
+     * config.bypass is needed to get the initial address space early, such
+     * as in vfio realize
+     */
+    s->config.bypass = s->boot_bypass;
+    s->config.page_size_mask = TARGET_PAGE_MASK;
+    s->config.input_range.end = UINT64_MAX;
+    s->config.domain_range.end = UINT32_MAX;
+    s->config.probe_size = VIOMMU_PROBE_SIZE;
+
+    for (i = 0; i < ARRAY_SIZE(feature_bits); i++) {
+        virtio_add_feature(&s->features, feature_bits[i]);
+    }
+
+    qemu_rec_mutex_init(&s->mutex);
+
+    /* Keys are PCIBus pointers; the IOMMUPciBus values are freed on removal */
+    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
+
+    if (!s->primary_bus) {
+        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
+    } else {
+        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
+    }
+
+    qemu_register_reset(virtio_iommu_system_reset, s);
+}
+
+/*
+ * Undo virtio_iommu_device_realize(): unregister the reset handler, release
+ * the bus table and the domain/endpoint trees when they exist, then tear
+ * down the mutex, the virtqueues and the virtio device.
+ */
+static void virtio_iommu_device_unrealize(DeviceState *dev)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    qemu_unregister_reset(virtio_iommu_system_reset, s);
+
+    g_hash_table_destroy(s->as_by_busptr);
+
+    /* The trees are only created on device reset and may not exist */
+    if (s->domains != NULL) {
+        g_tree_destroy(s->domains);
+    }
+    if (s->endpoints != NULL) {
+        g_tree_destroy(s->endpoints);
+    }
+
+    qemu_rec_mutex_destroy(&s->mutex);
+
+    virtio_delete_queue(s->req_vq);
+    virtio_delete_queue(s->event_vq);
+    virtio_cleanup(vdev);
+}
+
+/*
+ * Device reset handler: drop every domain and endpoint, then start again
+ * from empty trees. The trees own their values, which are released through
+ * virtio_iommu_put_domain() and virtio_iommu_put_endpoint().
+ */
+static void virtio_iommu_device_reset(VirtIODevice *vdev)
+{
+    VirtIOIOMMU *viommu = VIRTIO_IOMMU(vdev);
+
+    trace_virtio_iommu_device_reset();
+
+    if (viommu->domains != NULL) {
+        g_tree_destroy(viommu->domains);
+    }
+    if (viommu->endpoints != NULL) {
+        g_tree_destroy(viommu->endpoints);
+    }
+
+    viommu->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                      NULL, NULL, virtio_iommu_put_domain);
+    viommu->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                        NULL, NULL, virtio_iommu_put_endpoint);
+}
+
+/* Device status write handler: the new @status is only traced. */
+static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    trace_virtio_iommu_device_status(status);
+}
+
+/* Instance initializer: there is nothing to set up at this stage. */
+static void virtio_iommu_instance_init(Object *obj)
+{
+}
+
+/* Migration description of a mapping key: the low and high bounds */
+#define VMSTATE_INTERVAL                               \
+{                                                      \
+    .name = "interval",                                \
+    .minimum_version_id = 1,                           \
+    .version_id = 1,                                   \
+    .fields = (VMStateField[]) {                       \
+        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
+        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
+        VMSTATE_END_OF_LIST()                          \
+    }                                                  \
+}
+
+/* Migration description of a mapping value: target address and flags */
+#define VMSTATE_MAPPING                               \
+{                                                     \
+    .name = "mapping",                                \
+    .minimum_version_id = 1,                          \
+    .version_id = 1,                                  \
+    .fields = (VMStateField[]) {                      \
+        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
+        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
+        VMSTATE_END_OF_LIST()                         \
+    },                                                \
+}
+
+/* Description pair of the mappings tree: the value comes first */
+static const VMStateDescription vmstate_interval_mapping[2] = {
+    [0] = VMSTATE_MAPPING,   /* value */
+    [1] = VMSTATE_INTERVAL,  /* key   */
+};
+
+static int domain_preload(void *opaque)
+{
+    VirtIOIOMMUDomain *domain = opaque;
+
+    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                       NULL, g_free, g_free);
+    return 0;
+}
+
+static const VMStateDescription vmstate_endpoint = {
+    .name = "endpoint",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_domain = {
+    .name = "domain",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .pre_load = domain_preload,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
+        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
+                        vmstate_interval_mapping,
+                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
+        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
+                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
+        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static gboolean reconstruct_endpoints(gpointer key, gpointer value,
+                                      gpointer data)
+{
+    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
+    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
+    VirtIOIOMMUEndpoint *iter;
+    IOMMUMemoryRegion *mr;
+
+    QLIST_FOREACH(iter, &d->endpoint_list, next) {
+        mr = virtio_iommu_mr(s, iter->id);
+        assert(mr);
+
+        iter->domain = d;
+        iter->iommu_mr = mr;
+        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
+    }
+    return false; /* continue the domain traversal */
+}
+
+static int iommu_post_load(void *opaque, int version_id)
+{
+    VirtIOIOMMU *s = opaque;
+
+    g_tree_foreach(s->domains, reconstruct_endpoints, s);
+
+    /*
+     * Memory regions are dynamically turned on/off depending on
+     * 'config.bypass' and the type of the attached domain, if any.
+     * After migration, we need to make sure the memory regions are
+     * still correct.
+     */
+    virtio_iommu_switch_address_space_all(s);
+    return 0;
+}
+
+static const VMStateDescription vmstate_virtio_iommu_device = {
+    .name = "virtio-iommu-device",
+    .minimum_version_id = 2,
+    .version_id = 2,
+    .post_load = iommu_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
+                                   &vmstate_domain, VirtIOIOMMUDomain),
+        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_virtio_iommu = {
+    .name = "virtio-iommu",
+    .minimum_version_id = 2,
+    .priority = MIG_PRI_IOMMU,
+    .version_id = 2,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property virtio_iommu_properties[] = {
+    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
+    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_iommu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    device_class_set_props(dc, virtio_iommu_properties);
+    dc->vmsd = &vmstate_virtio_iommu;
+
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    vdc->realize = virtio_iommu_device_realize;
+    vdc->unrealize = virtio_iommu_device_unrealize;
+    vdc->reset = virtio_iommu_device_reset;
+    vdc->get_config = virtio_iommu_get_config;
+    vdc->set_config = virtio_iommu_set_config;
+    vdc->get_features = virtio_iommu_get_features;
+    vdc->set_status = virtio_iommu_set_status;
+    vdc->vmsd = &vmstate_virtio_iommu_device;
+}
+
+static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
+                                                  void *data)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+    imrc->translate = virtio_iommu_translate;
+    imrc->replay = virtio_iommu_replay;
+    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
+    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
+}
+
+static const TypeInfo virtio_iommu_info = {
+    .name = TYPE_VIRTIO_IOMMU,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOIOMMU),
+    .instance_init = virtio_iommu_instance_init,
+    .class_init = virtio_iommu_class_init,
+};
+
+static const TypeInfo virtio_iommu_memory_region_info = {
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
+    .class_init = virtio_iommu_memory_region_class_init,
+};
+
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_iommu_info);
+    type_register_static(&virtio_iommu_memory_region_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c
new file mode 100644
index 00000000..5c5c1e3a
--- /dev/null
+++ b/hw/virtio/virtio-mem-pci.c
@@ -0,0 +1,161 @@
+/*
+ * Virtio MEM PCI device
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "virtio-mem-pci.h"
+#include "hw/mem/memory-device.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-misc.h"
+
+static void virtio_mem_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOMEMPCI *mem_pci = VIRTIO_MEM_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&mem_pci->vdev);
+
+    virtio_pci_force_virtio_1(vpci_dev);
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+
+static void virtio_mem_pci_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                    Error **errp)
+{
+    object_property_set_uint(OBJECT(md), VIRTIO_MEM_ADDR_PROP, addr, errp);
+}
+
+static uint64_t virtio_mem_pci_get_addr(const MemoryDeviceState *md)
+{
+    return object_property_get_uint(OBJECT(md), VIRTIO_MEM_ADDR_PROP,
+                                    &error_abort);
+}
+
+static MemoryRegion *virtio_mem_pci_get_memory_region(MemoryDeviceState *md,
+                                                      Error **errp)
+{
+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+    return vmc->get_memory_region(vmem, errp);
+}
+
+static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md,
+                                                Error **errp)
+{
+    return object_property_get_uint(OBJECT(md), VIRTIO_MEM_SIZE_PROP,
+                                    errp);
+}
+
+static void virtio_mem_pci_fill_device_info(const MemoryDeviceState *md,
+                                            MemoryDeviceInfo *info)
+{
+    VirtioMEMDeviceInfo *vi = g_new0(VirtioMEMDeviceInfo, 1);
+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+    VirtIOMEMClass *vpc = VIRTIO_MEM_GET_CLASS(vmem);
+    DeviceState *dev = DEVICE(md);
+
+    if (dev->id) {
+        vi->has_id = true;
+        vi->id = g_strdup(dev->id);
+    }
+
+    /* let the real device handle everything else */
+    vpc->fill_device_info(vmem, vi);
+
+    info->u.virtio_mem.data = vi;
+    info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_MEM;
+}
+
+static uint64_t virtio_mem_pci_get_min_alignment(const MemoryDeviceState *md)
+{
+    return object_property_get_uint(OBJECT(md), VIRTIO_MEM_BLOCK_SIZE_PROP,
+                                    &error_abort);
+}
+
+static void virtio_mem_pci_size_change_notify(Notifier *notifier, void *data)
+{
+    VirtIOMEMPCI *pci_mem = container_of(notifier, VirtIOMEMPCI,
+                                         size_change_notifier);
+    DeviceState *dev = DEVICE(pci_mem);
+    char *qom_path = object_get_canonical_path(OBJECT(dev));
+    const uint64_t * const size_p = data;
+
+    qapi_event_send_memory_device_size_change(!!dev->id, dev->id, *size_p,
+                                              qom_path);
+    g_free(qom_path);
+}
+
+static void virtio_mem_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
+
+    k->realize = virtio_mem_pci_realize;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+    pcidev_k->class_id = PCI_CLASS_OTHERS;
+
+    mdc->get_addr = virtio_mem_pci_get_addr;
+    mdc->set_addr = virtio_mem_pci_set_addr;
+    mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;
+    mdc->get_memory_region = virtio_mem_pci_get_memory_region;
+    mdc->fill_device_info = virtio_mem_pci_fill_device_info;
+    mdc->get_min_alignment = virtio_mem_pci_get_min_alignment;
+}
+
+static void virtio_mem_pci_instance_init(Object *obj)
+{
+    VirtIOMEMPCI *dev = VIRTIO_MEM_PCI(obj);
+    VirtIOMEMClass *vmc;
+    VirtIOMEM *vmem;
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VIRTIO_MEM);
+
+    dev->size_change_notifier.notify = virtio_mem_pci_size_change_notify;
+    vmem = VIRTIO_MEM(&dev->vdev);
+    vmc = VIRTIO_MEM_GET_CLASS(vmem);
+    /*
+     * We never remove the notifier again, as we expect both devices to
+     * disappear at the same time.
+     */
+    vmc->add_size_change_notifier(vmem, &dev->size_change_notifier);
+
+    object_property_add_alias(obj, VIRTIO_MEM_BLOCK_SIZE_PROP,
+                              OBJECT(&dev->vdev), VIRTIO_MEM_BLOCK_SIZE_PROP);
+    object_property_add_alias(obj, VIRTIO_MEM_SIZE_PROP, OBJECT(&dev->vdev),
+                              VIRTIO_MEM_SIZE_PROP);
+    object_property_add_alias(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP,
+                              OBJECT(&dev->vdev),
+                              VIRTIO_MEM_REQUESTED_SIZE_PROP);
+}
+
+static const VirtioPCIDeviceTypeInfo virtio_mem_pci_info = {
+    .base_name = TYPE_VIRTIO_MEM_PCI,
+    .generic_name = "virtio-mem-pci",
+    .instance_size = sizeof(VirtIOMEMPCI),
+    .instance_init = virtio_mem_pci_instance_init,
+    .class_init = virtio_mem_pci_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_MEMORY_DEVICE },
+        { }
+    },
+};
+
+static void virtio_mem_pci_register_types(void)
+{
+    virtio_pci_types_register(&virtio_mem_pci_info);
+}
+type_init(virtio_mem_pci_register_types)
diff --git a/hw/virtio/virtio-mem-pci.h b/hw/virtio/virtio-mem-pci.h
new file mode 100644
index 00000000..e636e1a4
--- /dev/null
+++ b/hw/virtio/virtio-mem-pci.h
@@ -0,0 +1,35 @@
+/*
+ * Virtio MEM PCI device
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_MEM_PCI_H
+#define QEMU_VIRTIO_MEM_PCI_H
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-mem.h"
+#include "qom/object.h"
+
+typedef struct VirtIOMEMPCI VirtIOMEMPCI;
+
+/*
+ * virtio-mem-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VIRTIO_MEM_PCI "virtio-mem-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIOMEMPCI, VIRTIO_MEM_PCI,
+                         TYPE_VIRTIO_MEM_PCI)
+
+struct VirtIOMEMPCI {
+    VirtIOPCIProxy parent_obj;
+    VirtIOMEM vdev;
+    Notifier size_change_notifier;
+};
+
+#endif /* QEMU_VIRTIO_MEM_PCI_H */
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
new file mode 100644
index 00000000..ed170def
--- /dev/null
+++ b/hw/virtio/virtio-mem.c
@@ -0,0 +1,1386 @@
+/*
+ * Virtio MEM device
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "qemu/units.h"
+#include "sysemu/numa.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/reset.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-mem.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "exec/ram_addr.h"
+#include "migration/misc.h"
+#include "hw/boards.h"
+#include "hw/qdev-properties.h"
+#include CONFIG_DEVICES
+#include "trace.h"
+
+/*
+ * We only had legacy x86 guests that did not support
+ * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
+ */
+#if defined(TARGET_X86_64) || defined(TARGET_I386)
+#define VIRTIO_MEM_HAS_LEGACY_GUESTS
+#endif
+
+/*
+ * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
+ * bitmap small.
+ */
+#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
+
+static uint32_t virtio_mem_default_thp_size(void)
+{
+    uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
+
+#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
+    default_thp_size = 2 * MiB;
+#elif defined(__aarch64__)
+    if (qemu_real_host_page_size() == 4 * KiB) {
+        default_thp_size = 2 * MiB;
+    } else if (qemu_real_host_page_size() == 16 * KiB) {
+        default_thp_size = 32 * MiB;
+    } else if (qemu_real_host_page_size() == 64 * KiB) {
+        default_thp_size = 512 * MiB;
+    }
+#endif
+
+    return default_thp_size;
+}
+
+/*
+ * We want to have a reasonable default block size such that
+ * 1. We avoid splitting THPs when unplugging memory, which degrades
+ *    performance.
+ * 2. We avoid placing THPs for plugged blocks that also cover unplugged
+ *    blocks.
+ *
+ * The actual THP size might differ between Linux kernels, so we try to probe
+ * it. In the future (if we ever run into issues regarding 2.), we might want
+ * to disable THP in case we fail to properly probe the THP size, or if the
+ * block size is configured smaller than the THP size.
+ */
+static uint32_t thp_size;
+
+#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+static uint32_t virtio_mem_thp_size(void)
+{
+    gchar *content = NULL;
+    const char *endptr;
+    uint64_t tmp;
+
+    if (thp_size) {
+        return thp_size;
+    }
+
+    /*
+     * Try to probe the actual THP size, fall back to (sane but possibly
+     * incorrect) default sizes.
+     */
+    if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
+        !qemu_strtou64(content, &endptr, 0, &tmp) &&
+        (!endptr || *endptr == '\n')) {
+        /* Sanity-check the value and fall back to something reasonable. */
+        if (!tmp || !is_power_of_2(tmp)) {
+            warn_report("Read unsupported THP size: %" PRIx64, tmp);
+        } else {
+            thp_size = tmp;
+        }
+    }
+
+    if (!thp_size) {
+        thp_size = virtio_mem_default_thp_size();
+        warn_report("Could not detect THP size, falling back to %" PRIu64
+                    " MiB.", thp_size / MiB);
+    }
+
+    g_free(content);
+    return thp_size;
+}
+
+static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
+{
+    const uint64_t page_size = qemu_ram_pagesize(rb);
+
+    /* We can have hugetlbfs with a page size smaller than the THP size. */
+    if (page_size == qemu_real_host_page_size()) {
+        return MAX(page_size, virtio_mem_thp_size());
+    }
+    return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
+}
+
+#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
+static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
+{
+    /*
+     * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
+     * anonymous RAM. In any other case, reading unplugged *can* populate a
+     * fresh page, consuming actual memory.
+     */
+    return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
+           qemu_ram_pagesize(rb) == qemu_real_host_page_size();
+}
+#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
+
+/*
+ * Size the usable region bigger than the requested size if possible. Esp.
+ * Linux guests will only add (aligned) memory blocks in case they fully
+ * fit into the usable region, but plug+online only a subset of the pages.
+ * The memory block size corresponds mostly to the section size.
+ *
+ * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
+ * a section size of 512MB on arm64 (as long as the start address is properly
+ * aligned, similar to ordinary DIMMs).
+ *
+ * We can change this at any time and maybe even make it configurable if
+ * necessary (as the section size can change). But it's more likely that the
+ * section size will rather get smaller and not bigger over time.
+ */
+#if defined(TARGET_X86_64) || defined(TARGET_I386)
+#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
+#elif defined(TARGET_ARM)
+#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
+#else
+#error VIRTIO_MEM_USABLE_EXTENT not defined
+#endif
+
+static bool virtio_mem_is_busy(void)
+{
+    /*
+     * Postcopy cannot handle concurrent discards and we don't want to migrate
+     * pages on-demand with stale content when plugging new blocks.
+     *
+     * For precopy, we don't want unplugged blocks in our migration stream, and
+     * when plugging new blocks, the page content might differ between source
+     * and destination (observable by the guest when not initializing pages
+     * after plugging them) until we're running on the destination (as we didn't
+     * migrate these blocks when they were unplugged).
+     */
+    return migration_in_incoming_postcopy() || !migration_is_idle();
+}
+
+typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
+                                   uint64_t offset, uint64_t size);
+
+static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
+                                               virtio_mem_range_cb cb)
+{
+    unsigned long first_zero_bit, last_zero_bit;
+    uint64_t offset, size;
+    int ret = 0;
+
+    first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
+    while (first_zero_bit < vmem->bitmap_size) {
+        offset = first_zero_bit * vmem->block_size;
+        last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+                                      first_zero_bit + 1) - 1;
+        size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
+
+        ret = cb(vmem, arg, offset, size);
+        if (ret) {
+            break;
+        }
+        first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+                                            last_zero_bit + 2);
+    }
+    return ret;
+}
+
+/*
+ * Adjust the memory section to cover the intersection with the given range.
+ *
+ * Returns false if the intersection is empty, otherwise returns true.
+ */
+static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
+                                                uint64_t offset, uint64_t size)
+{
+    uint64_t start = MAX(s->offset_within_region, offset);
+    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
+                       offset + size);
+
+    if (end <= start) {
+        return false;
+    }
+
+    s->offset_within_address_space += start - s->offset_within_region;
+    s->offset_within_region = start;
+    s->size = int128_make64(end - start);
+    return true;
+}
+
+typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
+
+static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
+                                               MemoryRegionSection *s,
+                                               void *arg,
+                                               virtio_mem_section_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    int ret = 0;
+
+    first_bit = s->offset_within_region / vmem->block_size; /* block index */
+    first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
+    while (first_bit < vmem->bitmap_size) {
+        MemoryRegionSection tmp = *s;
+
+        offset = first_bit * vmem->block_size;
+        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+                                      first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * vmem->block_size;
+
+        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+            break;
+        }
+        ret = cb(&tmp, arg);
+        if (ret) {
+            break;
+        }
+        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+                                  last_bit + 2);
+    }
+    return ret;
+}
+
+static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
+                                                 MemoryRegionSection *s,
+                                                 void *arg,
+                                                 virtio_mem_section_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    int ret = 0;
+
+    first_bit = s->offset_within_region / vmem->block_size; /* block index */
+    first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
+    while (first_bit < vmem->bitmap_size) {
+        MemoryRegionSection tmp = *s;
+
+        offset = first_bit * vmem->block_size;
+        last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+                                 first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * vmem->block_size;
+
+        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+            break;
+        }
+        ret = cb(&tmp, arg);
+        if (ret) {
+            break;
+        }
+        first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+                                       last_bit + 2);
+    }
+    return ret;
+}
+
+static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
+{
+    RamDiscardListener *rdl = arg;
+
+    return rdl->notify_populate(rdl, s);
+}
+
+static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
+{
+    RamDiscardListener *rdl = arg;
+
+    rdl->notify_discard(rdl, s);
+    return 0;
+}
+
+static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
+                                     uint64_t size)
+{
+    RamDiscardListener *rdl;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        MemoryRegionSection tmp = *rdl->section;
+
+        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+            continue;
+        }
+        rdl->notify_discard(rdl, &tmp);
+    }
+}
+
+static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
+                                  uint64_t size)
+{
+    RamDiscardListener *rdl, *rdl2;
+    int ret = 0;
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        MemoryRegionSection tmp = *rdl->section;
+
+        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+            continue;
+        }
+        ret = rdl->notify_populate(rdl, &tmp);
+        if (ret) {
+            break;
+        }
+    }
+
+    if (ret) {
+        /* Roll back: discard for every listener notified before rdl. */
+        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
+            MemoryRegionSection tmp = *rdl2->section;
+
+            if (rdl2 == rdl) {
+                break;
+            }
+            if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
+                continue;
+            }
+            rdl2->notify_discard(rdl2, &tmp);
+        }
+    }
+    return ret;
+}
+
+static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
+{
+    RamDiscardListener *rdl;
+
+    if (!vmem->size) {
+        return;
+    }
+
+    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        if (rdl->double_discard_supported) {
+            rdl->notify_discard(rdl, rdl->section);
+        } else {
+            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+                                                virtio_mem_notify_discard_cb);
+        }
+    }
+}
+
+static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
+                                   uint64_t size, bool plugged)
+{
+    const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
+    const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
+    unsigned long found_bit;
+
+    /* We fake a shorter bitmap to avoid searching too far. */
+    if (plugged) {
+        found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
+    } else {
+        found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
+    }
+    return found_bit > last_bit;
+}
+
+static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
+                                  uint64_t size, bool plugged)
+{
+    const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
+    const unsigned long nbits = size / vmem->block_size;
+
+    if (plugged) {
+        bitmap_set(vmem->bitmap, bit, nbits);
+    } else {
+        bitmap_clear(vmem->bitmap, bit, nbits);
+    }
+}
+
+static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
+                                     struct virtio_mem_resp *resp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
+    VirtQueue *vq = vmem->vq;
+
+    trace_virtio_mem_send_response(le16_to_cpu(resp->type));
+    iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
+
+    virtqueue_push(vq, elem, sizeof(*resp));
+    virtio_notify(vdev, vq);
+}
+
+static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
+                                            VirtQueueElement *elem,
+                                            uint16_t type)
+{
+    struct virtio_mem_resp resp = {
+        .type = cpu_to_le16(type),
+    };
+
+    virtio_mem_send_response(vmem, elem, &resp);
+}
+
+static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
+                                   uint64_t size)
+{
+    if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
+        return false;
+    }
+    if (gpa + size < gpa || !size) {
+        return false;
+    }
+    if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
+        return false;
+    }
+    if (gpa + size > vmem->addr + vmem->usable_region_size) {
+        return false;
+    }
+    return true;
+}
+
+static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
+                                      uint64_t size, bool plug)
+{
+    const uint64_t offset = start_gpa - vmem->addr;
+    RAMBlock *rb = vmem->memdev->mr.ram_block;
+
+    if (virtio_mem_is_busy()) {
+        return -EBUSY;
+    }
+
+    if (!plug) {
+        if (ram_block_discard_range(rb, offset, size)) {
+            return -EBUSY;
+        }
+        virtio_mem_notify_unplug(vmem, offset, size);
+    } else {
+        int ret = 0;
+
+        if (vmem->prealloc) {
+            void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
+            int fd = memory_region_get_fd(&vmem->memdev->mr);
+            Error *local_err = NULL;
+
+            qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
+            if (local_err) {
+                static bool warned;
+
+                /*
+                 * Warn only once, we don't want to fill the log with these
+                 * warnings.
+                 */
+                if (!warned) {
+                    warn_report_err(local_err);
+                    warned = true;
+                } else {
+                    error_free(local_err);
+                }
+                ret = -EBUSY;
+            }
+        }
+        if (!ret) {
+            ret = virtio_mem_notify_plug(vmem, offset, size);
+        }
+
+        if (ret) {
+            /* Could be preallocation or a notifier populated memory. */
+            ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
+            return -EBUSY;
+        }
+    }
+    virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
+    return 0;
+}
+
+static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
+                                           uint16_t nb_blocks, bool plug)
+{
+    const uint64_t size = nb_blocks * vmem->block_size;
+    int ret;
+
+    if (!virtio_mem_valid_range(vmem, gpa, size)) {
+        return VIRTIO_MEM_RESP_ERROR;
+    }
+
+    if (plug && (vmem->size + size > vmem->requested_size)) {
+        return VIRTIO_MEM_RESP_NACK;
+    }
+
+    /* test if really all blocks are in the opposite state */
+    if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
+        return VIRTIO_MEM_RESP_ERROR;
+    }
+
+    ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
+    if (ret) {
+        return VIRTIO_MEM_RESP_BUSY;
+    }
+    if (plug) {
+        vmem->size += size;
+    } else {
+        vmem->size -= size;
+    }
+    notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
+    return VIRTIO_MEM_RESP_ACK;
+}
+
+static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
+                                    struct virtio_mem_req *req)
+{
+    const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
+    const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
+    uint16_t type;
+
+    trace_virtio_mem_plug_request(gpa, nb_blocks);
+    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
+    virtio_mem_send_response_simple(vmem, elem, type);
+}
+
+static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
+                                      struct virtio_mem_req *req)
+{
+    const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
+    const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
+    uint16_t type;
+
+    trace_virtio_mem_unplug_request(gpa, nb_blocks);
+    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
+    virtio_mem_send_response_simple(vmem, elem, type);
+}
+
+static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
+                                            uint64_t requested_size,
+                                            bool can_shrink)
+{
+    uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
+                           requested_size + VIRTIO_MEM_USABLE_EXTENT);
+
+    /* The usable region size always has to be multiples of the block size. */
+    newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
+
+    if (!requested_size) {
+        newsize = 0;
+    }
+
+    if (newsize < vmem->usable_region_size && !can_shrink) {
+        return;
+    }
+
+    trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
+    vmem->usable_region_size = newsize;
+}
+
+/*
+ * Unplug every block of the device.
+ *
+ * Discards the whole used length of the backing RAM block, notifies
+ * listeners, clears the plugged-block bitmap, resets the plugged size
+ * (notifying size-change notifiers if it was non-zero) and recomputes the
+ * usable region, allowing it to shrink.
+ *
+ * Returns 0 on success, or -EBUSY if virtio_mem_is_busy() reports busy or
+ * discarding the RAM fails.
+ */
+static int virtio_mem_unplug_all(VirtIOMEM *vmem)
+{
+    RAMBlock *ram_block = vmem->memdev->mr.ram_block;
+
+    if (virtio_mem_is_busy() ||
+        ram_block_discard_range(ram_block, 0,
+                                qemu_ram_get_used_length(ram_block))) {
+        return -EBUSY;
+    }
+
+    virtio_mem_notify_unplug_all(vmem);
+    bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
+
+    /* Only tell size-change notifiers when the plugged size really changes. */
+    if (vmem->size != 0) {
+        vmem->size = 0;
+        notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
+    }
+
+    trace_virtio_mem_unplugged_all();
+    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
+    return 0;
+}
+
+/*
+ * Handle a VIRTIO_MEM_REQ_UNPLUG_ALL request: try to unplug everything and
+ * answer with VIRTIO_MEM_RESP_ACK on success or VIRTIO_MEM_RESP_BUSY if
+ * virtio_mem_unplug_all() failed.
+ */
+static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
+                                          VirtQueueElement *elem)
+{
+    int rc;
+
+    trace_virtio_mem_unplug_all_request();
+    rc = virtio_mem_unplug_all(vmem);
+    virtio_mem_send_response_simple(vmem, elem,
+                                    rc ? VIRTIO_MEM_RESP_BUSY
+                                       : VIRTIO_MEM_RESP_ACK);
+}
+
+/*
+ * Handle a VIRTIO_MEM_REQ_STATE request.
+ *
+ * Replies with VIRTIO_MEM_RESP_ERROR if the requested range is not valid
+ * for this device. Otherwise replies with VIRTIO_MEM_RESP_ACK and a state
+ * of PLUGGED (all blocks plugged), UNPLUGGED (all blocks unplugged) or
+ * MIXED.
+ */
+static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
+                                     struct virtio_mem_req *req)
+{
+    const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
+    const uint64_t gpa = le64_to_cpu(req->u.state.addr);
+    const uint64_t size = nb_blocks * vmem->block_size;
+    struct virtio_mem_resp resp = {
+        .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
+    };
+    uint16_t state;
+
+    trace_virtio_mem_state_request(gpa, nb_blocks);
+    if (!virtio_mem_valid_range(vmem, gpa, size)) {
+        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
+        return;
+    }
+
+    if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
+        state = VIRTIO_MEM_STATE_PLUGGED;
+    } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
+        state = VIRTIO_MEM_STATE_UNPLUGGED;
+    } else {
+        state = VIRTIO_MEM_STATE_MIXED;
+    }
+
+    /* The response travels in little-endian; the trace uses host order. */
+    resp.u.state.state = cpu_to_le16(state);
+    trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
+    virtio_mem_send_response(vmem, elem, &resp);
+}
+
+/*
+ * Virtqueue handler: pop and process requests until the queue is empty.
+ *
+ * Each element must carry a full struct virtio_mem_req in its out buffers
+ * and room for a struct virtio_mem_resp in its in buffers. On a protocol
+ * violation (short request, short response buffer, unknown request type)
+ * the device is flagged via virtio_error(), the element is detached and
+ * freed, and processing stops.
+ */
+static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
+{
+    const int req_len = sizeof(struct virtio_mem_req);
+    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
+    struct virtio_mem_req req;
+    VirtQueueElement *elem;
+    uint16_t type;
+
+    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement))) != NULL) {
+        size_t resp_space;
+
+        if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, req_len) <
+            req_len) {
+            virtio_error(vdev, "virtio-mem protocol violation: invalid request"
+                         " size: %d", req_len);
+            goto drop_element;
+        }
+
+        resp_space = iov_size(elem->in_sg, elem->in_num);
+        if (resp_space < sizeof(struct virtio_mem_resp)) {
+            virtio_error(vdev, "virtio-mem protocol violation: not enough space"
+                         " for response: %zu", resp_space);
+            goto drop_element;
+        }
+
+        type = le16_to_cpu(req.type);
+        switch (type) {
+        case VIRTIO_MEM_REQ_PLUG:
+            virtio_mem_plug_request(vmem, elem, &req);
+            break;
+        case VIRTIO_MEM_REQ_UNPLUG:
+            virtio_mem_unplug_request(vmem, elem, &req);
+            break;
+        case VIRTIO_MEM_REQ_UNPLUG_ALL:
+            virtio_mem_unplug_all_request(vmem, elem);
+            break;
+        case VIRTIO_MEM_REQ_STATE:
+            virtio_mem_state_request(vmem, elem, &req);
+            break;
+        default:
+            virtio_error(vdev, "virtio-mem protocol violation: unknown request"
+                         " type: %d", type);
+            goto drop_element;
+        }
+
+        g_free(elem);
+    }
+    return;
+
+drop_element:
+    /* Common exit for protocol violations: give the element back unused. */
+    virtqueue_detach_element(vq, elem, 0);
+    g_free(elem);
+}
+
+/*
+ * Fill the guest-visible device configuration (struct virtio_mem_config)
+ * from the current device state. All fields are stored little-endian.
+ */
+static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
+    struct virtio_mem_config *cfg = (struct virtio_mem_config *)config_data;
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+
+    /* Static geometry of the device. */
+    cfg->block_size = cpu_to_le64(vmem->block_size);
+    cfg->node_id = cpu_to_le16(vmem->node);
+    cfg->addr = cpu_to_le64(vmem->addr);
+    cfg->region_size = cpu_to_le64(region_size);
+
+    /* Values that change at runtime. */
+    cfg->usable_region_size = cpu_to_le64(vmem->usable_region_size);
+    cfg->plugged_size = cpu_to_le64(vmem->size);
+    cfg->requested_size = cpu_to_le64(vmem->requested_size);
+}
+
+/*
+ * Compute the feature bits offered to the guest.
+ *
+ * VIRTIO_MEM_F_ACPI_PXM is added when the machine has NUMA state and QEMU
+ * was built with CONFIG_ACPI. VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE is added
+ * when the "unplugged inaccessible" setting is ON; by the time this runs
+ * the setting must already have been resolved away from AUTO.
+ */
+static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
+                                        Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
+
+    if (ms->numa_state) {
+#if defined(CONFIG_ACPI)
+        virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
+#endif
+    }
+
+    assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
+    switch (vmem->unplugged_inaccessible) {
+    case ON_OFF_AUTO_ON:
+        virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
+        break;
+    default:
+        break;
+    }
+
+    return features;
+}
+
+/*
+ * Reject a feature negotiation in which the host offered
+ * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE but the guest did not accept it.
+ *
+ * Returns 0 if the negotiated features are acceptable, -EFAULT otherwise.
+ */
+static int virtio_mem_validate_features(VirtIODevice *vdev)
+{
+    if (!virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
+        /* Not offered, so there is nothing the guest had to accept. */
+        return 0;
+    }
+
+    return virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)
+           ? 0 : -EFAULT;
+}
+
+/*
+ * System reset handler (registered from realize).
+ *
+ * Tries to unplug all memory, which also shrinks the usable region. The
+ * attempt can fail with -EBUSY; the result is deliberately ignored here, in
+ * which case the guest has to clean up itself by issuing
+ * VIRTIO_MEM_REQ_UNPLUG_ALL.
+ */
+static void virtio_mem_system_reset(void *opaque)
+{
+    VirtIOMEM *dev = VIRTIO_MEM(opaque);
+
+    virtio_mem_unplug_all(dev);
+}
+
+/*
+ * Realize the virtio-mem device.
+ *
+ * Validates the configuration (memdev set, not already mapped and backed by
+ * plain RAM; NUMA node in range; no mlock; block size at least the backend
+ * page size; requested size, address and memdev size aligned to the block
+ * size), resolves the "unplugged inaccessible" setting, and picks a default
+ * block size if none was configured.
+ *
+ * It then enables coordinated RAM discards, discards the whole backend so
+ * the device starts with no memory plugged, allocates the block bitmap,
+ * creates the request virtqueue and registers the RAM for migration, the
+ * reset handler and this device as the region's RamDiscardManager.
+ *
+ * On any failure an error is stored in @errp and the function returns
+ * without having registered anything.
+ */
+static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOMEM *vmem = VIRTIO_MEM(dev);
+    uint64_t page_size;
+    RAMBlock *rb;
+    int ret;
+
+    if (!vmem->memdev) {
+        error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
+        return;
+    } else if (host_memory_backend_is_mapped(vmem->memdev)) {
+        error_setg(errp, "'%s' property specifies a busy memdev: %s",
+                   VIRTIO_MEM_MEMDEV_PROP,
+                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
+        return;
+    } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
+        memory_region_is_rom(&vmem->memdev->mr) ||
+        !vmem->memdev->mr.ram_block) {
+        error_setg(errp, "'%s' property specifies an unsupported memdev",
+                   VIRTIO_MEM_MEMDEV_PROP);
+        return;
+    }
+
+    /* Without NUMA nodes only node 0 is acceptable. */
+    if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
+        (!nb_numa_nodes && vmem->node)) {
+        error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
+                   " the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
+                   vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
+        return;
+    }
+
+    if (enable_mlock) {
+        error_setg(errp, "Incompatible with mlock");
+        return;
+    }
+
+    rb = vmem->memdev->mr.ram_block;
+    page_size = qemu_ram_pagesize(rb);
+
+#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
+    /* Resolve AUTO depending on whether the backend has a shared zeropage. */
+    switch (vmem->unplugged_inaccessible) {
+    case ON_OFF_AUTO_AUTO:
+        if (virtio_mem_has_shared_zeropage(rb)) {
+            vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
+        } else {
+            vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
+        }
+        break;
+    case ON_OFF_AUTO_OFF:
+        if (!virtio_mem_has_shared_zeropage(rb)) {
+            warn_report("'%s' property set to 'off' with a memdev that does"
+                        " not support the shared zeropage.",
+                        VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
+        }
+        break;
+    default:
+        break;
+    }
+#else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
+    vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
+#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
+
+    /*
+     * If the block size wasn't configured by the user, use a sane default. This
+     * allows using hugetlbfs backends of any page size without manual
+     * intervention.
+     */
+    if (!vmem->block_size) {
+        vmem->block_size = virtio_mem_default_block_size(rb);
+    }
+
+    if (vmem->block_size < page_size) {
+        error_setg(errp, "'%s' property has to be at least the page size (0x%"
+                   PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
+        return;
+    } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
+        /* The value is in MiB, so print it in decimal rather than bare hex. */
+        warn_report("'%s' property is smaller than the default block size (%"
+                    PRIu64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
+                    (uint64_t)(virtio_mem_default_block_size(rb) / MiB));
+    }
+    if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
+        error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
+                   ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
+                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
+        return;
+    } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
+        error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
+                   ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
+                   vmem->block_size);
+        return;
+    } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
+                                vmem->block_size)) {
+        error_setg(errp, "'%s' property memdev size has to be multiples of"
+                   " '%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
+                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
+        return;
+    }
+
+    if (ram_block_coordinated_discard_require(true)) {
+        error_setg(errp, "Discarding RAM is disabled");
+        return;
+    }
+
+    /* Start out with all memory discarded, i.e. nothing plugged. */
+    ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
+    if (ret) {
+        error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
+        /* Undo the requirement taken just above before bailing out. */
+        ram_block_coordinated_discard_require(false);
+        return;
+    }
+
+    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
+
+    /* One bit per block of the backing memory region. */
+    vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
+                        vmem->block_size;
+    vmem->bitmap = bitmap_new(vmem->bitmap_size);
+
+    virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
+    vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
+
+    host_memory_backend_set_mapped(vmem->memdev, true);
+    vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
+    qemu_register_reset(virtio_mem_system_reset, vmem);
+
+    /*
+     * Set ourselves as RamDiscardManager before the plug handler maps the
+     * memory region and exposes it via an address space.
+     */
+    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+                                          RAM_DISCARD_MANAGER(vmem));
+}
+
+/*
+ * Unrealize the virtio-mem device.
+ *
+ * Undoes the registrations made by virtio_mem_device_realize() in reverse
+ * order: RamDiscardManager, reset handler, RAM migration registration,
+ * "mapped" marking of the backend, virtqueue, virtio core state, the block
+ * bitmap and finally the coordinated-discard requirement.
+ */
+static void virtio_mem_device_unrealize(DeviceState *dev)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOMEM *vmem = VIRTIO_MEM(dev);
+
+    /*
+     * The unplug handler unmapped the memory region, it cannot be
+     * found via an address space anymore. Unset ourselves.
+     */
+    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
+    qemu_unregister_reset(virtio_mem_system_reset, vmem);
+    vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
+    host_memory_backend_set_mapped(vmem->memdev, false);
+    /* Queue 0 is the single request queue created in realize. */
+    virtio_del_queue(vdev, 0);
+    virtio_cleanup(vdev);
+    g_free(vmem->bitmap);
+    /* Drop the requirement taken in realize. */
+    ram_block_coordinated_discard_require(false);
+}
+
+/*
+ * Range callback: discard [@offset, @offset + @size) of the backing RAM
+ * block. @arg is unused.
+ *
+ * Returns 0 on success and -EINVAL if the discard failed, whatever the
+ * underlying error was.
+ */
+static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+                                       uint64_t offset, uint64_t size)
+{
+    if (ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size)) {
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/*
+ * Discard every range that the bitmap marks as unplugged, so that memory
+ * the guest has unplugged is really gone after migration.
+ *
+ * Returns the result of the range walk (0 on success).
+ */
+static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
+{
+    const int rc = virtio_mem_for_each_unplugged_range(vmem, NULL,
+                                                       virtio_mem_discard_range_cb);
+
+    return rc;
+}
+
+/*
+ * Migration post-load hook for the device state.
+ *
+ * Replays the now-loaded plugged ranges to every registered
+ * RamDiscardListener, then, unless an incoming postcopy migration is in
+ * progress, re-discards all unplugged ranges.
+ *
+ * Returns 0 on success or the first non-zero result of a replay/discard.
+ */
+static int virtio_mem_post_load(void *opaque, int version_id)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+    RamDiscardListener *listener;
+
+    /*
+     * We started out with all memory discarded and our memory region is mapped
+     * into an address space. Replay, now that we updated the bitmap.
+     */
+    QLIST_FOREACH(listener, &vmem->rdl_list, next) {
+        const int rc = virtio_mem_for_each_plugged_section(vmem,
+                                                listener->section, listener,
+                                                virtio_mem_notify_populate_cb);
+
+        if (rc) {
+            return rc;
+        }
+    }
+
+    return migration_in_incoming_postcopy()
+           ? 0 : virtio_mem_restore_unplugged(vmem);
+}
+
+/*
+ * Temporary migration state used to verify that source and destination were
+ * configured identically. Filled from the device in pre_save and compared
+ * against the device in post_load.
+ */
+typedef struct VirtIOMEMMigSanityChecks {
+    VirtIOMEM *parent;      /* device this snapshot belongs to */
+    uint64_t addr;          /* copy of the device's addr */
+    uint64_t region_size;   /* size of the memdev's memory region */
+    uint64_t block_size;    /* copy of the device's block_size */
+    uint32_t node;          /* copy of the device's node */
+} VirtIOMEMMigSanityChecks;
+
+/*
+ * Pre-save hook: snapshot the device properties that must match on the
+ * migration destination into the temporary sanity-check struct.
+ * Always returns 0.
+ */
+static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
+{
+    VirtIOMEMMigSanityChecks *snapshot = opaque;
+    const VirtIOMEM *dev = snapshot->parent;
+
+    snapshot->node = dev->node;
+    snapshot->block_size = dev->block_size;
+    snapshot->region_size = memory_region_size(&dev->memdev->mr);
+    snapshot->addr = dev->addr;
+    return 0;
+}
+
+/*
+ * Post-load hook: compare the values received from the migration source
+ * with the local device configuration.
+ *
+ * Checks, in order, the address, the memory region size, the block size and
+ * the node. The first mismatch is reported via error_report() and makes the
+ * function return -EINVAL; returns 0 if everything matches.
+ */
+static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
+{
+    VirtIOMEMMigSanityChecks *incoming = opaque;
+    VirtIOMEM *vmem = incoming->parent;
+    const uint64_t local_region_size = memory_region_size(&vmem->memdev->mr);
+    int ret = 0;
+
+    if (incoming->addr != vmem->addr) {
+        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
+                     VIRTIO_MEM_ADDR_PROP, incoming->addr, vmem->addr);
+        ret = -EINVAL;
+    } else if (incoming->region_size != local_region_size) {
+        /*
+         * Note: Preparation for resizeable memory regions. The maximum size
+         * of the memory region must not change during migration.
+         */
+        error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
+                     PRIx64, VIRTIO_MEM_MEMDEV_PROP, incoming->region_size,
+                     local_region_size);
+        ret = -EINVAL;
+    } else if (incoming->block_size != vmem->block_size) {
+        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
+                     VIRTIO_MEM_BLOCK_SIZE_PROP, incoming->block_size,
+                     vmem->block_size);
+        ret = -EINVAL;
+    } else if (incoming->node != vmem->node) {
+        error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
+                     VIRTIO_MEM_NODE_PROP, incoming->node, vmem->node);
+        ret = -EINVAL;
+    }
+
+    return ret;
+}
+
+/*
+ * Migration description of the VirtIOMEMMigSanityChecks snapshot.
+ * pre_save fills the snapshot from the device; post_load validates the
+ * received values against the destination's configuration.
+ * The order of the fields below is part of the migration stream layout.
+ */
+static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
+    .name = "virtio-mem-device/sanity-checks",
+    .pre_save = virtio_mem_mig_sanity_checks_pre_save,
+    .post_load = virtio_mem_mig_sanity_checks_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
+        VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
+        VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
+        VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
+        VMSTATE_END_OF_LIST(),
+    },
+};
+
+/*
+ * Migration description of the virtio-mem device-specific state (installed
+ * as the VirtioDeviceClass vmsd in class_init).
+ *
+ * The sanity-check snapshot is transferred first via a temporary struct,
+ * followed by the usable region size, plugged size, requested size and the
+ * plugged-block bitmap. virtio_mem_post_load() runs after loading.
+ * The order of the fields below is part of the migration stream layout.
+ */
+static const VMStateDescription vmstate_virtio_mem_device = {
+    .name = "virtio-mem-device",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    /* NOTE(review): presumably orders this device relative to others - confirm */
+    .priority = MIG_PRI_VIRTIO_MEM,
+    .post_load = virtio_mem_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
+                         vmstate_virtio_mem_sanity_checks),
+        VMSTATE_UINT64(usable_region_size, VirtIOMEM),
+        VMSTATE_UINT64(size, VirtIOMEM),
+        VMSTATE_UINT64(requested_size, VirtIOMEM),
+        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/*
+ * Top-level migration description (installed as the DeviceClass vmsd in
+ * class_init). It only contains the generic VMSTATE_VIRTIO_DEVICE entry.
+ */
+static const VMStateDescription vmstate_virtio_mem = {
+    .name = "virtio-mem",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/*
+ * Populate @vi with a description of the device: address, node, requested
+ * and plugged sizes, maximum size (size of the memdev's memory region),
+ * block size and the canonical QOM path of the memdev.
+ */
+static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
+                                        VirtioMEMDeviceInfo *vi)
+{
+    HostMemoryBackend *backend = vmem->memdev;
+
+    /* Placement. */
+    vi->memaddr = vmem->addr;
+    vi->node = vmem->node;
+
+    /* Sizes. */
+    vi->block_size = vmem->block_size;
+    vi->max_size = memory_region_size(&backend->mr);
+    vi->size = vmem->size;
+    vi->requested_size = vmem->requested_size;
+
+    /* Backend identification. */
+    vi->memdev = object_get_canonical_path(OBJECT(backend));
+}
+
+/*
+ * Return the memory region of the configured memdev.
+ *
+ * If no memdev is set, an error is stored in @errp and NULL is returned.
+ */
+static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
+{
+    if (vmem->memdev) {
+        return &vmem->memdev->mr;
+    }
+
+    error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
+    return NULL;
+}
+
+/*
+ * Register @notifier on the device's size-change notifier list. The list is
+ * notified with a pointer to the plugged size when that size changes (see
+ * virtio_mem_unplug_all()).
+ */
+static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
+                                                Notifier *notifier)
+{
+    notifier_list_add(&vmem->size_change_notifiers, notifier);
+}
+
+/*
+ * Unregister @notifier. @vmem is not used: notifier_remove() only needs the
+ * notifier itself.
+ */
+static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
+                                                   Notifier *notifier)
+{
+    notifier_remove(notifier);
+}
+
+/*
+ * QOM getter for the size property: visits the currently plugged size.
+ */
+static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    /* Visit a copy so that the visitor cannot modify the device state. */
+    uint64_t plugged = VIRTIO_MEM(obj)->size;
+
+    visit_type_size(v, name, &plugged, errp);
+}
+
+/*
+ * QOM getter for the requested-size property: visits the size currently
+ * requested from the guest.
+ */
+static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
+                                          const char *name, void *opaque,
+                                          Error **errp)
+{
+    /* Visit a copy so that the visitor cannot modify the device state. */
+    uint64_t requested = VIRTIO_MEM(obj)->requested_size;
+
+    visit_type_size(v, name, &requested, errp);
+}
+
+/*
+ * QOM setter for the requested-size property.
+ *
+ * Before realize the value is stored as-is; realize validates it later.
+ * After realize the value must be a multiple of the block size and must
+ * not exceed the memory backend size, otherwise an error is set in @errp
+ * and nothing changes. On a change the usable region is recomputed (it may
+ * only grow here), and the guest is always sent a config update.
+ */
+static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
+                                          const char *name, void *opaque,
+                                          Error **errp)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(obj);
+    Error *err = NULL;
+    uint64_t value;
+
+    visit_type_size(v, name, &value, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /*
+     * The block size and memory backend are not fixed until the device was
+     * realized. realize() will verify these properties then.
+     */
+    if (DEVICE(obj)->realized) {
+        if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
+            error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
+                       ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
+                       vmem->block_size);
+            return;
+        } else if (value > memory_region_size(&vmem->memdev->mr)) {
+            error_setg(errp, "'%s' cannot exceed the memory backend size"
+                       " (0x%" PRIx64 ")", name,
+                       memory_region_size(&vmem->memdev->mr));
+            return;
+        }
+
+        if (value != vmem->requested_size) {
+            /* can_shrink is false: this path never shrinks the region. */
+            virtio_mem_resize_usable_region(vmem, value, false);
+            vmem->requested_size = value;
+        }
+        /*
+         * Trigger a config update so the guest gets notified. We trigger
+         * even if the size didn't change (especially helpful for debugging).
+         */
+        virtio_notify_config(VIRTIO_DEVICE(vmem));
+    } else {
+        vmem->requested_size = value;
+    }
+}
+
+/*
+ * QOM getter for the block-size property.
+ *
+ * If no block size is stored yet (value 0), report the default that would
+ * be used: the default for the current RAM memdev if one is set, otherwise
+ * the THP size.
+ */
+static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
+                                      void *opaque, Error **errp)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
+    uint64_t block_size = vmem->block_size;
+
+    if (block_size == 0) {
+        const bool has_ram_backend =
+            vmem->memdev && memory_region_is_ram(&vmem->memdev->mr);
+
+        if (has_ram_backend) {
+            block_size =
+                virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
+        } else {
+            block_size = virtio_mem_thp_size();
+        }
+    }
+
+    visit_type_size(v, name, &block_size, errp);
+}
+
+/*
+ * QOM setter for the block-size property.
+ *
+ * Rejected once the device is realized. The value must be at least
+ * VIRTIO_MEM_MIN_BLOCK_SIZE and a power of two; otherwise an error is set
+ * in @errp and the stored block size is left untouched.
+ */
+static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
+                                      void *opaque, Error **errp)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(obj);
+    Error *local_err = NULL;
+    uint64_t new_size;
+
+    if (DEVICE(obj)->realized) {
+        error_setg(errp, "'%s' cannot be changed", name);
+        return;
+    }
+
+    visit_type_size(v, name, &new_size, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (new_size < VIRTIO_MEM_MIN_BLOCK_SIZE) {
+        error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
+                   VIRTIO_MEM_MIN_BLOCK_SIZE);
+        return;
+    }
+    if (!is_power_of_2(new_size)) {
+        error_setg(errp, "'%s' property has to be a power of two", name);
+        return;
+    }
+
+    vmem->block_size = new_size;
+}
+
+/*
+ * Instance initializer: set up the listener and notifier lists and add the
+ * size (read-only), requested-size and block-size properties, all of type
+ * "size".
+ */
+static void virtio_mem_instance_init(Object *obj)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(obj);
+
+    QLIST_INIT(&vmem->rdl_list);
+    notifier_list_init(&vmem->size_change_notifiers);
+
+    /* No setter: the plugged size cannot be written through QOM. */
+    object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size",
+                        virtio_mem_get_size, NULL,
+                        NULL, NULL);
+    object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
+                        virtio_mem_get_requested_size,
+                        virtio_mem_set_requested_size,
+                        NULL, NULL);
+    object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
+                        virtio_mem_get_block_size,
+                        virtio_mem_set_block_size,
+                        NULL, NULL);
+}
+
+/*
+ * Static qdev properties of the device: address (default 0), node
+ * (default 0), prealloc (default false) and the memdev link. The
+ * "unplugged inaccessible" on/off/auto property (default auto) exists only
+ * when VIRTIO_MEM_HAS_LEGACY_GUESTS is defined.
+ */
+static Property virtio_mem_properties[] = {
+    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
+    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
+    DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
+    DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
+    DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
+                            unplugged_inaccessible, ON_OFF_AUTO_AUTO),
+#endif
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * RamDiscardManager callback: the minimum granularity is the device's block
+ * size. @mr must be the memory region of the device's memdev.
+ */
+static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
+                                                   const MemoryRegion *mr)
+{
+    const VirtIOMEM *dev = VIRTIO_MEM(rdm);
+    const MemoryRegion *backend_mr = &dev->memdev->mr;
+
+    g_assert(mr == backend_mr);
+    return dev->block_size;
+}
+
+/*
+ * RamDiscardManager callback: report whether the whole section @s is
+ * plugged.
+ *
+ * The section is widened to block boundaries first. Returns false if the
+ * widened range is not valid for this device or if any block in it is
+ * unplugged.
+ */
+static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
+                                        const MemoryRegionSection *s)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    const uint64_t raw_start = vmem->addr + s->offset_within_region;
+    const uint64_t raw_end = raw_start + int128_get64(s->size);
+    uint64_t gpa, len;
+
+    g_assert(s->mr == &vmem->memdev->mr);
+
+    gpa = QEMU_ALIGN_DOWN(raw_start, vmem->block_size);
+    len = QEMU_ALIGN_UP(raw_end, vmem->block_size) - gpa;
+
+    return virtio_mem_valid_range(vmem, gpa, len) &&
+           virtio_mem_test_bitmap(vmem, gpa, len, true);
+}
+
+/*
+ * Bundles a caller-supplied replay callback with its opaque argument so
+ * both can be passed through the single "arg" pointer of the section
+ * walkers.
+ */
+struct VirtIOMEMReplayData {
+    void *fn;       /* ReplayRamPopulate or ReplayRamDiscard, cast on use */
+    void *opaque;   /* passed unchanged to fn */
+};
+
+/*
+ * Section-walker adapter: unpack the VirtIOMEMReplayData in @arg and call
+ * its ReplayRamPopulate callback for section @s, returning its result.
+ */
+static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
+{
+    struct VirtIOMEMReplayData *ctx = arg;
+    ReplayRamPopulate populate_fn = (ReplayRamPopulate)ctx->fn;
+
+    return populate_fn(s, ctx->opaque);
+}
+
+/*
+ * RamDiscardManager callback: invoke @replay_fn(@opaque) for every plugged
+ * part of section @s. @s must refer to the memdev's memory region.
+ *
+ * Returns the result of the section walk.
+ */
+static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
+                                           MemoryRegionSection *s,
+                                           ReplayRamPopulate replay_fn,
+                                           void *opaque)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    struct VirtIOMEMReplayData ctx;
+
+    ctx.fn = replay_fn;
+    ctx.opaque = opaque;
+
+    g_assert(s->mr == &vmem->memdev->mr);
+    return virtio_mem_for_each_plugged_section(vmem, s, &ctx,
+                                            virtio_mem_rdm_replay_populated_cb);
+}
+
+/*
+ * Section-walker adapter: unpack the VirtIOMEMReplayData in @arg and call
+ * its ReplayRamDiscard callback for section @s. That callback returns
+ * nothing, so this always returns 0.
+ */
+static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
+                                              void *arg)
+{
+    struct VirtIOMEMReplayData *ctx = arg;
+    ReplayRamDiscard discard_fn = (ReplayRamDiscard)ctx->fn;
+
+    discard_fn(s, ctx->opaque);
+    return 0;
+}
+
+/*
+ * RamDiscardManager callback: invoke @replay_fn(@opaque) for every
+ * unplugged part of section @s. @s must refer to the memdev's memory
+ * region.
+ */
+static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+                                            MemoryRegionSection *s,
+                                            ReplayRamDiscard replay_fn,
+                                            void *opaque)
+{
+    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    struct VirtIOMEMReplayData ctx;
+
+    ctx.fn = replay_fn;
+    ctx.opaque = opaque;
+
+    g_assert(s->mr == &vmem->memdev->mr);
+    virtio_mem_for_each_unplugged_section(vmem, s, &ctx,
+                                          virtio_mem_rdm_replay_discarded_cb);
+}
+
+/*
+ * RamDiscardManager callback: register listener @rdl for section @s.
+ *
+ * The listener keeps its own copy of the section, is added to the device's
+ * listener list and is immediately told about all currently plugged parts
+ * of the section. A failing replay is only reported; the listener stays
+ * registered.
+ */
+static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
+                                             RamDiscardListener *rdl,
+                                             MemoryRegionSection *s)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    int rc;
+
+    g_assert(s->mr == &vmem->memdev->mr);
+
+    rdl->section = memory_region_section_new_copy(s);
+    QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
+
+    rc = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+                                             virtio_mem_notify_populate_cb);
+    if (!rc) {
+        return;
+    }
+
+    error_report("%s: Replaying plugged ranges failed: %s", __func__,
+                 strerror(-rc));
+}
+
+/*
+ * RamDiscardManager callback: unregister listener @rdl.
+ *
+ * If any memory is plugged, the listener first gets discard notifications:
+ * one for its whole section if it supports double discards, otherwise one
+ * per plugged part. Afterwards the section copy is freed and the listener
+ * is removed from the device's list.
+ */
+static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
+                                               RamDiscardListener *rdl)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+
+    g_assert(rdl->section->mr == &vmem->memdev->mr);
+
+    if (vmem->size) {
+        if (!rdl->double_discard_supported) {
+            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+                                                virtio_mem_notify_discard_cb);
+        } else {
+            rdl->notify_discard(rdl, rdl->section);
+        }
+    }
+
+    memory_region_section_free_copy(rdl->section);
+    rdl->section = NULL;
+    QLIST_REMOVE(rdl, next);
+}
+
+/*
+ * Class initializer: wires up the qdev properties and migration state, the
+ * virtio device callbacks, the virtio-mem class callbacks and the
+ * RamDiscardManager interface callbacks.
+ */
+static void virtio_mem_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
+    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
+
+    /* Generic device class. */
+    device_class_set_props(dc, virtio_mem_properties);
+    dc->vmsd = &vmstate_virtio_mem;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+
+    /* Virtio device class. */
+    vdc->vmsd = &vmstate_virtio_mem_device;
+    vdc->realize = virtio_mem_device_realize;
+    vdc->unrealize = virtio_mem_device_unrealize;
+    vdc->get_config = virtio_mem_get_config;
+    vdc->get_features = virtio_mem_get_features;
+    vdc->validate_features = virtio_mem_validate_features;
+
+    /* virtio-mem class. */
+    vmc->get_memory_region = virtio_mem_get_memory_region;
+    vmc->fill_device_info = virtio_mem_fill_device_info;
+    vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
+    vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
+
+    /* RamDiscardManager interface. */
+    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
+    rdmc->is_populated = virtio_mem_rdm_is_populated;
+    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
+    rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
+    rdmc->register_listener = virtio_mem_rdm_register_listener;
+    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
+}
+
+/*
+ * QOM type description: a virtio device subtype that also implements the
+ * RamDiscardManager interface.
+ */
+static const TypeInfo virtio_mem_info = {
+    .name = TYPE_VIRTIO_MEM,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOMEM),
+    .instance_init = virtio_mem_instance_init,
+    .class_init = virtio_mem_class_init,
+    .class_size = sizeof(VirtIOMEMClass),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_RAM_DISCARD_MANAGER },
+        { }
+    },
+};
+
+/* Register the virtio-mem QOM type. */
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_mem_info);
+}
+
+/* Hook the registration function into QEMU's type initialization. */
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
new file mode 100644
index 00000000..d240efef
--- /dev/null
+++ b/hw/virtio/virtio-mmio.c
@@ -0,0 +1,862 @@
+/*
+ * Virtio MMIO bindings
+ *
+ * Copyright (c) 2011 Linaro Limited
+ *
+ * Author:
+ *  Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_mmio.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "hw/virtio/virtio.h"
+#include "migration/qemu-file-types.h"
+#include "qemu/host-utils.h"
+#include "qemu/module.h"
+#include "sysemu/kvm.h"
+#include "sysemu/replay.h"
+#include "hw/virtio/virtio-mmio.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "trace.h"
+
+/*
+ * VirtioBusClass::ioeventfd_enabled hook: report whether the
+ * VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD flag is set on this proxy.
+ */
+static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
+{
+    VirtIOMMIOProxy *mmio = VIRTIO_MMIO(d);
+    bool use_ioeventfd = mmio->flags & VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD;
+
+    return use_ioeventfd;
+}
+
+/*
+ * VirtioBusClass::ioeventfd_assign hook.
+ *
+ * Adds (@assign true) or removes (@assign false) an eventfd on the 4-byte
+ * VIRTIO_MMIO_QUEUE_NOTIFY register that matches writes of the value @n
+ * and signals @notifier. Always returns 0.
+ */
+static int virtio_mmio_ioeventfd_assign(DeviceState *d,
+                                        EventNotifier *notifier,
+                                        int n, bool assign)
+{
+    VirtIOMMIOProxy *mmio = VIRTIO_MMIO(d);
+
+    if (!assign) {
+        memory_region_del_eventfd(&mmio->iomem, VIRTIO_MMIO_QUEUE_NOTIFY, 4,
+                                  true, n, notifier);
+        return 0;
+    }
+
+    memory_region_add_eventfd(&mmio->iomem, VIRTIO_MMIO_QUEUE_NOTIFY, 4,
+                              true, n, notifier);
+    return 0;
+}
+
+/* Start ioeventfd handling on the virtio bus owned by @proxy. */
+static void virtio_mmio_start_ioeventfd(VirtIOMMIOProxy *proxy)
+{
+    virtio_bus_start_ioeventfd(&proxy->bus);
+}
+
+/* Stop ioeventfd handling on the virtio bus owned by @proxy. */
+static void virtio_mmio_stop_ioeventfd(VirtIOMMIOProxy *proxy)
+{
+    virtio_bus_stop_ioeventfd(&proxy->bus);
+}
+
+/*
+ * Reset the virtio bus behind @proxy and, for a non-legacy transport,
+ * mark every queue in the proxy's per-queue state as not enabled.
+ */
+static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy)
+{
+    int q;
+
+    virtio_bus_reset(&proxy->bus);
+
+    /* The per-queue "enabled" state is only maintained in non-legacy mode. */
+    if (proxy->legacy) {
+        return;
+    }
+
+    for (q = 0; q < VIRTIO_QUEUE_MAX; q++) {
+        proxy->vqs[q].enabled = 0;
+    }
+}
+
+/*
+ * MMIO read handler for the virtio-mmio register window.
+ *
+ * @opaque: the VirtIOMMIOProxy owning the region
+ * @offset: byte offset of the access within the region
+ * @size:   access width in bytes
+ *
+ * Offsets at or above VIRTIO_MMIO_CONFIG are forwarded to the device's
+ * config space accessors (1-, 2- or 4-byte accesses). All other registers
+ * must be read with 4-byte accesses; anything else is logged as a guest
+ * error and reads as zero. Reads of write-only, unknown or wrong-mode
+ * (legacy vs. non-legacy) registers are likewise logged and return zero.
+ */
+static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size)
+{
+    VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    trace_virtio_mmio_read(offset);
+
+    if (!vdev) {
+        /* If no backend is present, we treat most registers as
+         * read-as-zero, except for the magic number, version and
+         * vendor ID. This is not strictly sanctioned by the virtio
+         * spec, but it allows us to provide transports with no backend
+         * plugged in which don't confuse Linux's virtio code: the
+         * probe won't complain about the bad magic number, but the
+         * device ID of zero means no backend will claim it.
+         */
+        switch (offset) {
+        case VIRTIO_MMIO_MAGIC_VALUE:
+            return VIRT_MAGIC;
+        case VIRTIO_MMIO_VERSION:
+            if (proxy->legacy) {
+                return VIRT_VERSION_LEGACY;
+            } else {
+                return VIRT_VERSION;
+            }
+        case VIRTIO_MMIO_VENDOR_ID:
+            return VIRT_VENDOR;
+        default:
+            return 0;
+        }
+    }
+
+    /*
+     * Device-specific config space: rebase the offset to the start of the
+     * config area and dispatch on access width, using the legacy or the
+     * modern accessors depending on the transport mode.
+     */
+    if (offset >= VIRTIO_MMIO_CONFIG) {
+        offset -= VIRTIO_MMIO_CONFIG;
+        if (proxy->legacy) {
+            switch (size) {
+            case 1:
+                return virtio_config_readb(vdev, offset);
+            case 2:
+                return virtio_config_readw(vdev, offset);
+            case 4:
+                return virtio_config_readl(vdev, offset);
+            default:
+                abort();
+            }
+        } else {
+            switch (size) {
+            case 1:
+                return virtio_config_modern_readb(vdev, offset);
+            case 2:
+                return virtio_config_modern_readw(vdev, offset);
+            case 4:
+                return virtio_config_modern_readl(vdev, offset);
+            default:
+                abort();
+            }
+        }
+    }
+    /* Everything below the config area is a 32-bit transport register. */
+    if (size != 4) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: wrong size access to register!\n",
+                      __func__);
+        return 0;
+    }
+    switch (offset) {
+    case VIRTIO_MMIO_MAGIC_VALUE:
+        return VIRT_MAGIC;
+    case VIRTIO_MMIO_VERSION:
+        if (proxy->legacy) {
+            return VIRT_VERSION_LEGACY;
+        } else {
+            return VIRT_VERSION;
+        }
+    case VIRTIO_MMIO_DEVICE_ID:
+        return vdev->device_id;
+    case VIRTIO_MMIO_VENDOR_ID:
+        return VIRT_VENDOR;
+    case VIRTIO_MMIO_DEVICE_FEATURES:
+        if (proxy->legacy) {
+            /* Legacy mode reports nothing when a non-zero word is selected. */
+            if (proxy->host_features_sel) {
+                return 0;
+            } else {
+                return vdev->host_features;
+            }
+        } else {
+            /*
+             * Non-legacy: mask out the class's legacy-only feature bits and
+             * shift the word chosen by host_features_sel down to bit 0.
+             */
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+            return (vdev->host_features & ~vdc->legacy_features)
+                >> (32 * proxy->host_features_sel);
+        }
+    case VIRTIO_MMIO_QUEUE_NUM_MAX:
+        /* A selected queue whose current size is zero reads as zero. */
+        if (!virtio_queue_get_num(vdev, vdev->queue_sel)) {
+            return 0;
+        }
+        return VIRTQUEUE_MAX_SIZE;
+    case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        /* Convert the queue address back into a guest page frame number. */
+        return virtio_queue_get_addr(vdev, vdev->queue_sel)
+            >> proxy->guest_page_shift;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return proxy->vqs[vdev->queue_sel].enabled;
+    case VIRTIO_MMIO_INTERRUPT_STATUS:
+        return qatomic_read(&vdev->isr);
+    case VIRTIO_MMIO_STATUS:
+        return vdev->status;
+    case VIRTIO_MMIO_CONFIG_GENERATION:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: read from non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return 0;
+        }
+        return vdev->generation;
+   case VIRTIO_MMIO_SHM_LEN_LOW:
+   case VIRTIO_MMIO_SHM_LEN_HIGH:
+        /*
+         * VIRTIO_MMIO_SHM_SEL is unimplemented
+         * according to the linux driver, if region length is -1
+         * the shared memory doesn't exist
+         */
+        return -1;
+    case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
+    case VIRTIO_MMIO_DRIVER_FEATURES:
+    case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
+    case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+    case VIRTIO_MMIO_QUEUE_SEL:
+    case VIRTIO_MMIO_QUEUE_NUM:
+    case VIRTIO_MMIO_QUEUE_ALIGN:
+    case VIRTIO_MMIO_QUEUE_NOTIFY:
+    case VIRTIO_MMIO_INTERRUPT_ACK:
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: read of write-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
+        return 0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
+        return 0;
+    }
+    /* Not reached: every switch case above returns. */
+    return 0;
+}
+
+/*
+ * MMIO write handler for the virtio-mmio register window.
+ *
+ * @opaque: the VirtIOMMIOProxy owning the region
+ * @offset: byte offset of the access within the region
+ * @value:  value written by the guest
+ * @size:   access width in bytes
+ *
+ * With no backend plugged in every write is ignored. Offsets at or above
+ * VIRTIO_MMIO_CONFIG go to the device's config space accessors; all other
+ * registers require 4-byte accesses. Writes to read-only, unknown or
+ * wrong-mode (legacy vs. non-legacy) registers are logged as guest errors
+ * and otherwise ignored.
+ */
+static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value,
+                              unsigned size)
+{
+    VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    trace_virtio_mmio_write_offset(offset, value);
+
+    if (!vdev) {
+        /* If no backend is present, we just make all registers
+         * write-ignored. This allows us to provide transports with
+         * no backend plugged in.
+         */
+        return;
+    }
+
+    /*
+     * Device-specific config space: rebase the offset to the start of the
+     * config area and dispatch on access width, using the legacy or the
+     * modern accessors depending on the transport mode.
+     */
+    if (offset >= VIRTIO_MMIO_CONFIG) {
+        offset -= VIRTIO_MMIO_CONFIG;
+        if (proxy->legacy) {
+            switch (size) {
+            case 1:
+                virtio_config_writeb(vdev, offset, value);
+                break;
+            case 2:
+                virtio_config_writew(vdev, offset, value);
+                break;
+            case 4:
+                virtio_config_writel(vdev, offset, value);
+                break;
+            default:
+                abort();
+            }
+            return;
+        } else {
+            switch (size) {
+            case 1:
+                virtio_config_modern_writeb(vdev, offset, value);
+                break;
+            case 2:
+                virtio_config_modern_writew(vdev, offset, value);
+                break;
+            case 4:
+                virtio_config_modern_writel(vdev, offset, value);
+                break;
+            default:
+                abort();
+            }
+            return;
+        }
+    }
+    /* Everything below the config area is a 32-bit transport register. */
+    if (size != 4) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: wrong size access to register!\n",
+                      __func__);
+        return;
+    }
+    switch (offset) {
+    case VIRTIO_MMIO_DEVICE_FEATURES_SEL:
+        /* Any non-zero value is normalized to selector 1. */
+        if (value) {
+            proxy->host_features_sel = 1;
+        } else {
+            proxy->host_features_sel = 0;
+        }
+        break;
+    case VIRTIO_MMIO_DRIVER_FEATURES:
+        if (proxy->legacy) {
+            if (proxy->guest_features_sel) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                              "%s: attempt to write guest features with "
+                              "guest_features_sel > 0 in legacy mode\n",
+                              __func__);
+            } else {
+                virtio_set_features(vdev, value);
+            }
+        } else {
+            /*
+             * Only latched here; the combined 64-bit value is applied when
+             * FEATURES_OK is written to VIRTIO_MMIO_STATUS below.
+             */
+            proxy->guest_features[proxy->guest_features_sel] = value;
+        }
+        break;
+    case VIRTIO_MMIO_DRIVER_FEATURES_SEL:
+        /* Any non-zero value is normalized to selector 1. */
+        if (value) {
+            proxy->guest_features_sel = 1;
+        } else {
+            proxy->guest_features_sel = 0;
+        }
+        break;
+    case VIRTIO_MMIO_GUEST_PAGE_SIZE:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        /*
+         * Keep the page size as a shift count (its trailing-zero count);
+         * a ctz32() result above 31 is replaced by a shift of 0.
+         */
+        proxy->guest_page_shift = ctz32(value);
+        if (proxy->guest_page_shift > 31) {
+            proxy->guest_page_shift = 0;
+        }
+        trace_virtio_mmio_guest_page(value, proxy->guest_page_shift);
+        break;
+    case VIRTIO_MMIO_QUEUE_SEL:
+        /* Out-of-range selectors are silently ignored. */
+        if (value < VIRTIO_QUEUE_MAX) {
+            vdev->queue_sel = value;
+        }
+        break;
+    case VIRTIO_MMIO_QUEUE_NUM:
+        trace_virtio_mmio_queue_write(value, VIRTQUEUE_MAX_SIZE);
+        virtio_queue_set_num(vdev, vdev->queue_sel, value);
+
+        if (proxy->legacy) {
+            virtio_queue_update_rings(vdev, vdev->queue_sel);
+        } else {
+            /* Remembered so it can be re-applied on VIRTIO_MMIO_QUEUE_READY. */
+            proxy->vqs[vdev->queue_sel].num = value;
+        }
+        break;
+    case VIRTIO_MMIO_QUEUE_ALIGN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        virtio_queue_set_align(vdev, vdev->queue_sel, value);
+        break;
+    case VIRTIO_MMIO_QUEUE_PFN:
+        if (!proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to legacy register (0x%"
+                          HWADDR_PRIx ") in non-legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        /*
+         * A page frame number of 0 triggers a soft reset of the transport;
+         * any other value is scaled by the guest page size to an address.
+         */
+        if (value == 0) {
+            virtio_mmio_soft_reset(proxy);
+        } else {
+            virtio_queue_set_addr(vdev, vdev->queue_sel,
+                                  value << proxy->guest_page_shift);
+        }
+        break;
+    case VIRTIO_MMIO_QUEUE_READY:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        if (value) {
+            /*
+             * Commit the latched queue size and the three 64-bit ring
+             * addresses, each assembled from its high ([1]) and low ([0])
+             * 32-bit halves, then mark the queue enabled.
+             */
+            virtio_queue_set_num(vdev, vdev->queue_sel,
+                                 proxy->vqs[vdev->queue_sel].num);
+            virtio_queue_set_rings(vdev, vdev->queue_sel,
+                ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].desc[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].avail[0],
+                ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
+                proxy->vqs[vdev->queue_sel].used[0]);
+            proxy->vqs[vdev->queue_sel].enabled = 1;
+        } else {
+            proxy->vqs[vdev->queue_sel].enabled = 0;
+        }
+        break;
+    case VIRTIO_MMIO_QUEUE_NOTIFY:
+        /* Notifications for out-of-range queue indexes are dropped. */
+        if (value < VIRTIO_QUEUE_MAX) {
+            virtio_queue_notify(vdev, value);
+        }
+        break;
+    case VIRTIO_MMIO_INTERRUPT_ACK:
+        /* Clear the acknowledged ISR bits, then recompute the IRQ line. */
+        qatomic_and(&vdev->isr, ~value);
+        virtio_update_irq(vdev);
+        break;
+    case VIRTIO_MMIO_STATUS:
+        /* ioeventfd is stopped before the status changes if DRIVER_OK is clear */
+        if (!(value & VIRTIO_CONFIG_S_DRIVER_OK)) {
+            virtio_mmio_stop_ioeventfd(proxy);
+        }
+
+        /* Non-legacy: apply the latched 64-bit driver features first. */
+        if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) {
+            virtio_set_features(vdev,
+                                ((uint64_t)proxy->guest_features[1]) << 32 |
+                                proxy->guest_features[0]);
+        }
+
+        virtio_set_status(vdev, value & 0xff);
+
+        /* ... and started only after the new status has been set. */
+        if (value & VIRTIO_CONFIG_S_DRIVER_OK) {
+            virtio_mmio_start_ioeventfd(proxy);
+        }
+
+        /* A resulting status of zero triggers a soft reset. */
+        if (vdev->status == 0) {
+            virtio_mmio_soft_reset(proxy);
+        }
+        break;
+    /*
+     * The six ring address registers below are non-legacy only and merely
+     * latch a 32-bit half; the values take effect on VIRTIO_MMIO_QUEUE_READY.
+     */
+    case VIRTIO_MMIO_QUEUE_DESC_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].desc[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].avail[1] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_LOW:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[0] = value;
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_HIGH:
+        if (proxy->legacy) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: write to non-legacy register (0x%"
+                          HWADDR_PRIx ") in legacy mode\n",
+                          __func__, offset);
+            return;
+        }
+        proxy->vqs[vdev->queue_sel].used[1] = value;
+        break;
+    case VIRTIO_MMIO_MAGIC_VALUE:
+    case VIRTIO_MMIO_VERSION:
+    case VIRTIO_MMIO_DEVICE_ID:
+    case VIRTIO_MMIO_VENDOR_ID:
+    case VIRTIO_MMIO_DEVICE_FEATURES:
+    case VIRTIO_MMIO_QUEUE_NUM_MAX:
+    case VIRTIO_MMIO_INTERRUPT_STATUS:
+    case VIRTIO_MMIO_CONFIG_GENERATION:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: write to read-only register (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: bad register offset (0x%" HWADDR_PRIx ")\n",
+                      __func__, offset);
+    }
+}
+
+/*
+ * Register access callbacks installed by virtio_mmio_realizefn() when the
+ * proxy is in legacy mode: same handlers as the non-legacy ops, but with
+ * DEVICE_NATIVE_ENDIAN byte order.
+ */
+static const MemoryRegionOps virtio_legacy_mem_ops = {
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .read = virtio_mmio_read,
+    .write = virtio_mmio_write,
+};
+
+/*
+ * Register access callbacks installed by virtio_mmio_realizefn() when the
+ * proxy is not in legacy mode: same handlers as the legacy ops, but with
+ * DEVICE_LITTLE_ENDIAN byte order.
+ */
+static const MemoryRegionOps virtio_mem_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .read = virtio_mmio_read,
+    .write = virtio_mmio_write,
+};
+
+/*
+ * VirtioBusClass::notify hook: drive the proxy's interrupt line from the
+ * device's ISR value (asserted while any ISR bit is set). @vector is not
+ * used. Does nothing when no device is plugged into the bus.
+ */
+static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (vdev) {
+        int level = qatomic_read(&vdev->isr) ? 1 : 0;
+
+        trace_virtio_mmio_setting_irq(level);
+        qemu_set_irq(proxy->irq, level);
+    }
+}
+
+/*
+ * VirtioBusClass::load_config hook.
+ *
+ * Reads the transport register state written by virtio_mmio_save_config():
+ * three big-endian 32-bit words holding the device-features selector, the
+ * driver-features selector and the legacy guest page shift, in that order.
+ *
+ * The values come from the migration stream and are later used as a shift
+ * multiplier (32 * host_features_sel), as an index into the two-element
+ * guest_features[] array and as a shift count, so anything the register
+ * write path in virtio_mmio_write() could never have produced is rejected
+ * instead of being stored.
+ *
+ * Returns 0 on success, -EINVAL if the stream holds out-of-range values
+ * (in which case the proxy state is left untouched).
+ */
+static int virtio_mmio_load_config(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+    uint32_t host_features_sel = qemu_get_be32(f);
+    uint32_t guest_features_sel = qemu_get_be32(f);
+    uint32_t guest_page_shift = qemu_get_be32(f);
+
+    /* The *_FEATURES_SEL write handlers only ever store 0 or 1. */
+    if (host_features_sel > 1 || guest_features_sel > 1) {
+        error_report("virtio-mmio: invalid feature selector in migration "
+                     "stream (host %u, guest %u)",
+                     host_features_sel, guest_features_sel);
+        return -EINVAL;
+    }
+
+    /* The GUEST_PAGE_SIZE write handler never stores a shift above 31. */
+    if (guest_page_shift > 31) {
+        error_report("virtio-mmio: invalid guest page shift %u in migration "
+                     "stream", guest_page_shift);
+        return -EINVAL;
+    }
+
+    proxy->host_features_sel = host_features_sel;
+    proxy->guest_features_sel = guest_features_sel;
+    proxy->guest_page_shift = guest_page_shift;
+    return 0;
+}
+
+/*
+ * VirtioBusClass::save_config hook.
+ *
+ * Writes the transport register state as three big-endian 32-bit words:
+ * device-features selector, driver-features selector and guest page shift.
+ * The order must match what virtio_mmio_load_config() reads back.
+ */
+static void virtio_mmio_save_config(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    qemu_put_be32(f, proxy->host_features_sel);
+    qemu_put_be32(f, proxy->guest_features_sel);
+    qemu_put_be32(f, proxy->guest_page_shift);
+}
+
+/*
+ * Migration layout of one VirtIOMMIOQueue: the latched queue size, the
+ * enabled flag, and the two 32-bit halves ([0] low, [1] high) of the
+ * descriptor, available and used ring addresses.
+ */
+static const VMStateDescription vmstate_virtio_mmio_queue_state = {
+    .name = "virtio_mmio/queue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(num, VirtIOMMIOQueue),
+        VMSTATE_BOOL(enabled, VirtIOMMIOQueue),
+        VMSTATE_UINT32_ARRAY(desc, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(avail, VirtIOMMIOQueue, 2),
+        VMSTATE_UINT32_ARRAY(used, VirtIOMMIOQueue, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Subsection carrying the proxy's latched state: the two 32-bit driver
+ * feature words and the per-queue state of all VIRTIO_QUEUE_MAX queues.
+ */
+static const VMStateDescription vmstate_virtio_mmio_state_sub = {
+    .name = "virtio_mmio/state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32_ARRAY(guest_features, VirtIOMMIOProxy, 2),
+        VMSTATE_STRUCT_ARRAY(vqs, VirtIOMMIOProxy, VIRTIO_QUEUE_MAX, 0,
+                             vmstate_virtio_mmio_queue_state,
+                             VirtIOMMIOQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Top-level description used by the extra-state save/load hooks. It has no
+ * fields of its own; all data lives in the "virtio_mmio/state" subsection.
+ */
+static const VMStateDescription vmstate_virtio_mmio = {
+    .name = "virtio_mmio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_virtio_mmio_state_sub,
+        NULL
+    }
+};
+
+/*
+ * VirtioBusClass::save_extra_state hook: serialize the proxy to @f as
+ * described by vmstate_virtio_mmio.
+ */
+static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL);
+}
+
+/*
+ * VirtioBusClass::load_extra_state hook: load the proxy state from @f as
+ * described by vmstate_virtio_mmio (version 1) and return the result of
+ * vmstate_load_state().
+ */
+static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+}
+
+/*
+ * VirtioBusClass::has_extra_state hook: only a non-legacy proxy reports
+ * extra state.
+ */
+static bool virtio_mmio_has_extra_state(DeviceState *opaque)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
+
+    return !proxy->legacy;
+}
+
+/*
+ * DeviceClass::reset handler for the virtio-mmio proxy.
+ *
+ * Performs a soft reset, clears the feature selectors and the guest page
+ * shift, and for a non-legacy transport also clears the latched driver
+ * features and every queue's latched size and ring addresses.
+ */
+static void virtio_mmio_reset(DeviceState *d)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    int q;
+
+    virtio_mmio_soft_reset(proxy);
+
+    proxy->host_features_sel = 0;
+    proxy->guest_features_sel = 0;
+    proxy->guest_page_shift = 0;
+
+    /* The remaining state is only used by the non-legacy register set. */
+    if (proxy->legacy) {
+        return;
+    }
+
+    proxy->guest_features[0] = 0;
+    proxy->guest_features[1] = 0;
+
+    for (q = 0; q < VIRTIO_QUEUE_MAX; q++) {
+        proxy->vqs[q].num = 0;
+        proxy->vqs[q].desc[0] = 0;
+        proxy->vqs[q].desc[1] = 0;
+        proxy->vqs[q].avail[0] = 0;
+        proxy->vqs[q].avail[1] = 0;
+        proxy->vqs[q].used[0] = 0;
+        proxy->vqs[q].used[1] = 0;
+    }
+}
+
+/*
+ * Set up (@assign true) or tear down (@assign false) the guest notifier of
+ * virtqueue @n.
+ *
+ * On assignment the event notifier is initialized before its fd handler is
+ * installed; on removal the handler is removed before the notifier is
+ * cleaned up. If the device class provides guest_notifier_mask and the
+ * device uses it, the notifier is masked on removal and unmasked on
+ * assignment.
+ *
+ * Returns 0 on success or the negative value from event_notifier_init().
+ */
+static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
+                                          bool with_irqfd)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    VirtQueue *vq = virtio_get_queue(vdev, n);
+    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+
+    if (!assign) {
+        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
+        event_notifier_cleanup(notifier);
+    } else {
+        int rc = event_notifier_init(notifier, 0);
+
+        if (rc < 0) {
+            return rc;
+        }
+        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
+    }
+
+    if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
+        vdc->guest_notifier_mask(vdev, n, !assign);
+    }
+
+    return 0;
+}
+
+/*
+ * VirtioBusClass::set_guest_notifiers hook.
+ *
+ * Assigns or removes guest notifiers for virtqueues 0 .. @nvqs - 1 (capped
+ * at VIRTIO_QUEUE_MAX), stopping at the first queue whose size is zero.
+ * If an assignment fails, the notifiers already assigned for the earlier
+ * queues are removed again and the error is returned.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs,
+                                           bool assign)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    /* TODO: need to check if kvm-arm supports irqfd */
+    bool with_irqfd = false;
+    int r, n;
+
+    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
+
+    for (n = 0; n < nvqs; n++) {
+        if (!virtio_queue_get_num(vdev, n)) {
+            break;
+        }
+
+        r = virtio_mmio_set_guest_notifier(d, n, assign, with_irqfd);
+        if (r >= 0) {
+            continue;
+        }
+
+        /*
+         * Only assignment can fail. Roll back the queues that were already
+         * set up, i.e. VQs 0 .. n - 1, then report the error.
+         */
+        assert(assign);
+        while (--n >= 0) {
+            virtio_mmio_set_guest_notifier(d, n, !assign, false);
+        }
+        return r;
+    }
+
+    return 0;
+}
+
+/*
+ * VirtioBusClass::pre_plugged hook: a non-legacy transport adds
+ * VIRTIO_F_VERSION_1 to the device's host features. @errp is not used.
+ */
+static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (proxy->legacy) {
+        return;
+    }
+
+    virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
+}
+
+/* virtio-mmio device */
+
+/*
+ * User-settable properties of the virtio-mmio proxy, all defaulting to true:
+ *  - "format_transport_address": whether virtio_mmio_bus_get_dev_path()
+ *    appends the transport's base address to the device path;
+ *  - "force-legacy": backs proxy->legacy, which selects the legacy register
+ *    set and memory region ops;
+ *  - "ioeventfd": the VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT bit in
+ *    proxy->flags.
+ */
+static Property virtio_mmio_properties[] = {
+    DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy,
+                     format_transport_address, true),
+    DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true),
+    DEFINE_PROP_BIT("ioeventfd", VirtIOMMIOProxy, flags,
+                    VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * DeviceClass::realize handler for the virtio-mmio proxy.
+ *
+ * Creates the virtio-mmio bus, exports the interrupt line, drops the
+ * ioeventfd flag where it cannot be used, and registers the 0x200-byte
+ * register window with the ops matching the legacy setting. @errp is not
+ * used.
+ */
+static void virtio_mmio_realizefn(DeviceState *d, Error **errp)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(d);
+    const MemoryRegionOps *ops;
+
+    qbus_init(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS, d, NULL);
+    sysbus_init_irq(sbd, &proxy->irq);
+
+    /*
+     * ioeventfd is turned off when KVM eventfds are not enabled, and under
+     * record/replay because fd-based ioevents can't be synchronized there.
+     */
+    if (!kvm_eventfds_enabled() || replay_mode != REPLAY_MODE_NONE) {
+        proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD;
+    }
+
+    ops = proxy->legacy ? &virtio_legacy_mem_ops : &virtio_mem_ops;
+    memory_region_init_io(&proxy->iomem, OBJECT(d), ops, proxy,
+                          TYPE_VIRTIO_MMIO, 0x200);
+    sysbus_init_mmio(sbd, &proxy->iomem);
+}
+
+/*
+ * Class initializer for TYPE_VIRTIO_MMIO: installs the property table and
+ * the realize/reset handlers and files the device under the MISC category.
+ */
+static void virtio_mmio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class_set_props(device_class, virtio_mmio_properties);
+    device_class->realize = virtio_mmio_realizefn;
+    device_class->reset = virtio_mmio_reset;
+    set_bit(DEVICE_CATEGORY_MISC, device_class->categories);
+}
+
+/* QOM type description of the virtio-mmio proxy, a sysbus device. */
+static const TypeInfo virtio_mmio_info = {
+    .name = TYPE_VIRTIO_MMIO,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .class_init = virtio_mmio_class_init,
+    .instance_size = sizeof(VirtIOMMIOProxy),
+};
+
+/* virtio-mmio-bus. */
+
+/*
+ * BusClass::get_dev_path hook for devices on a virtio-mmio bus.
+ *
+ * Returns a newly allocated path string. With "format_transport_address"
+ * disabled this is simply the path of the virtio-mmio proxy itself (which
+ * may be NULL), so the transport's base address stays invisible. Otherwise
+ * "virtio-mmio@<base address>" is appended to the proxy path, or used on
+ * its own when the proxy has no path.
+ */
+static char *virtio_mmio_bus_get_dev_path(DeviceState *dev)
+{
+    BusState *bus = qdev_get_parent_bus(dev);
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(bus->parent);
+    char *parent_path = qdev_get_dev_path(DEVICE(proxy));
+    MemoryRegionSection section;
+    char *full_path;
+
+    /*
+     * Same behaviour as virtio_bus_get_dev_path(): address formatting is
+     * delegated to the bus the proxy resides on.
+     */
+    if (!proxy->format_transport_address) {
+        return parent_path;
+    }
+
+    /* Look up where the register window sits in the address space. */
+    section = memory_region_find(&proxy->iomem, 0, 0x200);
+    assert(section.mr);
+
+    full_path = parent_path
+        ? g_strdup_printf("%s/virtio-mmio@" TARGET_FMT_plx, parent_path,
+                          section.offset_within_address_space)
+        : g_strdup_printf("virtio-mmio@" TARGET_FMT_plx,
+                          section.offset_within_address_space);
+
+    /* Drop the reference taken by memory_region_find(). */
+    memory_region_unref(section.mr);
+    g_free(parent_path);
+
+    return full_path;
+}
+
+/*
+ * VirtioBusClass::vmstate_change hook: start ioeventfd handling when the
+ * VM starts running and stop it when the VM stops.
+ */
+static void virtio_mmio_vmstate_change(DeviceState *d, bool running)
+{
+    VirtIOMMIOProxy *mmio = VIRTIO_MMIO(d);
+
+    if (!running) {
+        virtio_mmio_stop_ioeventfd(mmio);
+        return;
+    }
+
+    virtio_mmio_start_ioeventfd(mmio);
+}
+
+/*
+ * Class initializer for TYPE_VIRTIO_MMIO_BUS: wires the virtio-mmio
+ * transport callbacks into the virtio bus class and limits the bus to a
+ * single device.
+ */
+static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data)
+{
+    VirtioBusClass *vbc = VIRTIO_BUS_CLASS(klass);
+    BusClass *bc = BUS_CLASS(klass);
+
+    /* Generic bus behaviour. */
+    bc->max_dev = 1;
+    bc->get_dev_path = virtio_mmio_bus_get_dev_path;
+
+    /* Interrupt delivery and notifiers. */
+    vbc->notify = virtio_mmio_update_irq;
+    vbc->set_guest_notifiers = virtio_mmio_set_guest_notifiers;
+    vbc->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled;
+    vbc->ioeventfd_assign = virtio_mmio_ioeventfd_assign;
+
+    /* Migration of the transport state. */
+    vbc->save_config = virtio_mmio_save_config;
+    vbc->load_config = virtio_mmio_load_config;
+    vbc->save_extra_state = virtio_mmio_save_extra_state;
+    vbc->load_extra_state = virtio_mmio_load_extra_state;
+    vbc->has_extra_state = virtio_mmio_has_extra_state;
+
+    /* Plug and run-state hooks. */
+    vbc->pre_plugged = virtio_mmio_pre_plugged;
+    vbc->vmstate_change = virtio_mmio_vmstate_change;
+
+    vbc->has_variable_vring_alignment = true;
+}
+
+/* QOM type description of the virtio-mmio bus, a TYPE_VIRTIO_BUS subtype. */
+static const TypeInfo virtio_mmio_bus_info = {
+    .name = TYPE_VIRTIO_MMIO_BUS,
+    .parent = TYPE_VIRTIO_BUS,
+    .class_init = virtio_mmio_bus_class_init,
+    .instance_size = sizeof(VirtioBusState),
+};
+
+/* Register the virtio-mmio bus type and the virtio-mmio proxy device type. */
+static void virtio_mmio_register_types(void)
+{
+    type_register_static(&virtio_mmio_bus_info);
+    type_register_static(&virtio_mmio_info);
+}
+
+/* Hand the registration function to type_init(). */
+type_init(virtio_mmio_register_types)
diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c
new file mode 100644
index 00000000..e03543a7
--- /dev/null
+++ b/hw/virtio/virtio-net-pci.c
@@ -0,0 +1,108 @@
+/*
+ * Virtio net PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-net.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+typedef struct VirtIONetPCI VirtIONetPCI;
+
+/*
+ * virtio-net-pci: This extends VirtioPCIProxy.
+ *
+ * TYPE_VIRTIO_NET_PCI is the base type name; the generic, transitional and
+ * non-transitional type names are listed in virtio_net_pci_info.
+ */
+#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIONetPCI, VIRTIO_NET_PCI,
+                         TYPE_VIRTIO_NET_PCI)
+
+/* Instance state: the PCI proxy with the virtio-net device embedded in it. */
+struct VirtIONetPCI {
+    /* Parent object; kept as the first member. */
+    VirtIOPCIProxy parent_obj;
+    /* Embedded device, set up in virtio_net_pci_instance_init(). */
+    VirtIONet vdev;
+};
+
+/*
+ * Properties of the virtio-net PCI proxy:
+ *  - "ioeventfd": the VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT bit in the proxy
+ *    flags, on by default;
+ *  - "vectors": number of vectors; left at DEV_NVECTORS_UNSPECIFIED by
+ *    default, in which case virtio_net_pci_realize() computes it.
+ */
+static Property virtio_net_properties[] = {
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * VirtioPCIClass::realize hook for virtio-net-pci.
+ *
+ * If the number of vectors was left unspecified, it defaults to two per
+ * configured peer queue (at least one queue is assumed) plus one for the
+ * config interrupt and one for the control vq. The embedded virtio-net
+ * device then gets its net client name and is realized on the proxy's
+ * virtio bus; realization errors are reported through @errp.
+ */
+static void virtio_net_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    DeviceState *qdev = DEVICE(vpci_dev);
+    VirtIONetPCI *dev = VIRTIO_NET_PCI(vpci_dev);
+    DeviceState *vdev = DEVICE(&dev->vdev);
+    VirtIONet *net = VIRTIO_NET(vdev);
+
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        int queue_count = MAX(net->nic_conf.peers.queues, 1);
+
+        /* two per queue + config interrupt + control vq */
+        vpci_dev->nvectors = 2 * queue_count + 1 + 1;
+    }
+
+    virtio_net_set_netclient_name(&dev->vdev, qdev->id,
+                                  object_get_typename(OBJECT(qdev)));
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initializer for virtio-net-pci: fills in the PCI identity and
+ * option ROM, files the device under the NETWORK category, installs the
+ * property table and hooks up the realize callback.
+ */
+static void virtio_net_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+
+    /* PCI identity. */
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_NET;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = PCI_CLASS_NETWORK_ETHERNET;
+    pci_class->romfile = "efi-virtio.rom";
+
+    /* Generic device setup. */
+    set_bit(DEVICE_CATEGORY_NETWORK, device_class->categories);
+    device_class_set_props(device_class, virtio_net_properties);
+
+    vpci_class->realize = virtio_net_pci_realize;
+}
+
+/*
+ * Instance initializer: sets up the embedded virtio-net device as a
+ * TYPE_VIRTIO_NET object and exposes its "bootindex" property on the PCI
+ * proxy through an alias of the same name.
+ */
+static void virtio_net_pci_instance_init(Object *obj)
+{
+    VirtIONetPCI *net_pci = VIRTIO_NET_PCI(obj);
+
+    virtio_instance_init_common(obj, &net_pci->vdev, sizeof(net_pci->vdev),
+                                TYPE_VIRTIO_NET);
+    object_property_add_alias(obj, "bootindex", OBJECT(&net_pci->vdev),
+                              "bootindex");
+}
+
+/*
+ * Type description passed to virtio_pci_types_register(): the base type
+ * name plus the generic, transitional and non-transitional type names.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = {
+    .base_name = TYPE_VIRTIO_NET_PCI,
+    .generic_name = "virtio-net-pci",
+    .transitional_name = "virtio-net-pci-transitional",
+    .non_transitional_name = "virtio-net-pci-non-transitional",
+    .class_init = virtio_net_pci_class_init,
+    .instance_size = sizeof(VirtIONetPCI),
+    .instance_init = virtio_net_pci_instance_init,
+};
+
+/* Register the virtio-net-pci types described by virtio_net_pci_info. */
+static void virtio_net_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_net_pci_info);
+}
+
+/* Hand the registration function to type_init(). */
+type_init(virtio_net_pci_register)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
new file mode 100644
index 00000000..a1c9dfa7
--- /dev/null
+++ b/hw/virtio/virtio-pci.c
@@ -0,0 +1,2298 @@
+/*
+ * Virtio PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "exec/memop.h"
+#include "standard-headers/linux/virtio_pci.h"
+#include "hw/boards.h"
+#include "hw/virtio/virtio.h"
+#include "migration/qemu-file-types.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/loader.h"
+#include "sysemu/kvm.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qemu/range.h"
+#include "hw/virtio/virtio-bus.h"
+#include "qapi/visitor.h"
+#include "sysemu/replay.h"
+#include "trace.h"
+
+/*
+ * Config offset computed from whether the device has MSI-X at all
+ * (msix_present()), as opposed to whether the guest has enabled it.
+ * NOTE(review): presumably the size of the legacy register header - confirm
+ * against VIRTIO_PCI_CONFIG_OFF in the virtio_pci.h header.
+ */
+#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))
+
+/* Drop any VIRTIO_PCI_CONFIG definition inherited from the included headers. */
+#undef VIRTIO_PCI_CONFIG
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space.
+ *
+ * The boundary depends on whether MSI-X is currently enabled
+ * (msix_enabled()); accesses below it are routed to the legacy register
+ * handlers, accesses at or above it to the device config accessors.
+ */
+#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))
+
+/* Forward declarations for functions defined later in this file. */
+static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
+                               VirtIOPCIProxy *dev);
+static void virtio_pci_reset(DeviceState *qdev);
+
+/* virtio device */
+/*
+ * Convert a DeviceState pointer into the VirtIOPCIProxy that embeds it
+ * (via pci_dev.qdev). Intended for use off the data path.
+ * TODO: use QOM.
+ */
+static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+
+    return proxy;
+}
+
+/*
+ * DeviceState to VirtIOPCIProxy, for use on the data path.
+ *
+ * Recovers the enclosing proxy from the embedded pci_dev.qdev member with
+ * plain pointer arithmetic (container_of). Be careful and test performance
+ * if you change this.
+ */
+static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
+{
+    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+}
+
+/*
+ * Deliver an interrupt for the device to the guest.
+ *
+ * With MSI-X enabled, @vector is signalled unless it is VIRTIO_NO_VECTOR.
+ * Otherwise the INTx line is driven from bit 0 of the device's ISR.
+ */
+static void virtio_pci_notify(DeviceState *d, uint16_t vector)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
+    PCIDevice *pci_dev = &proxy->pci_dev;
+
+    if (!msix_enabled(pci_dev)) {
+        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+        pci_set_irq(pci_dev, qatomic_read(&vdev->isr) & 1);
+        return;
+    }
+
+    if (vector != VIRTIO_NO_VECTOR) {
+        msix_notify(pci_dev, vector);
+    }
+}
+
+/*
+ * Save the transport-level configuration to the migration stream: the PCI
+ * device state, the MSI-X state and, when the device has MSI-X, the config
+ * interrupt vector as a big-endian 16-bit value.
+ */
+static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    PCIDevice *pci_dev = &proxy->pci_dev;
+
+    pci_device_save(pci_dev, f);
+    msix_save(pci_dev, f);
+
+    if (!msix_present(pci_dev)) {
+        return;
+    }
+    qemu_put_be16(f, vdev->config_vector);
+}
+
+/*
+ * Migration description of one VirtIOPCIQueue: queue size, enabled flag and
+ * the descriptor/avail/used ring addresses, each stored as two 32-bit words.
+ * The field order defines the stream layout, so do not reorder entries.
+ */
+static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
+    .name = "virtio_pci/modern_queue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(num, VirtIOPCIQueue),
+        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
+        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
+        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
+        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
+        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Subsection predicate: the modern-state subsection is migrated only when
+ * the proxy (passed as @opaque) exposes the modern interface.
+ */
+static bool virtio_pci_modern_state_needed(void *opaque)
+{
+    return virtio_pci_modern((VirtIOPCIProxy *)opaque);
+}
+
+/*
+ * Optional subsection carrying the modern-interface proxy state: the device
+ * and guest feature selectors, the guest feature words and the per-queue
+ * state for all VIRTIO_QUEUE_MAX queues. It is included only when
+ * virtio_pci_modern_state_needed() returns true.
+ */
+static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
+    .name = "virtio_pci/modern_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_pci_modern_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
+        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
+        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
+        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
+                             vmstate_virtio_pci_modern_queue_state,
+                             VirtIOPCIQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Top-level description of the proxy's extra migration state. It has no
+ * fields of its own; all content lives in the optional subsections listed
+ * below.
+ */
+static const VMStateDescription vmstate_virtio_pci = {
+    .name = "virtio_pci",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_virtio_pci_modern_state_sub,
+        NULL
+    }
+};
+
+/* Report whether the proxy has VIRTIO_PCI_FLAG_MIGRATE_EXTRA set. */
+static bool virtio_pci_has_extra_state(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    bool migrate_extra = (proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA) != 0;
+
+    return migrate_extra;
+}
+
+/* Write the proxy's extra state to @f using the vmstate_virtio_pci layout. */
+static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
+{
+    vmstate_save_state(f, &vmstate_virtio_pci, to_virtio_pci_proxy(d), NULL);
+}
+
+/*
+ * Read the proxy's extra state from @f using the vmstate_virtio_pci layout
+ * (version 1). Returns the result of vmstate_load_state().
+ */
+static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
+{
+    return vmstate_load_state(f, &vmstate_virtio_pci,
+                              to_virtio_pci_proxy(d), 1);
+}
+
+/*
+ * Save the transport state of queue @n: when the device has MSI-X, the
+ * queue's interrupt vector is written as a big-endian 16-bit value;
+ * otherwise nothing is written.
+ */
+static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!msix_present(&proxy->pci_dev)) {
+        return;
+    }
+    qemu_put_be16(f, virtio_queue_vector(vdev, n));
+}
+
+/*
+ * Load the transport-level configuration from the migration stream.
+ *
+ * Restores the PCI device state, drops all MSI-X vector usage, restores the
+ * MSI-X state and then the config interrupt vector.
+ *
+ * Returns 0 on success, the error from pci_device_load() if that fails, or
+ * -EINVAL when the stream carries a vector that is neither VIRTIO_NO_VECTOR
+ * nor below proxy->nvectors.
+ */
+static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    PCIDevice *pci_dev = &proxy->pci_dev;
+    /* Without MSI-X there is no vector in the stream. */
+    uint16_t cfg_vector = VIRTIO_NO_VECTOR;
+    int rc;
+
+    rc = pci_device_load(pci_dev, f);
+    if (rc) {
+        return rc;
+    }
+
+    msix_unuse_all_vectors(pci_dev);
+    msix_load(pci_dev, f);
+
+    if (msix_present(pci_dev)) {
+        qemu_get_be16s(f, &cfg_vector);
+
+        if (cfg_vector != VIRTIO_NO_VECTOR &&
+            cfg_vector >= proxy->nvectors) {
+            return -EINVAL;
+        }
+    }
+
+    vdev->config_vector = cfg_vector;
+    if (cfg_vector != VIRTIO_NO_VECTOR) {
+        msix_vector_use(pci_dev, cfg_vector);
+    }
+    return 0;
+}
+
+/*
+ * Load the transport state of queue @n from the migration stream.
+ *
+ * When the device has MSI-X the queue's vector is read from @f; otherwise
+ * the queue is given VIRTIO_NO_VECTOR.
+ *
+ * Returns 0 on success, or -EINVAL when the stream carries a vector that is
+ * neither VIRTIO_NO_VECTOR nor below proxy->nvectors.
+ */
+static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    PCIDevice *pci_dev = &proxy->pci_dev;
+    uint16_t queue_vector = VIRTIO_NO_VECTOR;
+
+    if (msix_present(pci_dev)) {
+        qemu_get_be16s(f, &queue_vector);
+        if (queue_vector != VIRTIO_NO_VECTOR &&
+            queue_vector >= proxy->nvectors) {
+            return -EINVAL;
+        }
+    }
+
+    virtio_queue_set_vector(vdev, n, queue_vector);
+    if (queue_vector != VIRTIO_NO_VECTOR) {
+        msix_vector_use(pci_dev, queue_vector);
+    }
+
+    return 0;
+}
+
+/* Report whether the proxy has VIRTIO_PCI_FLAG_USE_IOEVENTFD set. */
+static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+
+    if (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) {
+        return true;
+    }
+    return false;
+}
+
+/* Notify-address multiplier used when VIRTIO_PCI_FLAG_PAGE_PER_VQ is set. */
+#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000
+
+/*
+ * Return the factor that a queue index is multiplied by to obtain the
+ * queue's offset in the modern notify region: 0x1000 with
+ * VIRTIO_PCI_FLAG_PAGE_PER_VQ, 4 otherwise.
+ */
+static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
+{
+    if (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) {
+        return QEMU_VIRTIO_PCI_QUEUE_MEM_MULT;
+    }
+    return 4;
+}
+
+/*
+ * Attach (@assign true) or detach (@assign false) @notifier as the ioeventfd
+ * for queue @n on every notification register the proxy exposes.
+ *
+ * Modern devices get an eventfd at the queue's offset in the MMIO notify
+ * region (length 0 when KVM supports any-length ioeventfds, otherwise 2),
+ * plus one on the PIO notify region when VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY
+ * is set. Legacy devices get one on VIRTIO_PCI_QUEUE_NOTIFY in the legacy
+ * BAR, matching on the queue number.
+ *
+ * Always returns 0.
+ */
+static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
+                                       int n, bool assign)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtQueue *vq = virtio_get_queue(vdev, n);
+    bool legacy = virtio_pci_legacy(proxy);
+    bool modern = virtio_pci_modern(proxy);
+    bool fast_mmio = kvm_ioeventfd_any_length_enabled();
+    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
+    MemoryRegion *mmio_notify_mr = &proxy->notify.mr;
+    MemoryRegion *pio_notify_mr = &proxy->notify_pio.mr;
+    MemoryRegion *legacy_mr = &proxy->bar;
+    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
+                         virtio_get_queue_index(vq);
+    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;
+    /* Any-length eventfds are registered with size 0, otherwise 16 bit. */
+    unsigned mmio_size = fast_mmio ? 0 : 2;
+
+    if (modern) {
+        if (assign) {
+            memory_region_add_eventfd(mmio_notify_mr, modern_addr, mmio_size,
+                                      false, n, notifier);
+        } else {
+            memory_region_del_eventfd(mmio_notify_mr, modern_addr, mmio_size,
+                                      false, n, notifier);
+        }
+
+        if (modern_pio) {
+            if (assign) {
+                memory_region_add_eventfd(pio_notify_mr, 0, 2,
+                                          true, n, notifier);
+            } else {
+                memory_region_del_eventfd(pio_notify_mr, 0, 2,
+                                          true, n, notifier);
+            }
+        }
+    }
+
+    if (legacy) {
+        if (assign) {
+            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
+                                      true, n, notifier);
+        } else {
+            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
+                                      true, n, notifier);
+        }
+    }
+
+    return 0;
+}
+
+/* Start ioeventfd handling on the proxy's virtio bus. */
+static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
+{
+    virtio_bus_start_ioeventfd(&proxy->bus);
+}
+
+/* Stop ioeventfd handling on the proxy's virtio bus. */
+static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
+{
+    virtio_bus_stop_ioeventfd(&proxy->bus);
+}
+
+/*
+ * Handle a guest write to one of the legacy virtio header registers.
+ *
+ * @opaque: the VirtIOPCIProxy being accessed
+ * @addr:   register offset within the legacy header
+ * @val:    value written by the guest
+ *
+ * Writes to offsets without a handler are logged as guest errors.
+ */
+static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    switch (addr) {
+    case VIRTIO_PCI_GUEST_FEATURES:
+        /* Guest does not negotiate properly?  We have to assume nothing. */
+        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
+            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
+        }
+        virtio_set_features(vdev, val);
+        break;
+
+    case VIRTIO_PCI_QUEUE_PFN: {
+        hwaddr queue_pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+
+        /* A zero address resets the device instead of programming a queue. */
+        if (queue_pa != 0) {
+            virtio_queue_set_addr(vdev, vdev->queue_sel, queue_pa);
+        } else {
+            virtio_pci_reset(DEVICE(proxy));
+        }
+        break;
+    }
+
+    case VIRTIO_PCI_QUEUE_SEL:
+        /* Out-of-range selectors are ignored. */
+        if (val < VIRTIO_QUEUE_MAX) {
+            vdev->queue_sel = val;
+        }
+        break;
+
+    case VIRTIO_PCI_QUEUE_NOTIFY:
+        if (val < VIRTIO_QUEUE_MAX) {
+            virtio_queue_notify(vdev, val);
+        }
+        break;
+
+    case VIRTIO_PCI_STATUS: {
+        bool driver_ok = (val & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
+
+        /* ioeventfd is stopped before, and started after, the status update */
+        if (!driver_ok) {
+            virtio_pci_stop_ioeventfd(proxy);
+        }
+
+        virtio_set_status(vdev, val & 0xFF);
+
+        if (driver_ok) {
+            virtio_pci_start_ioeventfd(proxy);
+        }
+
+        if (vdev->status == 0) {
+            virtio_pci_reset(DEVICE(proxy));
+        }
+
+        /* Linux before 2.6.34 drives the device without enabling
+           the PCI device bus master bit. Enable it automatically
+           for the guest. This is a PCI spec violation but so is
+           initiating DMA with bus master bit clear. */
+        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
+            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
+                                     proxy->pci_dev.config[PCI_COMMAND] |
+                                     PCI_COMMAND_MASTER, 1);
+        }
+        break;
+    }
+
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        /* Release the previously selected vector, if any. */
+        if (vdev->config_vector != VIRTIO_NO_VECTOR) {
+            msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        }
+        /* Make it possible for guest to discover an error took place. */
+        if (val >= proxy->nvectors) {
+            val = VIRTIO_NO_VECTOR;
+        } else {
+            msix_vector_use(&proxy->pci_dev, val);
+        }
+        vdev->config_vector = val;
+        break;
+
+    case VIRTIO_MSI_QUEUE_VECTOR: {
+        uint16_t old_vector = virtio_queue_vector(vdev, vdev->queue_sel);
+
+        /* Release the previously selected vector, if any. */
+        if (old_vector != VIRTIO_NO_VECTOR) {
+            msix_vector_unuse(&proxy->pci_dev, old_vector);
+        }
+        /* Make it possible for guest to discover an error took place. */
+        if (val >= proxy->nvectors) {
+            val = VIRTIO_NO_VECTOR;
+        } else {
+            msix_vector_use(&proxy->pci_dev, val);
+        }
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    }
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: unexpected address 0x%x value 0x%x\n",
+                      __func__, addr, val);
+        break;
+    }
+}
+
+/*
+ * Handle a guest read of one of the legacy virtio header registers.
+ *
+ * Returns the register value, or 0xFFFFFFFF for offsets without a handler.
+ * Reading VIRTIO_PCI_ISR has side effects: the ISR is cleared and the
+ * interrupt line is deasserted.
+ */
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
+{
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    switch (addr) {
+    case VIRTIO_PCI_HOST_FEATURES:
+        return vdev->host_features;
+    case VIRTIO_PCI_GUEST_FEATURES:
+        return vdev->guest_features;
+    case VIRTIO_PCI_QUEUE_PFN:
+        return virtio_queue_get_addr(vdev, vdev->queue_sel)
+               >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+    case VIRTIO_PCI_QUEUE_NUM:
+        return virtio_queue_get_num(vdev, vdev->queue_sel);
+    case VIRTIO_PCI_QUEUE_SEL:
+        return vdev->queue_sel;
+    case VIRTIO_PCI_STATUS:
+        return vdev->status;
+    case VIRTIO_PCI_ISR: {
+        /* reading from the ISR also clears it. */
+        uint32_t isr = qatomic_xchg(&vdev->isr, 0);
+
+        pci_irq_deassert(&proxy->pci_dev);
+        return isr;
+    }
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        return vdev->config_vector;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        return virtio_queue_vector(vdev, vdev->queue_sel);
+    default:
+        /* No handler for this offset: read back as all ones. */
+        return 0xFFFFFFFF;
+    }
+}
+
+/*
+ * MemoryRegionOps read handler for the legacy I/O BAR.
+ *
+ * Offsets below VIRTIO_PCI_CONFIG_SIZE are legacy header registers;
+ * everything above is device-specific config space, read with the accessor
+ * matching @size and byte-swapped when the device is big endian.
+ *
+ * Returns UINT64_MAX when no virtio device is plugged, and 0 for access
+ * sizes other than 1, 2 or 4 in the device config area.
+ */
+static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
+                                       unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    uint32_t header_size = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
+    uint64_t result = 0;
+
+    if (vdev == NULL) {
+        return UINT64_MAX;
+    }
+
+    if (addr < header_size) {
+        return virtio_ioport_read(proxy, addr);
+    }
+    addr -= header_size;
+
+    if (size == 1) {
+        result = virtio_config_readb(vdev, addr);
+    } else if (size == 2) {
+        result = virtio_config_readw(vdev, addr);
+        if (virtio_is_big_endian(vdev)) {
+            result = bswap16(result);
+        }
+    } else if (size == 4) {
+        result = virtio_config_readl(vdev, addr);
+        if (virtio_is_big_endian(vdev)) {
+            result = bswap32(result);
+        }
+    }
+    return result;
+}
+
+/*
+ * MemoryRegionOps write handler for the legacy I/O BAR.
+ *
+ * Offsets below VIRTIO_PCI_CONFIG_SIZE go to the legacy header register
+ * handler; everything above is written to device-specific config space
+ * with the accessor matching @size. Writes are dropped when no virtio
+ * device is plugged or when @size is not 1, 2 or 4.
+ */
+static void virtio_pci_config_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    uint32_t header_size = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (vdev == NULL) {
+        return;
+    }
+
+    if (addr < header_size) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= header_size;
+
+    /*
+     * Virtio-PCI is odd. Ioports are LE but config space is target native
+     * endian.
+     */
+    if (size == 1) {
+        virtio_config_writeb(vdev, addr, val);
+    } else if (size == 2) {
+        if (virtio_is_big_endian(vdev)) {
+            val = bswap16(val);
+        }
+        virtio_config_writew(vdev, addr, val);
+    } else if (size == 4) {
+        if (virtio_is_big_endian(vdev)) {
+            val = bswap32(val);
+        }
+        virtio_config_writel(vdev, addr, val);
+    }
+}
+
+/*
+ * Memory region callbacks for the legacy I/O BAR: little-endian accesses of
+ * 1 to 4 bytes are handed to virtio_pci_config_read()/_write().
+ */
+static const MemoryRegionOps virtio_pci_config_ops = {
+    .read = virtio_pci_config_read,
+    .write = virtio_pci_config_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Find the proxy region that fully contains the access [*off, *off + len).
+ *
+ * On success *off is rewritten to be relative to the start of that region
+ * and the region's MemoryRegion is returned. Returns NULL, leaving *off
+ * untouched, when no region in proxy->regs covers the whole access.
+ */
+static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
+                                                 hwaddr *off, int len)
+{
+    int idx;
+
+    for (idx = 0; idx < ARRAY_SIZE(proxy->regs); ++idx) {
+        VirtIOPCIRegion *region = &proxy->regs[idx];
+
+        if (*off < region->offset) {
+            continue;
+        }
+        if (*off + len > region->offset + region->size) {
+            continue;
+        }
+
+        *off -= region->offset;
+        return &region->mr;
+    }
+
+    return NULL;
+}
+
+/* Below are generic functions to do memcpy from/to an address space,
+ * without byteswaps, with input validation.
+ *
+ * As regular address_space_* APIs all do some kind of byteswap at least for
+ * some host/target combinations, we are forced to explicitly convert to a
+ * known-endianness integer value.
+ * It doesn't really matter which endian format to go through, so the code
+ * below selects the endian that causes the least amount of work on the given
+ * host.
+ *
+ * Note: host pointer must be aligned.
+ */
+/*
+ * Write @len bytes from @buf to the proxy region containing @addr.
+ *
+ * @addr is first aligned down to @len. The write is silently dropped when
+ * no region covers the access or when @len is not 1, 2 or 4. @buf must be
+ * aligned to @len (asserted).
+ */
+static
+void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
+                                const uint8_t *buf, int len)
+{
+    MemoryRegion *target;
+    uint64_t data;
+
+    /* address_space_* APIs assume an aligned address.
+     * As address is under guest control, handle illegal values.
+     */
+    addr &= ~(len - 1);
+
+    target = virtio_address_space_lookup(proxy, &addr, len);
+    if (target == NULL) {
+        return;
+    }
+
+    /* Make sure caller aligned buf properly */
+    assert(!(((uintptr_t)buf) & (len - 1)));
+
+    if (len == 1) {
+        data = pci_get_byte(buf);
+    } else if (len == 2) {
+        data = pci_get_word(buf);
+    } else if (len == 4) {
+        data = pci_get_long(buf);
+    } else {
+        /* As length is under guest control, handle illegal values. */
+        return;
+    }
+
+    memory_region_dispatch_write(target, addr, data, size_memop(len) | MO_LE,
+                                 MEMTXATTRS_UNSPECIFIED);
+}
+
+/*
+ * Read @len bytes into @buf from the proxy region containing @addr.
+ *
+ * @addr is aligned down to @len. Nothing is read, and @buf is left
+ * untouched, when @len is not 1, 2 or 4 or when no region covers the
+ * access. @buf must be aligned to @len (asserted).
+ *
+ * The length is validated before anything else so that an illegal value
+ * never reaches size_memop() or the region's read handler, matching
+ * virtio_address_space_write(), which also rejects bad lengths before
+ * dispatching.
+ */
+static void
+virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
+                          uint8_t *buf, int len)
+{
+    /* Zero-initialized so buf never receives an indeterminate value. */
+    uint64_t val = 0;
+    MemoryRegion *mr;
+
+    /* As length is under guest control, handle illegal values. */
+    if (len != 1 && len != 2 && len != 4) {
+        return;
+    }
+
+    /* address_space_* APIs assume an aligned address.
+     * As address is under guest control, handle illegal values.
+     */
+    addr &= ~(len - 1);
+
+    mr = virtio_address_space_lookup(proxy, &addr, len);
+    if (!mr) {
+        return;
+    }
+
+    /* Make sure caller aligned buf properly */
+    assert(!(((uintptr_t)buf) & (len - 1)));
+
+    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
+                                MEMTXATTRS_UNSPECIFIED);
+    switch (len) {
+    case 1:
+        pci_set_byte(buf, val);
+        break;
+    case 2:
+        pci_set_word(buf, val);
+        break;
+    case 4:
+        pci_set_long(buf, val);
+        break;
+    default:
+        /* Unreachable: len was validated above. */
+        break;
+    }
+}
+
+/*
+ * PCI config space write hook for the virtio proxy.
+ *
+ * After the default config write (and the FLR hook when
+ * VIRTIO_PCI_FLAG_INIT_FLR is set):
+ *  - a write covering PCI_COMMAND that leaves bus mastering off disables
+ *    the device, stops ioeventfd and clears DRIVER_OK; with bus mastering
+ *    on the device is re-enabled;
+ *  - a write overlapping the pci_cfg_data window of the config access
+ *    capability is forwarded to the region selected by the capability's
+ *    offset/length, provided the length is 1, 2 or 4.
+ */
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    struct virtio_pci_cfg_cap *cfg;
+
+    pci_default_write_config(pci_dev, address, val, len);
+
+    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
+        pcie_cap_flr_write_config(pci_dev, address, val, len);
+    }
+
+    if (range_covers_byte(address, len, PCI_COMMAND)) {
+        if (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER) {
+            virtio_set_disabled(vdev, false);
+        } else {
+            virtio_set_disabled(vdev, true);
+            virtio_pci_stop_ioeventfd(proxy);
+            virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
+        }
+    }
+
+    if (!proxy->config_cap) {
+        return;
+    }
+
+    if (ranges_overlap(address, len,
+                       proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
+                                                    pci_cfg_data),
+                       sizeof cfg->pci_cfg_data)) {
+        uint32_t cap_off;
+        uint32_t cap_len;
+
+        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
+        cap_off = le32_to_cpu(cfg->cap.offset);
+        cap_len = le32_to_cpu(cfg->cap.length);
+
+        /* Only naturally sized accesses are forwarded. */
+        if (cap_len == 1 || cap_len == 2 || cap_len == 4) {
+            assert(cap_len <= sizeof cfg->pci_cfg_data);
+            virtio_address_space_write(proxy, cap_off, cfg->pci_cfg_data,
+                                       cap_len);
+        }
+    }
+}
+
+/*
+ * PCI config space read hook for the virtio proxy.
+ *
+ * When the read overlaps the pci_cfg_data window of the config access
+ * capability, the window is first refreshed from the region selected by the
+ * capability's offset/length (lengths 1, 2 and 4 only). The value is then
+ * produced by the default config read.
+ */
+static uint32_t virtio_read_config(PCIDevice *pci_dev,
+                                   uint32_t address, int len)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    struct virtio_pci_cfg_cap *cfg;
+
+    if (proxy->config_cap &&
+        ranges_overlap(address, len,
+                       proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
+                                                    pci_cfg_data),
+                       sizeof cfg->pci_cfg_data)) {
+        uint32_t cap_off;
+        uint32_t cap_len;
+
+        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
+        cap_off = le32_to_cpu(cfg->cap.offset);
+        cap_len = le32_to_cpu(cfg->cap.length);
+
+        /* Only naturally sized accesses are forwarded. */
+        if (cap_len == 1 || cap_len == 2 || cap_len == 4) {
+            assert(cap_len <= sizeof cfg->pci_cfg_data);
+            virtio_address_space_read(proxy, cap_off, cfg->pci_cfg_data,
+                                      cap_len);
+        }
+    }
+
+    return pci_default_read_config(pci_dev, address, len);
+}
+
+/*
+ * Take a reference on the KVM MSI route for @vector.
+ *
+ * The first user of a vector adds the MSI route and records the returned
+ * virq; later users only bump the use count.
+ *
+ * Returns 0 on success or the negative error from
+ * kvm_irqchip_add_msi_route(), in which case the use count is unchanged.
+ */
+static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
+                                        unsigned int queue_no,
+                                        unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    KVMRouteChange change;
+    int virq;
+
+    if (irqfd->users != 0) {
+        /* Route already exists, just add another user. */
+        irqfd->users++;
+        return 0;
+    }
+
+    change = kvm_irqchip_begin_route_changes(kvm_state);
+    virq = kvm_irqchip_add_msi_route(&change, vector, &proxy->pci_dev);
+    if (virq < 0) {
+        return virq;
+    }
+    kvm_irqchip_commit_route_changes(&change);
+
+    irqfd->virq = virq;
+    irqfd->users++;
+    return 0;
+}
+
+/*
+ * Drop one reference on the KVM MSI route for @vector and release the virq
+ * when the last user goes away.
+ */
+static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
+                                             unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+
+    irqfd->users--;
+    if (irqfd->users == 0) {
+        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+    }
+}
+
+/*
+ * Attach the guest notifier of queue @queue_no as an irqfd on the virq
+ * recorded for @vector. Returns the result of
+ * kvm_irqchip_add_irqfd_notifier_gsi().
+ */
+static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
+                                 unsigned int queue_no,
+                                 unsigned int vector)
+{
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
+    EventNotifier *guest_notifier = virtio_queue_get_guest_notifier(vq);
+    int rc;
+
+    rc = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, guest_notifier, NULL,
+                                            irqfd->virq);
+    return rc;
+}
+
+/*
+ * Detach the guest notifier of queue @queue_no from the virq recorded for
+ * @vector. The removal is asserted to succeed, so this must only be called
+ * for an irqfd that is currently attached.
+ */
+static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
+                                      unsigned int queue_no,
+                                      unsigned int vector)
+{
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
+    EventNotifier *guest_notifier = virtio_queue_get_guest_notifier(vq);
+    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+    int rc = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, guest_notifier,
+                                                   irqfd->virq);
+
+    assert(rc == 0);
+}
+
+/*
+ * Set up KVM MSI routes (and, where the device supports notifier masking,
+ * irqfds) for the first @nvqs queues.
+ *
+ * Iteration stops at the first queue with a size of zero. Queues whose
+ * vector is not below msix_nr_vectors_allocated() are skipped.
+ *
+ * Returns 0 on success. On failure everything set up for earlier queues is
+ * torn down again and the negative error is returned.
+ */
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    unsigned int vector;
+    int ret, queue_no;
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        /* A zero-sized queue ends the scan. */
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        /* Skip queues without a usable vector. */
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
+        if (ret < 0) {
+            goto undo;
+        }
+        /* If guest supports masking, set up irqfd now.
+         * Otherwise, delay until unmasked in the frontend.
+         */
+        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+            if (ret < 0) {
+                /* Drop the route reference taken just above for this queue. */
+                kvm_virtio_pci_vq_vector_release(proxy, vector);
+                goto undo;
+            }
+        }
+    }
+    return 0;
+
+undo:
+    /* Roll back queues [0, queue_no) in reverse order; the failing queue
+     * itself has already been cleaned up (or never acquired anything). */
+    while (--queue_no >= 0) {
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector >= msix_nr_vectors_allocated(dev)) {
+            continue;
+        }
+        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+        }
+        kvm_virtio_pci_vq_vector_release(proxy, vector);
+    }
+    return ret;
+}
+
+/*
+ * Release the KVM MSI routes (and irqfds, where the device supports
+ * notifier masking) of the first @nvqs queues.
+ *
+ * Iteration stops at the first queue with a size of zero; queues whose
+ * vector is not below msix_nr_vectors_allocated() are skipped.
+ */
+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    unsigned int vector;
+    int queue_no;
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector < msix_nr_vectors_allocated(dev)) {
+            /* If guest supports masking, clean up irqfd now.
+             * Otherwise, it was cleaned when masked in the frontend.
+             */
+            if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+                kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+            }
+            kvm_virtio_pci_vq_vector_release(proxy, vector);
+        }
+    }
+}
+
+/*
+ * Unmask @vector for queue @queue_no.
+ *
+ * When irqfd routing is in use and @msg differs from the message recorded
+ * for the vector, the KVM MSI route is updated first. Then either the
+ * device's notifier is unmasked (re-signalling the guest notifier if an
+ * event is pending) or, for devices without masking support, the irqfd is
+ * attached now.
+ *
+ * Returns a negative error on failure, a non-negative value otherwise.
+ */
+static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+                                       unsigned int queue_no,
+                                       unsigned int vector,
+                                       MSIMessage msg)
+{
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
+    EventNotifier *guest_notifier = virtio_queue_get_guest_notifier(vq);
+    int ret = 0;
+
+    if (proxy->vector_irqfd) {
+        VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+        bool msg_changed = irqfd->msg.data != msg.data ||
+                           irqfd->msg.address != msg.address;
+
+        if (msg_changed) {
+            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
+                                               &proxy->pci_dev);
+            if (ret < 0) {
+                return ret;
+            }
+            kvm_irqchip_commit_routes(kvm_state);
+        }
+    }
+
+    /* If guest supports masking, irqfd is already setup, unmask it.
+     * Otherwise, set it up now.
+     */
+    if (!(vdev->use_guest_notifier_mask && k->guest_notifier_mask)) {
+        return kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+    }
+
+    k->guest_notifier_mask(vdev, queue_no, false);
+    /* Test after unmasking to avoid losing events. */
+    if (k->guest_notifier_pending &&
+        k->guest_notifier_pending(vdev, queue_no)) {
+        event_notifier_set(guest_notifier);
+    }
+    return ret;
+}
+
+/*
+ * Mask @vector for queue @queue_no: devices with notifier masking support
+ * keep their irqfd and are told to mask; for all others the irqfd is
+ * detached.
+ */
+static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
+                                             unsigned int queue_no,
+                                             unsigned int vector)
+{
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    /* If guest supports masking, keep irqfd but mask it.
+     * Otherwise, clean it up now.
+     */
+    if (!(vdev->use_guest_notifier_mask && k->guest_notifier_mask)) {
+        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+        return;
+    }
+    k->guest_notifier_mask(vdev, queue_no, true);
+}
+
+/*
+ * MSI-X vector notifier: @vector has been unmasked with message @msg.
+ *
+ * Unmasks every queue bound to @vector that has guest notifiers assigned
+ * (index below proxy->nvqs_with_notifiers), stopping at the first queue
+ * with a size of zero.
+ *
+ * Returns 0 on success. If unmasking a queue fails, the queues that were
+ * successfully unmasked before it are masked again and the negative error
+ * is returned.
+ */
+static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
+                                    MSIMessage msg)
+{
+    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
+    int ret, index, unmasked = 0;
+
+    while (vq) {
+        index = virtio_get_queue_index(vq);
+        if (!virtio_queue_get_num(vdev, index)) {
+            break;
+        }
+        if (index < proxy->nvqs_with_notifiers) {
+            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
+            if (ret < 0) {
+                goto undo;
+            }
+            ++unmasked;
+        }
+        vq = virtio_vector_next_queue(vq);
+    }
+
+    return 0;
+
+undo:
+    /*
+     * Re-mask exactly the 'unmasked' queues that succeeded. The queue whose
+     * unmask failed must not be masked: it was never unmasked, and for
+     * devices without notifier masking that would try to remove an irqfd
+     * that was never added, tripping the assertion in
+     * kvm_virtio_pci_irqfd_release().
+     */
+    vq = virtio_vector_first_queue(vdev, vector);
+    while (vq && unmasked > 0) {
+        index = virtio_get_queue_index(vq);
+        if (index < proxy->nvqs_with_notifiers) {
+            virtio_pci_vq_vector_mask(proxy, index, vector);
+            --unmasked;
+        }
+        vq = virtio_vector_next_queue(vq);
+    }
+    return ret;
+}
+
+/*
+ * MSI-X vector notifier: @vector has been masked.
+ *
+ * Masks every queue bound to @vector that has guest notifiers assigned,
+ * stopping at the first queue with a size of zero.
+ */
+static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
+{
+    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtQueue *vq;
+
+    for (vq = virtio_vector_first_queue(vdev, vector); vq;
+         vq = virtio_vector_next_queue(vq)) {
+        int queue_idx = virtio_get_queue_index(vq);
+
+        if (!virtio_queue_get_num(vdev, queue_idx)) {
+            break;
+        }
+        if (queue_idx < proxy->nvqs_with_notifiers) {
+            virtio_pci_vq_vector_mask(proxy, queue_idx, vector);
+        }
+    }
+}
+
+/*
+ * MSI-X poll notifier: for every queue with guest notifiers whose vector
+ * lies in [@vector_start, @vector_end) and is currently masked, set the
+ * MSI-X pending bit if the queue has an event waiting.
+ *
+ * The device's guest_notifier_pending callback is used when available;
+ * otherwise the guest notifier itself is tested and cleared. Iteration
+ * stops at the first queue with a size of zero.
+ */
+static void virtio_pci_vector_poll(PCIDevice *dev,
+                                   unsigned int vector_start,
+                                   unsigned int vector_end)
+{
+    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    int queue_no;
+
+    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
+        unsigned int vector;
+        EventNotifier *notifier;
+        VirtQueue *vq;
+        bool pending;
+
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector < vector_start || vector >= vector_end ||
+            !msix_is_masked(dev, vector)) {
+            continue;
+        }
+
+        vq = virtio_get_queue(vdev, queue_no);
+        notifier = virtio_queue_get_guest_notifier(vq);
+
+        if (k->guest_notifier_pending) {
+            pending = k->guest_notifier_pending(vdev, queue_no);
+        } else {
+            pending = event_notifier_test_and_clear(notifier);
+        }
+
+        if (pending) {
+            msix_set_pending(dev, vector);
+        }
+    }
+}
+
+/*
+ * Assign or deassign the guest notifier of queue @n.
+ *
+ * Assigning initializes the event notifier and installs its fd handler;
+ * deassigning removes the handler and cleans the notifier up. When MSI-X is
+ * not enabled and the device supports notifier masking, the device is told
+ * to unmask (on assign) or mask (on deassign) the queue.
+ *
+ * Returns 0 on success or the negative error from event_notifier_init().
+ */
+static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
+                                         bool with_irqfd)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    VirtQueue *vq = virtio_get_queue(vdev, n);
+    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+
+    if (!assign) {
+        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
+        event_notifier_cleanup(notifier);
+    } else {
+        int rc = event_notifier_init(notifier, 0);
+
+        if (rc < 0) {
+            return rc;
+        }
+        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
+    }
+
+    /* Without MSI-X, masking follows the assignment state directly. */
+    if (!msix_enabled(&proxy->pci_dev) &&
+        vdev->use_guest_notifier_mask &&
+        vdc->guest_notifier_mask) {
+        vdc->guest_notifier_mask(vdev, n, !assign);
+    }
+
+    return 0;
+}
+
+/* Report whether MSI-X is enabled on the proxy's PCI device. */
+static bool virtio_pci_query_guest_notifiers(DeviceState *d)
+{
+    return msix_enabled(&to_virtio_pci_proxy(d)->pci_dev);
+}
+
+/*
+ * Assign or deassign guest notifiers for the first @nvqs virtqueues.
+ *
+ * @nvqs is clamped to VIRTIO_QUEUE_MAX.  A deassign request is a no-op when
+ * no notifiers were recorded, and otherwise must use the same @nvqs that was
+ * used for the assignment.
+ *
+ * MSI-X vector notifiers are removed before the guest notifiers on deassign
+ * and installed after them on assign.
+ *
+ * Returns 0 on success.  On an assignment failure the negative value of the
+ * failing step is returned after every guest notifier set up so far has been
+ * deassigned and the irqfd table released.
+ */
+static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    int r, n;
+    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
+        kvm_msi_via_irqfd_enabled();
+
+    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
+
+    /*
+     * When deassigning, pass a consistent nvqs value to avoid leaking
+     * notifiers. But first check we've actually been configured, exit
+     * early if we haven't.
+     */
+    if (!assign && !proxy->nvqs_with_notifiers) {
+        return 0;
+    }
+    assert(assign || nvqs == proxy->nvqs_with_notifiers);
+
+    proxy->nvqs_with_notifiers = nvqs;
+
+    /* Must unset vector notifier while guest notifier is still assigned */
+    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
+        msix_unset_vector_notifiers(&proxy->pci_dev);
+        if (proxy->vector_irqfd) {
+            kvm_virtio_pci_vector_release(proxy, nvqs);
+            g_free(proxy->vector_irqfd);
+            proxy->vector_irqfd = NULL;
+        }
+    }
+
+    for (n = 0; n < nvqs; n++) {
+        /* Queues are populated contiguously; a size of 0 ends the list. */
+        if (!virtio_queue_get_num(vdev, n)) {
+            break;
+        }
+
+        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
+        if (r < 0) {
+            goto assign_error;
+        }
+    }
+
+    /* Must set vector notifier after guest notifier has been assigned */
+    if ((with_irqfd || k->guest_notifier_mask) && assign) {
+        if (with_irqfd) {
+            proxy->vector_irqfd =
+                g_malloc0(sizeof(*proxy->vector_irqfd) *
+                          msix_nr_vectors_allocated(&proxy->pci_dev));
+            r = kvm_virtio_pci_vector_use(proxy, nvqs);
+            if (r < 0) {
+                goto assign_error;
+            }
+        }
+        r = msix_set_vector_notifiers(&proxy->pci_dev,
+                                      virtio_pci_vector_unmask,
+                                      virtio_pci_vector_mask,
+                                      virtio_pci_vector_poll);
+        if (r < 0) {
+            goto notifiers_error;
+        }
+    }
+
+    return 0;
+
+notifiers_error:
+    if (with_irqfd) {
+        assert(assign);
+        kvm_virtio_pci_vector_release(proxy, nvqs);
+    }
+
+assign_error:
+    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
+    assert(assign);
+    while (--n >= 0) {
+        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
+    }
+    /*
+     * Release the irqfd table as well: leaving it allocated would leak it on
+     * the next assignment and would make a later deassign release vectors
+     * that were never set up.  g_free(NULL) is a no-op, so this is safe when
+     * the failure happened before the table was allocated.
+     */
+    g_free(proxy->vector_irqfd);
+    proxy->vector_irqfd = NULL;
+    return r;
+}
+
+/*
+ * Map (@assign) or unmap a host notifier memory region for queue @n on top
+ * of the notify region.
+ *
+ * Returns -1 if @n is out of range, the proxy is not in modern mode, or @mr
+ * is not exactly one notify slot in size; 0 otherwise.
+ */
+static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
+                                           MemoryRegion *mr, bool assign)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    int notify_off;
+
+    if (n >= VIRTIO_QUEUE_MAX) {
+        return -1;
+    }
+    if (!virtio_pci_modern(proxy)) {
+        return -1;
+    }
+    if (virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
+        return -1;
+    }
+
+    if (!assign) {
+        memory_region_del_subregion(&proxy->notify.mr, mr);
+        return 0;
+    }
+
+    /* Each queue owns one slot of queue_mem_mult bytes, in queue order. */
+    notify_off = virtio_pci_queue_mem_mult(proxy) * n;
+    memory_region_add_subregion_overlap(&proxy->notify.mr, notify_off, mr, 1);
+
+    return 0;
+}
+
+/*
+ * VM run-state hook: stop ioeventfd when the VM stops, start it when the VM
+ * runs (after applying the bus-master migration workaround if needed).
+ */
+static void virtio_pci_vmstate_change(DeviceState *d, bool running)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    PCIDevice *pdev = &proxy->pci_dev;
+
+    if (!running) {
+        virtio_pci_stop_ioeventfd(proxy);
+        return;
+    }
+
+    /*
+     * Old QEMU versions did not set bus master enable on status write.
+     * Detect DRIVER set and enable it.
+     */
+    if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
+        (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
+        !(pdev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
+        pci_default_write_config(pdev, PCI_COMMAND,
+                                 pdev->config[PCI_COMMAND] |
+                                 PCI_COMMAND_MASTER, 1);
+    }
+
+    virtio_pci_start_ioeventfd(proxy);
+}
+
+/*
+ * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
+ */
+
+/* Return the number of vectors configured on the proxy. */
+static int virtio_pci_query_nvectors(DeviceState *d)
+{
+    return VIRTIO_PCI(d)->nvectors;
+}
+
+/* Return the address space of the proxy's PCI device. */
+static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
+{
+    return pci_get_address_space(&VIRTIO_PCI(d)->pci_dev);
+}
+
+/*
+ * Return true when the device's IOMMU address space is anything other than
+ * the global address_space_memory.
+ */
+static bool virtio_pci_iommu_enabled(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+
+    return pci_device_iommu_address_space(&proxy->pci_dev) !=
+           &address_space_memory;
+}
+
+/*
+ * Report whether queue @n is enabled.  With VIRTIO_F_VERSION_1 negotiated
+ * the proxy's per-queue enabled flag is used; otherwise the legacy check
+ * is delegated to virtio_queue_enabled_legacy().
+ */
+static bool virtio_pci_queue_enabled(DeviceState *d, int n)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        return virtio_queue_enabled_legacy(vdev, n);
+    }
+
+    return proxy->vqs[n].enabled;
+}
+
+/*
+ * Add a vendor-specific (PCI_CAP_ID_VNDR) capability of cap->cap_len bytes
+ * to the proxy's PCI config space and fill it from @cap.
+ *
+ * The copy starts at the cap_len field and skips the first PCI_CAP_FLAGS
+ * bytes of the capability (presumably the ID/next header written by
+ * pci_add_capability() -- confirm against the PCI core).
+ *
+ * Returns the config-space offset of the new capability.  Failure to add
+ * the capability aborts (&error_abort).
+ */
+static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
+                                   struct virtio_pci_cap *cap)
+{
+    PCIDevice *dev = &proxy->pci_dev;
+    int offset;
+
+    /* Validate the length before it is used to size the capability. */
+    assert(cap->cap_len >= sizeof *cap);
+
+    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
+                                cap->cap_len, &error_abort);
+
+    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
+           cap->cap_len - PCI_CAP_FLAGS);
+
+    return offset;
+}
+
+/*
+ * Read handler for the modern common configuration region.
+ *
+ * @addr selects the register.  Unknown offsets read as 0, and every
+ * register reads as all-ones while no virtio device is plugged.  Per-queue
+ * registers refer to the queue currently chosen via Q_SELECT.
+ */
+static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
+                                       unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    uint32_t result = 0;
+    int sel, q;
+
+    if (!vdev) {
+        return UINT64_MAX;
+    }
+    sel = vdev->queue_sel;
+
+    switch (addr) {
+    case VIRTIO_PCI_COMMON_DFSELECT:
+        result = proxy->dfselect;
+        break;
+    case VIRTIO_PCI_COMMON_DF:
+        /* Only feature words 0 and 1 are backed; others read as 0. */
+        if (proxy->dfselect <= 1) {
+            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+            result = (vdev->host_features & ~vdc->legacy_features) >>
+                (32 * proxy->dfselect);
+        }
+        break;
+    case VIRTIO_PCI_COMMON_GFSELECT:
+        result = proxy->gfselect;
+        break;
+    case VIRTIO_PCI_COMMON_GF:
+        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
+            result = proxy->guest_features[proxy->gfselect];
+        }
+        break;
+    case VIRTIO_PCI_COMMON_MSIX:
+        result = vdev->config_vector;
+        break;
+    case VIRTIO_PCI_COMMON_NUMQ:
+        /* One past the highest queue index that has a non-zero size. */
+        for (q = VIRTIO_QUEUE_MAX; q > 0; q--) {
+            if (virtio_queue_get_num(vdev, q - 1)) {
+                result = q;
+                break;
+            }
+        }
+        break;
+    case VIRTIO_PCI_COMMON_STATUS:
+        result = vdev->status;
+        break;
+    case VIRTIO_PCI_COMMON_CFGGENERATION:
+        result = vdev->generation;
+        break;
+    case VIRTIO_PCI_COMMON_Q_SELECT:
+        result = sel;
+        break;
+    case VIRTIO_PCI_COMMON_Q_SIZE:
+        result = virtio_queue_get_num(vdev, sel);
+        break;
+    case VIRTIO_PCI_COMMON_Q_MSIX:
+        result = virtio_queue_vector(vdev, sel);
+        break;
+    case VIRTIO_PCI_COMMON_Q_ENABLE:
+        result = proxy->vqs[sel].enabled;
+        break;
+    case VIRTIO_PCI_COMMON_Q_NOFF:
+        /* Simply map queues in order */
+        result = sel;
+        break;
+    case VIRTIO_PCI_COMMON_Q_DESCLO:
+        result = proxy->vqs[sel].desc[0];
+        break;
+    case VIRTIO_PCI_COMMON_Q_DESCHI:
+        result = proxy->vqs[sel].desc[1];
+        break;
+    case VIRTIO_PCI_COMMON_Q_AVAILLO:
+        result = proxy->vqs[sel].avail[0];
+        break;
+    case VIRTIO_PCI_COMMON_Q_AVAILHI:
+        result = proxy->vqs[sel].avail[1];
+        break;
+    case VIRTIO_PCI_COMMON_Q_USEDLO:
+        result = proxy->vqs[sel].used[0];
+        break;
+    case VIRTIO_PCI_COMMON_Q_USEDHI:
+        result = proxy->vqs[sel].used[1];
+        break;
+    case VIRTIO_PCI_COMMON_Q_RESET:
+        result = proxy->vqs[sel].reset;
+        break;
+    default:
+        /* result is still 0 */
+        break;
+    }
+
+    return result;
+}
+
+/*
+ * Write handler for the modern common configuration region.
+ *
+ * @addr selects the register and @val is the value written.  Writes are
+ * dropped while no virtio device is plugged, and writes to offsets not
+ * listed below are ignored.  Per-queue registers act on the queue currently
+ * chosen via Q_SELECT.
+ */
+static void virtio_pci_common_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    uint16_t vector;
+
+    if (vdev == NULL) {
+        return;
+    }
+
+    switch (addr) {
+    case VIRTIO_PCI_COMMON_DFSELECT:
+        proxy->dfselect = val;
+        break;
+    case VIRTIO_PCI_COMMON_GFSELECT:
+        proxy->gfselect = val;
+        break;
+    case VIRTIO_PCI_COMMON_GF:
+        /*
+         * Features arrive one 32-bit word at a time; after each in-range
+         * write the combined 64-bit value is pushed to the device.
+         */
+        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
+            proxy->guest_features[proxy->gfselect] = val;
+            virtio_set_features(vdev,
+                                (((uint64_t)proxy->guest_features[1]) << 32) |
+                                proxy->guest_features[0]);
+        }
+        break;
+    case VIRTIO_PCI_COMMON_MSIX:
+        /* Drop the reference on the previous config vector, if any. */
+        if (vdev->config_vector != VIRTIO_NO_VECTOR) {
+            msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        }
+        /* Make it possible for guest to discover an error took place. */
+        if (val < proxy->nvectors) {
+            msix_vector_use(&proxy->pci_dev, val);
+        } else {
+            val = VIRTIO_NO_VECTOR;
+        }
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_PCI_COMMON_STATUS:
+        /* ioeventfd is stopped before DRIVER_OK is cleared ... */
+        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+            virtio_pci_stop_ioeventfd(proxy);
+        }
+
+        virtio_set_status(vdev, val & 0xFF);
+
+        /* ... and started only after DRIVER_OK has been set. */
+        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
+            virtio_pci_start_ioeventfd(proxy);
+        }
+
+        /* A resulting status of 0 resets the whole proxy. */
+        if (vdev->status == 0) {
+            virtio_pci_reset(DEVICE(proxy));
+        }
+
+        break;
+    case VIRTIO_PCI_COMMON_Q_SELECT:
+        /* Out-of-range selectors are ignored; queue_sel keeps its value. */
+        if (val < VIRTIO_QUEUE_MAX) {
+            vdev->queue_sel = val;
+        }
+        break;
+    case VIRTIO_PCI_COMMON_Q_SIZE:
+        proxy->vqs[vdev->queue_sel].num = val;
+        virtio_queue_set_num(vdev, vdev->queue_sel,
+                             proxy->vqs[vdev->queue_sel].num);
+        break;
+    case VIRTIO_PCI_COMMON_Q_MSIX:
+        /* Drop the reference on the queue's previous vector, if any. */
+        vector = virtio_queue_vector(vdev, vdev->queue_sel);
+        if (vector != VIRTIO_NO_VECTOR) {
+            msix_vector_unuse(&proxy->pci_dev, vector);
+        }
+        /* Make it possible for guest to discover an error took place. */
+        if (val < proxy->nvectors) {
+            msix_vector_use(&proxy->pci_dev, val);
+        } else {
+            val = VIRTIO_NO_VECTOR;
+        }
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    case VIRTIO_PCI_COMMON_Q_ENABLE:
+        /*
+         * Only the value 1 is accepted: the size and ring addresses staged
+         * in proxy->vqs[] are committed to the device, then the queue is
+         * enabled.  Any other value is reported through virtio_error().
+         */
+        if (val == 1) {
+            virtio_queue_set_num(vdev, vdev->queue_sel,
+                                 proxy->vqs[vdev->queue_sel].num);
+            virtio_queue_set_rings(vdev, vdev->queue_sel,
+                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
+                       proxy->vqs[vdev->queue_sel].desc[0],
+                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
+                       proxy->vqs[vdev->queue_sel].avail[0],
+                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
+                       proxy->vqs[vdev->queue_sel].used[0]);
+            proxy->vqs[vdev->queue_sel].enabled = 1;
+            proxy->vqs[vdev->queue_sel].reset = 0;
+            virtio_queue_enable(vdev, vdev->queue_sel);
+        } else {
+            virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
+        }
+        break;
+    /* Ring addresses are only staged here; Q_ENABLE commits them. */
+    case VIRTIO_PCI_COMMON_Q_DESCLO:
+        proxy->vqs[vdev->queue_sel].desc[0] = val;
+        break;
+    case VIRTIO_PCI_COMMON_Q_DESCHI:
+        proxy->vqs[vdev->queue_sel].desc[1] = val;
+        break;
+    case VIRTIO_PCI_COMMON_Q_AVAILLO:
+        proxy->vqs[vdev->queue_sel].avail[0] = val;
+        break;
+    case VIRTIO_PCI_COMMON_Q_AVAILHI:
+        proxy->vqs[vdev->queue_sel].avail[1] = val;
+        break;
+    case VIRTIO_PCI_COMMON_Q_USEDLO:
+        proxy->vqs[vdev->queue_sel].used[0] = val;
+        break;
+    case VIRTIO_PCI_COMMON_Q_USEDHI:
+        proxy->vqs[vdev->queue_sel].used[1] = val;
+        break;
+    case VIRTIO_PCI_COMMON_Q_RESET:
+        /*
+         * Only the value 1 triggers a reset.  The reset flag is raised for
+         * the duration of virtio_queue_reset() and the queue is left
+         * disabled afterwards.
+         */
+        if (val == 1) {
+            proxy->vqs[vdev->queue_sel].reset = 1;
+
+            virtio_queue_reset(vdev, vdev->queue_sel);
+
+            proxy->vqs[vdev->queue_sel].reset = 0;
+            proxy->vqs[vdev->queue_sel].enabled = 0;
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+
+/*
+ * Read handler for the notify regions: reads as 0, or as all-ones while no
+ * virtio device is plugged.
+ */
+static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
+                                       unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+
+    return virtio_bus_get_device(&proxy->bus) ? 0 : UINT64_MAX;
+}
+
+/*
+ * Write handler for the memory-mapped notify region.  The queue index is
+ * derived from the offset: each queue owns one slot of queue_mem_mult
+ * bytes.  Writes are ignored when no device is plugged or the index is out
+ * of range.
+ */
+static void virtio_pci_notify_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
+
+    if (vdev == NULL || queue >= VIRTIO_QUEUE_MAX) {
+        return;
+    }
+
+    trace_virtio_pci_notify_write(addr, val, size);
+    virtio_queue_notify(vdev, queue);
+}
+
+/*
+ * Write handler for the port-I/O notify region.  Here the queue index is
+ * the value written rather than the offset.  Writes are ignored when no
+ * device is plugged or the index is out of range.
+ */
+static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
+                                        uint64_t val, unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    unsigned queue = val;
+
+    if (vdev == NULL || queue >= VIRTIO_QUEUE_MAX) {
+        return;
+    }
+
+    trace_virtio_pci_notify_write_pio(addr, val, size);
+    virtio_queue_notify(vdev, queue);
+}
+
+/*
+ * Read handler for the ISR region.  Reading atomically fetches and clears
+ * the ISR value and deasserts the PCI interrupt.  Reads as all-ones while
+ * no virtio device is plugged.
+ */
+static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
+                                    unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    uint64_t isr;
+
+    if (!vdev) {
+        return UINT64_MAX;
+    }
+
+    isr = qatomic_xchg(&vdev->isr, 0);
+    pci_irq_deassert(&proxy->pci_dev);
+
+    return isr;
+}
+
+/* Write handler for the ISR region: writes are silently ignored. */
+static void virtio_pci_isr_write(void *opaque, hwaddr addr,
+                                 uint64_t val, unsigned size)
+{
+}
+
+/*
+ * Read handler for the device-specific configuration region.  Dispatches on
+ * the access @size (1, 2 or 4 bytes); any other size reads as 0.  Reads as
+ * all-ones while no virtio device is plugged.
+ */
+static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
+                                       unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!vdev) {
+        return UINT64_MAX;
+    }
+
+    switch (size) {
+    case 1:
+        return virtio_config_modern_readb(vdev, addr);
+    case 2:
+        return virtio_config_modern_readw(vdev, addr);
+    case 4:
+        return virtio_config_modern_readl(vdev, addr);
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Write handler for the device-specific configuration region.  Dispatches
+ * on the access @size (1, 2 or 4 bytes); other sizes are ignored, as are
+ * all writes while no virtio device is plugged.
+ */
+static void virtio_pci_device_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    if (!vdev) {
+        return;
+    }
+
+    if (size == 1) {
+        virtio_config_modern_writeb(vdev, addr, val);
+    } else if (size == 2) {
+        virtio_config_modern_writew(vdev, addr, val);
+    } else if (size == 4) {
+        virtio_config_modern_writel(vdev, addr, val);
+    }
+}
+
+/* Initialise one modern region as an I/O memory region owned by @proxy. */
+static void virtio_pci_modern_region_init_io(VirtIOPCIProxy *proxy,
+                                             VirtIOPCIRegion *region,
+                                             const MemoryRegionOps *ops,
+                                             const char *name)
+{
+    memory_region_init_io(&region->mr, OBJECT(proxy), ops, proxy, name,
+                          region->size);
+}
+
+/*
+ * Create the I/O memory regions backing the modern interface: common, isr,
+ * device, notify and notify-pio.  Each region is named after its role and
+ * @vdev_name, and sized from the matching VirtIOPCIRegion in @proxy.
+ * All five accept 1..4 byte accesses and are little-endian.
+ */
+static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
+                                           const char *vdev_name)
+{
+    static const MemoryRegionOps common_ops = {
+        .read = virtio_pci_common_read,
+        .write = virtio_pci_common_write,
+        .impl = {
+            .min_access_size = 1,
+            .max_access_size = 4,
+        },
+        .endianness = DEVICE_LITTLE_ENDIAN,
+    };
+    static const MemoryRegionOps isr_ops = {
+        .read = virtio_pci_isr_read,
+        .write = virtio_pci_isr_write,
+        .impl = {
+            .min_access_size = 1,
+            .max_access_size = 4,
+        },
+        .endianness = DEVICE_LITTLE_ENDIAN,
+    };
+    static const MemoryRegionOps device_ops = {
+        .read = virtio_pci_device_read,
+        .write = virtio_pci_device_write,
+        .impl = {
+            .min_access_size = 1,
+            .max_access_size = 4,
+        },
+        .endianness = DEVICE_LITTLE_ENDIAN,
+    };
+    static const MemoryRegionOps notify_ops = {
+        .read = virtio_pci_notify_read,
+        .write = virtio_pci_notify_write,
+        .impl = {
+            .min_access_size = 1,
+            .max_access_size = 4,
+        },
+        .endianness = DEVICE_LITTLE_ENDIAN,
+    };
+    static const MemoryRegionOps notify_pio_ops = {
+        .read = virtio_pci_notify_read,
+        .write = virtio_pci_notify_write_pio,
+        .impl = {
+            .min_access_size = 1,
+            .max_access_size = 4,
+        },
+        .endianness = DEVICE_LITTLE_ENDIAN,
+    };
+    g_autoptr(GString) label = g_string_new(NULL);
+
+    /* The GString is reused: each init call runs before the next printf. */
+    g_string_printf(label, "virtio-pci-common-%s", vdev_name);
+    virtio_pci_modern_region_init_io(proxy, &proxy->common, &common_ops,
+                                     label->str);
+
+    g_string_printf(label, "virtio-pci-isr-%s", vdev_name);
+    virtio_pci_modern_region_init_io(proxy, &proxy->isr, &isr_ops,
+                                     label->str);
+
+    g_string_printf(label, "virtio-pci-device-%s", vdev_name);
+    virtio_pci_modern_region_init_io(proxy, &proxy->device, &device_ops,
+                                     label->str);
+
+    g_string_printf(label, "virtio-pci-notify-%s", vdev_name);
+    virtio_pci_modern_region_init_io(proxy, &proxy->notify, &notify_ops,
+                                     label->str);
+
+    g_string_printf(label, "virtio-pci-notify-pio-%s", vdev_name);
+    virtio_pci_modern_region_init_io(proxy, &proxy->notify_pio,
+                                     &notify_pio_ops, label->str);
+}
+
+/*
+ * Map @region into the BAR container @mr at the region's offset, then
+ * describe it to the guest by filling @cap (type, BAR number, offset and
+ * length, the latter two stored little-endian) and adding it as a PCI
+ * capability.
+ */
+static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
+                                         VirtIOPCIRegion *region,
+                                         struct virtio_pci_cap *cap,
+                                         MemoryRegion *mr,
+                                         uint8_t bar)
+{
+    memory_region_add_subregion(mr, region->offset, &region->mr);
+
+    cap->bar = bar;
+    cap->cfg_type = region->type;
+    cap->length = cpu_to_le32(region->size);
+    cap->offset = cpu_to_le32(region->offset);
+
+    virtio_pci_add_mem_cap(proxy, cap);
+}
+
+/* Map @region into the modern memory BAR and advertise it via @cap. */
+static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
+                                             VirtIOPCIRegion *region,
+                                             struct virtio_pci_cap *cap)
+{
+    MemoryRegion *container = &proxy->modern_bar;
+    uint8_t bar_idx = proxy->modern_mem_bar_idx;
+
+    virtio_pci_modern_region_map(proxy, region, cap, container, bar_idx);
+}
+
+/* Map @region into the modern I/O BAR and advertise it via @cap. */
+static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
+                                            VirtIOPCIRegion *region,
+                                            struct virtio_pci_cap *cap)
+{
+    MemoryRegion *container = &proxy->io_bar;
+    uint8_t bar_idx = proxy->modern_io_bar_idx;
+
+    virtio_pci_modern_region_map(proxy, region, cap, container, bar_idx);
+}
+
+/* Remove @region from the modern memory BAR container. */
+static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
+                                               VirtIOPCIRegion *region)
+{
+    MemoryRegion *container = &proxy->modern_bar;
+
+    memory_region_del_subregion(container, &region->mr);
+}
+
+/* Remove @region from the modern I/O BAR container. */
+static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
+                                              VirtIOPCIRegion *region)
+{
+    MemoryRegion *container = &proxy->io_bar;
+
+    memory_region_del_subregion(container, &region->mr);
+}
+
+/*
+ * Adjust the device's host features before it is plugged:
+ * VIRTIO_F_VERSION_1 is offered when the proxy is in modern mode, and
+ * VIRTIO_F_BAD_FEATURE is always added.
+ */
+static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    uint64_t *features = &vdev->host_features;
+
+    if (virtio_pci_modern(proxy)) {
+        virtio_add_feature(features, VIRTIO_F_VERSION_1);
+    }
+
+    virtio_add_feature(features, VIRTIO_F_BAD_FEATURE);
+}
+
+/*
+ * This is called by virtio-bus just after the device is plugged.
+ *
+ * Finalises the PCI personality of the proxy for the plugged device:
+ *  - falls back from modern mode when the backend lacks VIRTIO_F_VERSION_1
+ *    (unless ignore_backend_features is set);
+ *  - programs PCI IDs for legacy/transitional or pure virtio-1.0 operation;
+ *  - in modern mode, maps the common/isr/device/notify regions (and the
+ *    optional PIO notify region), registers the BARs and adds the
+ *    matching virtio PCI capabilities;
+ *  - sets up MSI-X when nvectors is non-zero (failure is not fatal and
+ *    resets nvectors to 0);
+ *  - in legacy mode, registers the legacy I/O BAR.
+ *
+ * Errors are reported through @errp and leave the function early.
+ */
+static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    VirtioBusState *bus = &proxy->bus;
+    bool legacy = virtio_pci_legacy(proxy);
+    bool modern;
+    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
+    uint8_t *config;
+    uint32_t size;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    /*
+     * Virtio capabilities present without
+     * VIRTIO_F_VERSION_1 confuses guests
+     */
+    if (!proxy->ignore_backend_features &&
+            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
+        virtio_pci_disable_modern(proxy);
+
+        if (!legacy) {
+            error_setg(errp, "Device doesn't support modern mode, and legacy"
+                             " mode is disabled");
+            error_append_hint(errp, "Set disable-legacy to off\n");
+
+            return;
+        }
+    }
+
+    /* Evaluated only now: the fallback above may have disabled modern. */
+    modern = virtio_pci_modern(proxy);
+
+    config = proxy->pci_dev.config;
+    if (proxy->class_code) {
+        pci_config_set_class(config, proxy->class_code);
+    }
+
+    if (legacy) {
+        if (!virtio_legacy_allowed(vdev)) {
+            /*
+             * To avoid migration issues, we allow legacy mode when legacy
+             * check is disabled in the old machine types (< 5.1).
+             */
+            if (virtio_legacy_check_disabled(vdev)) {
+                warn_report("device is modern-only, but for backward "
+                            "compatibility legacy is allowed");
+            } else {
+                error_setg(errp,
+                           "device is modern-only, use disable-legacy=on");
+                return;
+            }
+        }
+        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
+                       " neither legacy nor transitional device");
+            return;
+        }
+        /*
+         * Legacy and transitional devices use specific subsystem IDs.
+         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
+         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
+         */
+        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
+    } else {
+        /* pure virtio-1.0 */
+        pci_set_word(config + PCI_VENDOR_ID,
+                     PCI_VENDOR_ID_REDHAT_QUMRANET);
+        pci_set_word(config + PCI_DEVICE_ID,
+                     PCI_DEVICE_ID_VIRTIO_10_BASE + virtio_bus_get_vdev_id(bus));
+        pci_config_set_revision(config, 1);
+    }
+    config[PCI_INTERRUPT_PIN] = 1;
+
+
+    if (modern) {
+        struct virtio_pci_cap cap = {
+            .cap_len = sizeof cap,
+        };
+        struct virtio_pci_notify_cap notify = {
+            .cap.cap_len = sizeof notify,
+            .notify_off_multiplier =
+                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
+        };
+        struct virtio_pci_cfg_cap cfg = {
+            .cap.cap_len = sizeof cfg,
+            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
+        };
+        /* PIO notify uses a multiplier of 0: the queue is the value written */
+        struct virtio_pci_notify_cap notify_pio = {
+            .cap.cap_len = sizeof notify_pio,
+            .notify_off_multiplier = cpu_to_le32(0x0),
+        };
+
+        struct virtio_pci_cfg_cap *cfg_mask;
+
+        virtio_pci_modern_regions_init(proxy, vdev->name);
+
+        /* 'cap' is reused: each call rewrites type/bar/offset/length. */
+        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
+        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
+        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
+        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);
+
+        if (modern_pio) {
+            memory_region_init(&proxy->io_bar, OBJECT(proxy),
+                               "virtio-pci-io", 0x4);
+
+            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
+                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
+
+            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
+                                            &notify_pio.cap);
+        }
+
+        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
+                         PCI_BASE_ADDRESS_SPACE_MEMORY |
+                         PCI_BASE_ADDRESS_MEM_PREFETCH |
+                         PCI_BASE_ADDRESS_MEM_TYPE_64,
+                         &proxy->modern_bar);
+
+        /*
+         * Add the PCI configuration access capability and make its bar,
+         * offset, length and data fields guest-writable via the write mask.
+         */
+        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
+        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
+        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
+        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
+        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
+        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
+    }
+
+    if (proxy->nvectors) {
+        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
+                                          proxy->msix_bar_idx, NULL);
+        if (err) {
+            /* Notice when a system that supports MSIx can't initialize it */
+            if (err != -ENOTSUP) {
+                warn_report("unable to init msix vectors to %" PRIu32,
+                            proxy->nvectors);
+            }
+            proxy->nvectors = 0;
+        }
+    }
+
+    proxy->pci_dev.config_write = virtio_write_config;
+    proxy->pci_dev.config_read = virtio_read_config;
+
+    if (legacy) {
+        /* Legacy BAR: transport registers followed by device config. */
+        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
+            + virtio_bus_get_vdev_config_len(bus);
+        size = pow2ceil(size);
+
+        memory_region_init_io(&proxy->bar, OBJECT(proxy),
+                              &virtio_pci_config_ops,
+                              proxy, "virtio-pci", size);
+
+        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
+                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
+    }
+}
+
+/*
+ * Undo the mappings made at plug time: stop ioeventfd and, in modern mode,
+ * unmap the common/isr/device/notify regions plus the PIO notify region
+ * when VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY is set.
+ */
+static void virtio_pci_device_unplugged(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    bool is_modern = virtio_pci_modern(proxy);
+    bool has_pio_notify = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
+
+    virtio_pci_stop_ioeventfd(proxy);
+
+    if (!is_modern) {
+        return;
+    }
+
+    virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
+    virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
+    virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
+    virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
+
+    if (has_pio_notify) {
+        virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
+    }
+}
+
+/*
+ * PCI realize hook for the virtio-pci proxy.
+ *
+ * Chooses the default BAR and modern-region layout, resolves the
+ * disable-legacy "auto" setting, sets up PCI Express capabilities when the
+ * device sits behind a PCIe non-root port, creates the virtio-pci bus and
+ * finally invokes the subclass realize hook, if any.
+ *
+ * Errors are reported through @errp and leave the function early.
+ */
+static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
+    /* True when the parent bus is PCI Express but is not the root bus. */
+    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
+                     !pci_bus_is_root(pci_get_bus(pci_dev));
+
+    /* Under KVM, ioeventfd is only used when many ioeventfds are available */
+    if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
+        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+    }
+
+    /* fd-based ioevents can't be synchronized in record/replay */
+    if (replay_mode != REPLAY_MODE_NONE) {
+        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
+    }
+
+    /*
+     * virtio pci bar layout used by default.
+     * subclasses can re-arrange things if needed.
+     *
+     *   region 0   --  virtio legacy io bar
+     *   region 1   --  msi-x bar
+     *   region 2   --  virtio modern io bar (off by default)
+     *   region 4+5 --  virtio modern memory (64bit) bar
+     *
+     */
+    proxy->legacy_io_bar_idx  = 0;
+    proxy->msix_bar_idx       = 1;
+    proxy->modern_io_bar_idx  = 2;
+    proxy->modern_mem_bar_idx = 4;
+
+    /* Layout of the structures inside the modern memory BAR. */
+    proxy->common.offset = 0x0;
+    proxy->common.size = 0x1000;
+    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;
+
+    proxy->isr.offset = 0x1000;
+    proxy->isr.size = 0x1000;
+    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;
+
+    proxy->device.offset = 0x2000;
+    proxy->device.size = 0x1000;
+    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;
+
+    /* One notify slot of queue_mem_mult bytes per possible queue. */
+    proxy->notify.offset = 0x3000;
+    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
+    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;
+
+    /* The PIO notify region lives in the modern I/O BAR, not the memory BAR */
+    proxy->notify_pio.offset = 0x0;
+    proxy->notify_pio.size = 0x4;
+    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;
+
+    /* subclasses can enforce modern, so do this unconditionally */
+    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
+                       /* PCI BAR regions must be powers of 2 */
+                       pow2ceil(proxy->notify.offset + proxy->notify.size));
+
+    /* "auto": legacy is disabled behind a PCIe port, kept elsewhere. */
+    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
+        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+    }
+
+    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
+        error_setg(errp, "device cannot work as neither modern nor legacy mode"
+                   " is enabled");
+        error_append_hint(errp, "Set either disable-modern or disable-legacy"
+                          " to off\n");
+        return;
+    }
+
+    if (pcie_port && pci_is_express(pci_dev)) {
+        int pos;
+        /* Next free offset for extended capabilities; advanced as they are added */
+        uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
+
+        pos = pcie_endpoint_cap_init(pci_dev, 0);
+        assert(pos > 0);
+
+        /* From here on 'pos' is the Power Management capability offset. */
+        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
+                                 PCI_PM_SIZEOF, errp);
+        if (pos < 0) {
+            return;
+        }
+
+        pci_dev->exp.pm_cap = pos;
+
+        /*
+         * Indicates that this function complies with revision 1.2 of the
+         * PCI Power Management Interface Specification.
+         */
+        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
+            pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
+                          PCI_ERR_SIZEOF, NULL);
+            last_pcie_cap_offset += PCI_ERR_SIZEOF;
+        }
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
+            /* Init error enabling flags */
+            pcie_cap_deverr_init(pci_dev);
+        }
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
+            /* Init Link Control Register */
+            pcie_cap_lnkctl_init(pci_dev);
+        }
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
+            /* Init Power Management Control Register */
+            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
+                         PCI_PM_CTRL_STATE_MASK);
+        }
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
+            pcie_ats_init(pci_dev, last_pcie_cap_offset,
+                          proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
+            last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
+        }
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
+            /* Set Function Level Reset capability bit */
+            pcie_cap_flr_init(pci_dev);
+        }
+    } else {
+        /*
+         * make future invocations of pci_is_express() return false
+         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
+         */
+        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
+    }
+
+    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
+    /* Subclass hook runs last, once the bus exists. */
+    if (k->realize) {
+        k->realize(proxy, errp);
+    }
+}
+
+/*
+ * PCIDeviceClass exit hook for virtio-pci proxies.
+ *
+ * Releases the exclusive MSI-X BAR and, if the AER flag is set and the
+ * device is a PCIe device on a non-root express bus, tears down AER.
+ */
+static void virtio_pci_exit(PCIDevice *pci_dev)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    bool pcie_port;
+
+    pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
+                !pci_bus_is_root(pci_get_bus(pci_dev));
+
+    msix_uninit_exclusive_bar(pci_dev);
+
+    if (!(proxy->flags & VIRTIO_PCI_FLAG_AER)) {
+        return;
+    }
+    if (pcie_port && pci_is_express(pci_dev)) {
+        pcie_aer_exit(pci_dev);
+    }
+}
+
+/*
+ * Reset the virtio bus behind the proxy, release every MSI-X vector and
+ * clear the per-virtqueue state cached in the proxy.
+ */
+static void virtio_pci_reset(DeviceState *qdev)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
+    int n;
+
+    virtio_bus_reset(VIRTIO_BUS(&proxy->bus));
+    msix_unuse_all_vectors(&proxy->pci_dev);
+
+    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
+        proxy->vqs[n].enabled = 0;
+        proxy->vqs[n].reset = 0;
+        proxy->vqs[n].num = 0;
+        /* each ring address is kept as two words; clear both */
+        proxy->vqs[n].desc[0] = 0;
+        proxy->vqs[n].desc[1] = 0;
+        proxy->vqs[n].avail[0] = 0;
+        proxy->vqs[n].avail[1] = 0;
+        proxy->vqs[n].used[0] = 0;
+        proxy->vqs[n].used[1] = 0;
+    }
+}
+
+/*
+ * DeviceClass reset handler: perform the virtio-pci reset and, for PCI
+ * Express devices, additionally reset the device-error and link-control
+ * state and zero the PCI_PM_CTRL word of the PM capability.
+ */
+static void virtio_pci_bus_reset(DeviceState *qdev)
+{
+    PCIDevice *pci_dev = PCI_DEVICE(qdev);
+
+    virtio_pci_reset(qdev);
+
+    if (!pci_is_express(pci_dev)) {
+        return;
+    }
+
+    pcie_cap_deverr_reset(pci_dev);
+    pcie_cap_lnkctl_reset(pci_dev);
+    pci_set_word(pci_dev->config + pci_dev->exp.pm_cap + PCI_PM_CTRL, 0);
+}
+
+static Property virtio_pci_properties[] = { /*
+     * qdev properties installed on the virtio-pci class by
+     * virtio_pci_class_init(). Every DEFINE_PROP_BIT entry maps a property
+     * name onto one bit of VirtIOPCIProxy.flags; the last macro argument is
+     * the default value of that property.
+     */
+    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
+    DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
+    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
+    DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
+    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
+    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
+                     ignore_backend_features, false),
+    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_ATS_BIT, false),
+    DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true),
+    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
+    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
+    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
+    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
+    DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_AER_BIT, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * DeviceClass realize wrapper: mark the device as PCI Express capable when
+ * PCIe has not been disabled by flag and the modern interface is enabled,
+ * then chain to the realize handler saved from the parent class.
+ */
+static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
+    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
+    bool pcie_allowed = !(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE);
+
+    if (pcie_allowed && virtio_pci_modern(proxy)) {
+        proxy->pci_dev.cap_present |= QEMU_PCI_CAP_EXPRESS;
+    }
+
+    vpciklass->parent_dc_realize(qdev, errp);
+}
+
+/*
+ * Class initialiser of the abstract virtio-pci type: installs the common
+ * properties, the realize/exit/reset handlers and the default PCI IDs.
+ */
+static void virtio_pci_class_init(ObjectClass *klass, void *data)
+{
+    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* qdev level: properties, realize chaining and reset */
+    device_class_set_props(dc, virtio_pci_properties);
+    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
+                                    &vpciklass->parent_dc_realize);
+    dc->reset = virtio_pci_bus_reset;
+
+    /* PCI level: lifecycle hooks and identification */
+    k->realize = virtio_pci_realize;
+    k->exit = virtio_pci_exit;
+    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    k->revision = VIRTIO_PCI_ABI_VERSION;
+    k->class_id = PCI_CLASS_OTHERS;
+}
+
+/*
+ * Abstract QOM type for virtio-pci proxies; it is the default parent of the
+ * base types registered through virtio_pci_types_register().
+ */
+static const TypeInfo virtio_pci_info = {
+    .name          = TYPE_VIRTIO_PCI,
+    .parent        = TYPE_PCI_DEVICE,
+    .abstract      = true,
+    .instance_size = sizeof(VirtIOPCIProxy),
+    .class_size    = sizeof(VirtioPCIClass),
+    .class_init    = virtio_pci_class_init,
+};
+
+static Property virtio_pci_generic_properties[] = { /*
+     * Properties installed by virtio_pci_generic_class_init():
+     * "disable-legacy" (on/off/auto, default auto) and "disable-modern"
+     * (bool, default false).
+     */
+    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
+                            ON_OFF_AUTO_AUTO),
+    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initialiser trampoline: @data is the VirtioPCIDeviceTypeInfo the
+ * type was registered with; forward to its class_init callback, if any,
+ * passing NULL as class data.
+ */
+static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
+{
+    const VirtioPCIDeviceTypeInfo *info = data;
+
+    if (!info->class_init) {
+        return;
+    }
+    info->class_init(klass, NULL);
+}
+
+/* Class initialiser that installs the disable-legacy/disable-modern props. */
+static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
+{
+    device_class_set_props(DEVICE_CLASS(klass), virtio_pci_generic_properties);
+}
+
+/* Transitional types: neither the legacy nor the modern mode is disabled. */
+static void virtio_pci_transitional_instance_init(Object *obj)
+{
+    VirtIOPCIProxy *vpci = VIRTIO_PCI(obj);
+
+    vpci->disable_modern = false;
+    vpci->disable_legacy = ON_OFF_AUTO_OFF;
+}
+
+/* Non-transitional types: legacy mode is disabled, modern mode is not. */
+static void virtio_pci_non_transitional_instance_init(Object *obj)
+{
+    VirtIOPCIProxy *vpci = VIRTIO_PCI(obj);
+
+    vpci->disable_modern = false;
+    vpci->disable_legacy = ON_OFF_AUTO_ON;
+}
+
+/*
+ * Register the QOM types described by @t.
+ *
+ * An abstract base type is always registered.  When @t provides base_name,
+ * that base type carries @t's class_init (through
+ * virtio_pci_base_class_init) and the generic type, if named, adds the
+ * generic properties.  When @t has no base_name, an intermediate
+ * "<generic_name>-base-type" is synthesised to carry the generic properties
+ * and the generic type carries @t's class_init instead; in that case no
+ * transitional or non-transitional names may be given.
+ *
+ * Optional non-transitional and transitional types are registered as
+ * children of the base type.
+ */
+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
+{
+    const bool has_base = t->base_name != NULL;
+    char *synth_name = has_base ? NULL
+                     : g_strdup_printf("%s-base-type", t->generic_name);
+    const char *base_type_name = has_base ? t->base_name : synth_name;
+    TypeInfo base_type_info = {
+        .name          = base_type_name,
+        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
+        .instance_size = t->instance_size,
+        .instance_init = t->instance_init,
+        .class_size    = t->class_size,
+        .class_init    = has_base ? virtio_pci_base_class_init
+                                  : virtio_pci_generic_class_init,
+        .class_data    = has_base ? (void *)t : NULL,
+        .abstract      = true,
+        .interfaces    = t->interfaces,
+    };
+    TypeInfo generic_type_info = {
+        .name       = t->generic_name,
+        .parent     = base_type_name,
+        .class_init = has_base ? virtio_pci_generic_class_init
+                               : virtio_pci_base_class_init,
+        .class_data = has_base ? NULL : (void *)t,
+        .interfaces = (InterfaceInfo[]) {
+            { INTERFACE_PCIE_DEVICE },
+            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+            { }
+        },
+    };
+
+    if (!has_base) {
+        /*
+         * No base type -> register a single generic device type, so the
+         * variant names must not be requested.
+         */
+        assert(!t->non_transitional_name);
+        assert(!t->transitional_name);
+    }
+
+    type_register(&base_type_info);
+    if (generic_type_info.name) {
+        type_register(&generic_type_info);
+    }
+
+    if (t->non_transitional_name) {
+        const TypeInfo non_transitional_type_info = {
+            .name          = t->non_transitional_name,
+            .parent        = base_type_name,
+            .instance_init = virtio_pci_non_transitional_instance_init,
+            .interfaces = (InterfaceInfo[]) {
+                { INTERFACE_PCIE_DEVICE },
+                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+                { }
+            },
+        };
+        type_register(&non_transitional_type_info);
+    }
+
+    if (t->transitional_name) {
+        const TypeInfo transitional_type_info = {
+            .name          = t->transitional_name,
+            .parent        = base_type_name,
+            .instance_init = virtio_pci_transitional_instance_init,
+            .interfaces = (InterfaceInfo[]) {
+                /*
+                 * Transitional virtio devices work only as Conventional PCI
+                 * devices because they require PIO ports.
+                 */
+                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+                { }
+            },
+        };
+        type_register(&transitional_type_info);
+    }
+
+    /* the synthesised name (if any) is no longer needed past this point */
+    g_free(synth_name);
+}
+
+/*
+ * Suggest how many per-vCPU virtqueues a device should create, given that
+ * it also has @fixed_queues virtqueues of its own.
+ *
+ * The starting point is one queue per vCPU: a 1:1 vq to vCPU mapping is
+ * ideal because the same vCPU that submitted virtqueue buffers can handle
+ * their completion.  When a different vCPU handles completion it may need
+ * to IPI the vCPU that submitted the request and this adds overhead.
+ *
+ * Virtqueues consume guest RAM and MSI-X vectors.  This is wasteful in
+ * guests with very many vCPUs and a device that is only used by a few
+ * vCPUs.  Unfortunately optimizing that case requires manual pinning inside
+ * the guest, so those users might as well manually set the number of
+ * queues.  There is no upper limit that can be applied automatically and
+ * doing so arbitrarily would result in a sudden performance drop once the
+ * threshold number of vCPUs is exceeded.
+ *
+ * The result is then capped twice:
+ *  - by MSI-X: the maximum number of vectors is PCI_MSIX_FLAGS_QSIZE + 1,
+ *    but the config change interrupt and the fixed virtqueues must be
+ *    taken into account too;
+ *  - by the limit on how many virtqueues a device can have.
+ */
+unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
+{
+    unsigned msix_room = PCI_MSIX_FLAGS_QSIZE - fixed_queues;
+    unsigned vq_room = VIRTIO_QUEUE_MAX - fixed_queues;
+    unsigned chosen = current_machine->smp.cpus;
+
+    if (msix_room < chosen) {
+        chosen = msix_room;
+    }
+    if (vq_room < chosen) {
+        chosen = vq_room;
+    }
+
+    return chosen;
+}
+
+/* virtio-pci-bus */
+
+/*
+ * Initialise @bus in place as a TYPE_VIRTIO_PCI_BUS named "virtio-bus" with
+ * the proxy @dev as its parent device.
+ */
+static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
+                               VirtIOPCIProxy *dev)
+{
+    qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, DEVICE(dev), "virtio-bus");
+}
+
+/* Wire the virtio-pci transport callbacks into the virtio bus class. */
+static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
+{
+    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
+
+    /* the bus holds at most one device */
+    BUS_CLASS(klass)->max_dev = 1;
+
+    /* notification, notifier and ioeventfd hooks */
+    k->notify = virtio_pci_notify;
+    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
+    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
+    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
+    k->query_nvectors = virtio_pci_query_nvectors;
+    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
+    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
+
+    /* save_* / load_* hooks */
+    k->save_config = virtio_pci_save_config;
+    k->load_config = virtio_pci_load_config;
+    k->save_queue = virtio_pci_save_queue;
+    k->load_queue = virtio_pci_load_queue;
+    k->save_extra_state = virtio_pci_save_extra_state;
+    k->load_extra_state = virtio_pci_load_extra_state;
+    k->has_extra_state = virtio_pci_has_extra_state;
+
+    /* device lifecycle hooks */
+    k->vmstate_change = virtio_pci_vmstate_change;
+    k->pre_plugged = virtio_pci_pre_plugged;
+    k->device_plugged = virtio_pci_device_plugged;
+    k->device_unplugged = virtio_pci_device_unplugged;
+
+    /* DMA / IOMMU and queue state queries */
+    k->get_dma_as = virtio_pci_get_dma_as;
+    k->iommu_enabled = virtio_pci_iommu_enabled;
+    k->queue_enabled = virtio_pci_queue_enabled;
+}
+
+/* QOM type of the virtio bus instantiated by virtio_pci_bus_new(). */
+static const TypeInfo virtio_pci_bus_info = {
+    .name          = TYPE_VIRTIO_PCI_BUS,
+    .parent        = TYPE_VIRTIO_BUS,
+    .class_init    = virtio_pci_bus_class_init,
+    .class_size    = sizeof(VirtioPCIBusClass),
+    .instance_size = sizeof(VirtioPCIBusState),
+};
+
+static void virtio_pci_register_types(void)
+{
+    /*
+     * Base types: the virtio-pci bus type and the abstract virtio-pci
+     * proxy type. Device-specific types are not registered here; they go
+     * through virtio_pci_types_register().
+     */
+    type_register_static(&virtio_pci_bus_info);
+    type_register_static(&virtio_pci_info);
+}
+
+type_init(virtio_pci_register_types)
+
diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c
new file mode 100644
index 00000000..7d9f4ec1
--- /dev/null
+++ b/hw/virtio/virtio-pmem-pci.c
@@ -0,0 +1,127 @@
+/*
+ * Virtio PMEM PCI device
+ *
+ * Copyright (C) 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Pankaj Gupta <pagupta@redhat.com>
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "virtio-pmem-pci.h"
+#include "hw/mem/memory-device.h"
+#include "qapi/error.h"
+
+/*
+ * VirtioPCIClass realize hook for virtio-pmem-pci: force virtio 1 mode on
+ * the proxy, then realize the embedded virtio-pmem device on the proxy's
+ * virtio bus.  Failures are reported through @errp.
+ */
+static void virtio_pmem_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    DeviceState *vdev = DEVICE(&VIRTIO_PMEM_PCI(vpci_dev)->vdev);
+
+    virtio_pci_force_virtio_1(vpci_dev);
+    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+
+static void virtio_pmem_pci_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                     Error **errp)
+{ /* MemoryDeviceClass set_addr hook: store @addr in VIRTIO_PMEM_ADDR_PROP */
+    object_property_set_uint(OBJECT(md), VIRTIO_PMEM_ADDR_PROP, addr, errp);
+}
+
+/*
+ * MemoryDeviceClass get_addr hook: read VIRTIO_PMEM_ADDR_PROP from @md.
+ * A failing property read aborts (error_abort).
+ */
+static uint64_t virtio_pmem_pci_get_addr(const MemoryDeviceState *md)
+{
+    uint64_t addr;
+
+    addr = object_property_get_uint(OBJECT(md), VIRTIO_PMEM_ADDR_PROP,
+                                    &error_abort);
+    return addr;
+}
+
+/*
+ * MemoryDeviceClass get_memory_region hook: delegate to the embedded
+ * virtio-pmem device's class method.
+ */
+static MemoryRegion *virtio_pmem_pci_get_memory_region(MemoryDeviceState *md,
+                                                       Error **errp)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(&VIRTIO_PMEM_PCI(md)->vdev);
+
+    return VIRTIO_PMEM_GET_CLASS(pmem)->get_memory_region(pmem, errp);
+}
+
+/*
+ * MemoryDeviceClass get_plugged_size hook.
+ *
+ * The plugged size corresponds to the size of the device's memory region;
+ * 0 is returned when the region cannot be obtained (with @errp set by the
+ * embedded device's get_memory_region).
+ */
+static uint64_t virtio_pmem_pci_get_plugged_size(const MemoryDeviceState *md,
+                                                 Error **errp)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(&VIRTIO_PMEM_PCI(md)->vdev);
+    MemoryRegion *mr;
+
+    mr = VIRTIO_PMEM_GET_CLASS(pmem)->get_memory_region(pmem, errp);
+    if (!mr) {
+        return 0;
+    }
+
+    return memory_region_size(mr);
+}
+
+/*
+ * MemoryDeviceClass fill_device_info hook.
+ *
+ * Allocates a VirtioPMEMDeviceInfo, records the proxy's device id (when it
+ * has one), lets the embedded virtio-pmem device fill in the rest and
+ * attaches the result to @info as a VIRTIO_PMEM entry.
+ */
+static void virtio_pmem_pci_fill_device_info(const MemoryDeviceState *md,
+                                             MemoryDeviceInfo *info)
+{
+    VirtioPMEMDeviceInfo *vi = g_new0(VirtioPMEMDeviceInfo, 1);
+    VirtIOPMEM *pmem = VIRTIO_PMEM(&VIRTIO_PMEM_PCI(md)->vdev);
+    const char *id = DEVICE(md)->id;
+
+    if (id) {
+        vi->id = g_strdup(id);
+        vi->has_id = true;
+    }
+
+    /* let the real device handle everything else */
+    VIRTIO_PMEM_GET_CLASS(pmem)->fill_device_info(pmem, vi);
+
+    info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM;
+    info->u.virtio_pmem.data = vi;
+}
+
+/*
+ * Class initialiser for virtio-pmem-pci: sets the realize hook, the PCI
+ * revision and class id, the device category, and the MemoryDeviceClass
+ * callbacks implemented in this file.
+ */
+static void virtio_pmem_pci_class_init(ObjectClass *klass, void *data)
+{
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    k->realize = virtio_pmem_pci_realize;
+
+    pcidev_k->class_id = PCI_CLASS_OTHERS;
+    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+
+    /* memory device interface */
+    mdc->set_addr = virtio_pmem_pci_set_addr;
+    mdc->get_addr = virtio_pmem_pci_get_addr;
+    mdc->get_memory_region = virtio_pmem_pci_get_memory_region;
+    mdc->get_plugged_size = virtio_pmem_pci_get_plugged_size;
+    mdc->fill_device_info = virtio_pmem_pci_fill_device_info;
+}
+
+/* Instance initialiser: set up the embedded TYPE_VIRTIO_PMEM device. */
+static void virtio_pmem_pci_instance_init(Object *obj)
+{
+    VirtIOPMEMPCI *pmem_pci = VIRTIO_PMEM_PCI(obj);
+
+    virtio_instance_init_common(obj, &pmem_pci->vdev, sizeof(pmem_pci->vdev),
+                                TYPE_VIRTIO_PMEM);
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(): a base type plus
+ * the generic "virtio-pmem-pci" type, with the memory device interface
+ * attached to the base type.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_pmem_pci_info = {
+    .generic_name  = "virtio-pmem-pci",
+    .base_name     = TYPE_VIRTIO_PMEM_PCI,
+    .class_init    = virtio_pmem_pci_class_init,
+    .instance_init = virtio_pmem_pci_instance_init,
+    .instance_size = sizeof(VirtIOPMEMPCI),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_MEMORY_DEVICE },
+        { }
+    },
+};
+
+static void virtio_pmem_pci_register_types(void)
+{ /* registers the types described by virtio_pmem_pci_info */
+    virtio_pci_types_register(&virtio_pmem_pci_info);
+}
+type_init(virtio_pmem_pci_register_types)
diff --git a/hw/virtio/virtio-pmem-pci.h b/hw/virtio/virtio-pmem-pci.h
new file mode 100644
index 00000000..63cfe727
--- /dev/null
+++ b/hw/virtio/virtio-pmem-pci.h
@@ -0,0 +1,35 @@
+/*
+ * Virtio PMEM PCI device
+ *
+ * Copyright (C) 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Pankaj Gupta <pagupta@redhat.com>
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_PMEM_PCI_H
+#define QEMU_VIRTIO_PMEM_PCI_H
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-pmem.h"
+#include "qom/object.h"
+
+typedef struct VirtIOPMEMPCI VirtIOPMEMPCI;
+
+/*
+ * virtio-pmem-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VIRTIO_PMEM_PCI "virtio-pmem-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIOPMEMPCI, VIRTIO_PMEM_PCI,
+                         TYPE_VIRTIO_PMEM_PCI)
+
+struct VirtIOPMEMPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy this type extends */
+    VirtIOPMEM vdev;           /* embedded virtio-pmem device */
+};
+
+#endif /* QEMU_VIRTIO_PMEM_PCI_H */
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
new file mode 100644
index 00000000..a1abfe0e
--- /dev/null
+++ b/hw/virtio/virtio-pmem.c
@@ -0,0 +1,196 @@
+/*
+ * Virtio PMEM device
+ *
+ * Copyright (C) 2018-2019 Red Hat, Inc.
+ *
+ * Authors:
+ *  Pankaj Gupta <pagupta@redhat.com>
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "hw/virtio/virtio-pmem.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-access.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "standard-headers/linux/virtio_pmem.h"
+#include "sysemu/hostmem.h"
+#include "block/aio.h"
+#include "block/thread-pool.h"
+#include "trace.h"
+
+typedef struct VirtIODeviceRequest { /* one flush request in flight */
+    VirtQueueElement elem; /* kept first: virtio_pmem_flush() pops the whole
+                            * request via virtqueue_pop() and casts the
+                            * request pointer back to VirtQueueElement * */
+    int fd;                /* descriptor of the backend region; fsync()ed
+                            * by worker_cb() */
+    VirtIOPMEM *pmem;      /* device whose rq_vq receives the response */
+    VirtIODevice *vdev;    /* the same device, passed to virtio_stl_p() */
+    struct virtio_pmem_req req;   /* NOTE(review): never read or written in
+                                   * this file - confirm whether needed */
+    struct virtio_pmem_resp resp; /* resp.ret is set by worker_cb(): 0 when
+                                   * fsync() succeeded, 1 otherwise */
+} VirtIODeviceRequest;
+
+/*
+ * Thread-pool worker for a flush request.
+ *
+ * @opaque: the VirtIODeviceRequest to serve
+ *
+ * Flushes the raw backing image with fsync() and records the outcome in the
+ * response: 0 on success, 1 on any failure.  Always returns 0.
+ */
+static int worker_cb(void *opaque)
+{
+    VirtIODeviceRequest *req_data = opaque;
+    int rc = fsync(req_data->fd);
+
+    trace_virtio_pmem_flush_done(rc);
+    virtio_stl_p(req_data->vdev, &req_data->resp.ret, rc != 0 ? 1 : 0);
+
+    return 0;
+}
+
+/*
+ * Completion callback for a flush request, passed to the thread pool
+ * together with worker_cb() by virtio_pmem_flush().
+ *
+ * @opaque: the VirtIODeviceRequest that was submitted
+ * @ret: completion status supplied by the thread pool (unused)
+ *
+ * Copies the response into the element's in_sg buffers, pushes the element
+ * back onto the request virtqueue, notifies the guest and frees the request.
+ */
+static void done_cb(void *opaque, int ret)
+{
+    VirtIODeviceRequest *req_data = opaque;
+    /* iov_from_buf() yields a byte count, so keep it in a size_t */
+    size_t len = iov_from_buf(req_data->elem.in_sg, req_data->elem.in_num, 0,
+                              &req_data->resp, sizeof(struct virtio_pmem_resp));
+
+    /* Callbacks are serialized, so no need to use atomic ops. */
+    virtqueue_push(req_data->pmem->rq_vq, &req_data->elem, len);
+    /*
+     * Use the VirtIODevice pointer recorded in the request (as worker_cb()
+     * does) instead of an unchecked cast of the VirtIOPMEM pointer.
+     */
+    virtio_notify(req_data->vdev, req_data->pmem->rq_vq);
+    trace_virtio_pmem_response();
+    g_free(req_data);
+}
+
+/*
+ * Request virtqueue handler.
+ *
+ * Pops one element from @vq, validates that it has at least one out and one
+ * in buffer, and submits a flush of the memory backend's file descriptor to
+ * the thread pool (worker_cb does the work, done_cb completes the request).
+ * A missing or malformed request marks the device as broken through
+ * virtio_error().
+ */
+static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
+    HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev);
+    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
+    VirtIODeviceRequest *req;
+    bool malformed;
+
+    trace_virtio_pmem_flush_request();
+
+    req = virtqueue_pop(vq, sizeof(*req));
+    if (req == NULL) {
+        virtio_error(vdev, "virtio-pmem missing request data");
+        return;
+    }
+
+    malformed = req->elem.out_num < 1 || req->elem.in_num < 1;
+    if (malformed) {
+        virtio_error(vdev, "virtio-pmem request not proper");
+        virtqueue_detach_element(vq, &req->elem, 0);
+        g_free(req);
+        return;
+    }
+
+    req->vdev = vdev;
+    req->pmem = pmem;
+    req->fd = memory_region_get_fd(&backend->mr);
+
+    thread_pool_submit_aio(pool, worker_cb, req, done_cb, req);
+}
+
+/*
+ * Fill the device config space at @config: the start address configured on
+ * the device and the size of the memory backend's region.
+ */
+static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
+    struct virtio_pmem_config *cfg = (struct virtio_pmem_config *)config;
+    uint64_t region_size = memory_region_size(&pmem->memdev->mr);
+
+    virtio_stq_p(vdev, &cfg->start, pmem->start);
+    virtio_stq_p(vdev, &cfg->size, region_size);
+}
+
+static uint64_t virtio_pmem_get_features(VirtIODevice *vdev, uint64_t features,
+                                        Error **errp)
+{ /* returns @features unchanged; @vdev and @errp are not used */
+    return features;
+}
+
+/*
+ * Realize the virtio-pmem device.
+ *
+ * Fails through @errp when no memdev was configured or when the memdev is
+ * already mapped.  Otherwise the backend is marked as mapped, the virtio
+ * device is initialised with a config space of sizeof(struct
+ * virtio_pmem_config) and one 128-entry request queue handled by
+ * virtio_pmem_flush() is added.
+ */
+static void virtio_pmem_realize(DeviceState *dev, Error **errp)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    if (pmem->memdev == NULL) {
+        error_setg(errp, "virtio-pmem memdev not set");
+        return;
+    }
+    if (host_memory_backend_is_mapped(pmem->memdev)) {
+        error_setg(errp, "can't use already busy memdev: %s",
+                   object_get_canonical_path_component(OBJECT(pmem->memdev)));
+        return;
+    }
+
+    /* claim the backend, then bring up the virtio device and its queue */
+    host_memory_backend_set_mapped(pmem->memdev, true);
+    virtio_init(vdev, VIRTIO_ID_PMEM, sizeof(struct virtio_pmem_config));
+    pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush);
+}
+
+/*
+ * Undo virtio_pmem_realize(): mark the backend as unmapped, delete the
+ * request queue and clean up the virtio device.
+ */
+static void virtio_pmem_unrealize(DeviceState *dev)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(dev);
+
+    host_memory_backend_set_mapped(pmem->memdev, false);
+    virtio_delete_queue(pmem->rq_vq);
+    virtio_cleanup(VIRTIO_DEVICE(dev));
+}
+
+/*
+ * VirtIOPMEMClass fill_device_info implementation: report the configured
+ * start address, the backend region size and the backend's canonical QOM
+ * path in @vi.
+ */
+static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem,
+                                         VirtioPMEMDeviceInfo *vi)
+{
+    vi->memdev = object_get_canonical_path(OBJECT(pmem->memdev));
+    vi->size = memory_region_size(&pmem->memdev->mr);
+    vi->memaddr = pmem->start;
+}
+
+/*
+ * VirtIOPMEMClass get_memory_region implementation.
+ *
+ * Returns the memory region of the configured backend, or NULL with @errp
+ * set when the memdev property has not been set.
+ */
+static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem,
+                                                   Error **errp)
+{
+    if (pmem->memdev) {
+        return &pmem->memdev->mr;
+    }
+
+    error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP);
+    return NULL;
+}
+
+static Property virtio_pmem_properties[] = { /*
+     * VIRTIO_PMEM_ADDR_PROP is a uint64 backed by VirtIOPMEM.start (default
+     * 0); VIRTIO_PMEM_MEMDEV_PROP links the TYPE_MEMORY_BACKEND object that
+     * virtio_pmem_realize() requires to be set.
+     */
+    DEFINE_PROP_UINT64(VIRTIO_PMEM_ADDR_PROP, VirtIOPMEM, start, 0),
+    DEFINE_PROP_LINK(VIRTIO_PMEM_MEMDEV_PROP, VirtIOPMEM, memdev,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initialiser for virtio-pmem: installs the properties, the virtio
+ * device callbacks and the VirtIOPMEMClass methods, and files the device
+ * under the storage category.
+ */
+static void virtio_pmem_class_init(ObjectClass *klass, void *data)
+{
+    VirtIOPMEMClass *vpc = VIRTIO_PMEM_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* qdev level */
+    device_class_set_props(dc, virtio_pmem_properties);
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+    /* virtio device callbacks */
+    vdc->realize = virtio_pmem_realize;
+    vdc->unrealize = virtio_pmem_unrealize;
+    vdc->get_features = virtio_pmem_get_features;
+    vdc->get_config = virtio_pmem_get_config;
+
+    /* methods used by the transport proxy */
+    vpc->get_memory_region = virtio_pmem_get_memory_region;
+    vpc->fill_device_info = virtio_pmem_fill_device_info;
+}
+
+/* QOM type of the virtio-pmem device, derived from TYPE_VIRTIO_DEVICE. */
+static const TypeInfo virtio_pmem_info = {
+    .name          = TYPE_VIRTIO_PMEM,
+    .parent        = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOPMEM),
+    .class_init    = virtio_pmem_class_init,
+    .class_size    = sizeof(VirtIOPMEMClass),
+};
+
+static void virtio_register_types(void)
+{ /* registers the TYPE_VIRTIO_PMEM device type */
+    type_register_static(&virtio_pmem_info);
+}
+
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c
new file mode 100644
index 00000000..6e76f8b5
--- /dev/null
+++ b/hw/virtio/virtio-rng-pci.c
@@ -0,0 +1,96 @@
+/*
+ * Virtio rng PCI Bindings
+ *
+ * Copyright 2012 Red Hat, Inc.
+ * Copyright 2012 Amit Shah <amit.shah@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-rng.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+
+typedef struct VirtIORngPCI VirtIORngPCI;
+
+/*
+ * virtio-rng-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIORngPCI, VIRTIO_RNG_PCI,
+                         TYPE_VIRTIO_RNG_PCI)
+
+struct VirtIORngPCI {
+    VirtIOPCIProxy parent_obj; /* PCI proxy this type extends */
+    VirtIORNG vdev;            /* embedded virtio-rng device */
+};
+
+static Property virtio_rng_properties[] = { /*
+     * Proxy-level properties of virtio-rng-pci: "ioeventfd" controls the
+     * USE_IOEVENTFD bit of VirtIOPCIProxy.flags (default on); "vectors"
+     * sets nvectors and defaults to DEV_NVECTORS_UNSPECIFIED, which
+     * virtio_rng_pci_realize() replaces with 2.
+     */
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * VirtioPCIClass realize hook for virtio-rng-pci: default the vector count
+ * to 2 when "vectors" was left unspecified, then realize the embedded
+ * virtio-rng device on the proxy's virtio bus.  Failures are reported
+ * through @errp.
+ */
+static void virtio_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIORngPCI *vrng = VIRTIO_RNG_PCI(vpci_dev);
+
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = 2;
+    }
+
+    /* nothing follows, so the boolean result needs no separate handling */
+    qdev_realize(DEVICE(&vrng->vdev), BUS(&vpci_dev->bus), errp);
+}
+
+/*
+ * Class initialiser for virtio-rng-pci: installs the proxy properties and
+ * realize hook, sets the PCI identification and files the device under the
+ * misc category.
+ */
+static void virtio_rng_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    /* qdev level */
+    device_class_set_props(dc, virtio_rng_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+
+    /* virtio-pci level */
+    k->realize = virtio_rng_pci_realize;
+
+    /* PCI identification */
+    pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_RNG;
+    pcidev_k->class_id = PCI_CLASS_OTHERS;
+    pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+}
+
+/* Instance initialiser: set up the embedded TYPE_VIRTIO_RNG device. */
+static void virtio_rng_initfn(Object *obj)
+{
+    VirtIORngPCI *rng_pci = VIRTIO_RNG_PCI(obj);
+
+    virtio_instance_init_common(obj, &rng_pci->vdev, sizeof(rng_pci->vdev),
+                                TYPE_VIRTIO_RNG);
+}
+
+/*
+ * Type description handed to virtio_pci_types_register(): a base type plus
+ * generic, non-transitional and transitional virtio-rng-pci types.
+ */
+static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = {
+    .base_name             = TYPE_VIRTIO_RNG_PCI,
+    .generic_name          = "virtio-rng-pci",
+    .non_transitional_name = "virtio-rng-pci-non-transitional",
+    .transitional_name     = "virtio-rng-pci-transitional",
+    .class_init    = virtio_rng_pci_class_init,
+    .instance_init = virtio_rng_initfn,
+    .instance_size = sizeof(VirtIORngPCI),
+};
+
+static void virtio_rng_pci_register(void)
+{ /* registers the types described by virtio_rng_pci_info */
+    virtio_pci_types_register(&virtio_rng_pci_info);
+}
+
+type_init(virtio_rng_pci_register)
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
new file mode 100644
index 00000000..7e12fc03
--- /dev/null
+++ b/hw/virtio/virtio-rng.c
@@ -0,0 +1,289 @@
+/*
+ * A virtio device implementing a hardware random number generator.
+ *
+ * Copyright 2012 Red Hat, Inc.
+ * Copyright 2012 Amit Shah <amit.shah@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/iov.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+#include "hw/virtio/virtio.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-rng.h"
+#include "sysemu/rng.h"
+#include "sysemu/runstate.h"
+#include "qom/object_interfaces.h"
+#include "trace.h"
+
+static bool is_guest_ready(VirtIORNG *vrng)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(vrng);
+    if (!virtio_queue_ready(vrng->vq) ||
+        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        trace_virtio_rng_guest_not_ready(vrng);
+        return false;   /* queue not ready, or DRIVER_OK not set yet */
+    }
+    return true;
+}
+
+static size_t get_request_size(VirtQueue *vq, unsigned quota)
+{
+    unsigned int in_bytes, out_bytes;
+    /* Only the "in" count is returned; "out" is fetched but ignored. */
+    virtqueue_get_avail_bytes(vq, &in_bytes, &out_bytes, quota, 0);
+    return in_bytes;
+}
+
+static void virtio_rng_process(VirtIORNG *vrng); /* needed by chr_read() */
+
+/*
+ * Entropy callback: copy @size bytes from @buf into guest buffers.
+ * @opaque is the VirtIORNG handed to rng_backend_request_entropy().
+ */
+static void chr_read(void *opaque, const void *buf, size_t size)
+{
+    VirtIORNG *vrng = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(vrng);
+    VirtQueueElement *elem;
+    size_t len;
+    size_t offset = 0; /* same width as size: no mixed-sign compare */
+
+    if (!is_guest_ready(vrng)) {
+        return;
+    }
+
+    /* We can't modify the virtqueue until our state is fully synced. */
+    if (!runstate_check(RUN_STATE_RUNNING)) {
+        trace_virtio_rng_cpu_is_stopped(vrng, size);
+        return;
+    }
+
+    /* The whole delivery is charged to the quota, even if some is unused. */
+    vrng->quota_remaining -= size;
+
+    while (offset < size) {
+        elem = virtqueue_pop(vrng->vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            break;
+        }
+        trace_virtio_rng_popped(vrng);
+        len = iov_from_buf(elem->in_sg, elem->in_num,
+                           0, buf + offset, size - offset);
+        offset += len;
+
+        virtqueue_push(vrng->vq, elem, len);
+        trace_virtio_rng_pushed(vrng, len);
+        g_free(elem);
+    }
+    virtio_notify(vdev, vrng->vq);
+
+    if (!virtio_queue_empty(vrng->vq)) {
+        /* If we didn't drain the queue, call virtio_rng_process
+         * to take care of asking for more data as appropriate.
+         */
+        virtio_rng_process(vrng);
+    }
+}
+
+/* Ask the backend for entropy for queued guest buffers, within quota. */
+static void virtio_rng_process(VirtIORNG *vrng)
+{
+    size_t size;
+    unsigned quota;
+
+    if (!is_guest_ready(vrng)) {
+        return;
+    }
+    /* Arm the rate-limit timer; check_rate_limit() re-enables arming. */
+    if (vrng->activate_timer) {
+        timer_mod(vrng->rate_limit_timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + vrng->conf.period_ms);
+        vrng->activate_timer = false;
+    }
+    /* quota_remaining can be negative (it is signed): clamp it to 0. */
+    if (vrng->quota_remaining < 0) {
+        quota = 0;
+    } else {
+        quota = MIN((uint64_t)vrng->quota_remaining, (uint64_t)UINT32_MAX);
+    }
+    size = get_request_size(vrng->vq, quota);
+    trace_virtio_rng_request(vrng, size, quota);
+    /* Compare against the clamped quota so an exhausted quota gives 0. */
+    size = MIN(quota, size);
+    if (size) {
+        rng_backend_request_entropy(vrng->rng, size, chr_read, vrng);
+    }
+}
+
+static void handle_input(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /* Queue handler (see virtio_add_queue): vq itself is not needed. */
+    virtio_rng_process(VIRTIO_RNG(vdev));
+}
+
+static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp)
+{
+    return f; /* the given feature bits are returned unchanged */
+}
+
+/* VM run-state hook; @opaque is the VirtIORNG registered at realize. */
+static void virtio_rng_vm_state_change(void *opaque, bool running,
+                                       RunState state)
+{
+    VirtIORNG *rng = opaque;
+
+    trace_virtio_rng_vm_state_change(rng, running, state);
+    if (!running || !is_guest_ready(rng)) {
+        return;
+    }
+
+    /* An element may be pending that could not be handled earlier (quota
+     * limit reached, or CPU stopped); retry now that the CPU is running.
+     */
+    virtio_rng_process(rng);
+}
+
+static void check_rate_limit(void *opaque)
+{
+    VirtIORNG *rng = opaque;
+    /* Timer callback: start a new period with the full max_bytes quota. */
+    rng->quota_remaining = rng->conf.max_bytes;
+    virtio_rng_process(rng);
+    rng->activate_timer = true;
+}
+
+static void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VirtIORNG *rng = VIRTIO_RNG(vdev);
+
+    /* Nothing is recorded or processed while the VM is not running. */
+    if (vdev->vm_running) {
+        vdev->status = status;
+
+        /* The status changed, so pending buffers may be serviceable. */
+        virtio_rng_process(rng);
+    }
+}
+
+/* Realize: validate properties, pick a backend, create queue and timer. */
+static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIORNG *vrng = VIRTIO_RNG(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    if (vrng->conf.period_ms <= 0) {
+        error_setg(errp, "'period' parameter expects a positive integer");
+        return;
+    }
+
+    /*
+     * Property parsing does not enforce unsigned integers, so values that
+     * would be negative as int64_t are rejected here as a workaround.
+     */
+    if (vrng->conf.max_bytes > INT64_MAX) {
+        error_setg(errp, "'max-bytes' parameter must be non-negative, "
+                   "and less than 2^63");
+        return;
+    }
+
+    /* No "rng" link supplied: create a builtin backend as our child. */
+    if (!vrng->conf.rng) {
+        Object *backend = object_new(TYPE_RNG_BUILTIN);
+
+        if (!user_creatable_complete(USER_CREATABLE(backend), errp)) {
+            object_unref(backend);
+            return;
+        }
+        object_property_add_child(OBJECT(dev), "default-backend", backend);
+
+        /* The child property holds its own reference; release ours. */
+        object_unref(backend);
+        object_property_set_link(OBJECT(dev), "rng", backend, &error_abort);
+    }
+
+    vrng->rng = vrng->conf.rng;
+    if (!vrng->rng) {
+        error_setg(errp, "'rng' parameter expects a valid object");
+        return;
+    }
+
+    virtio_init(vdev, VIRTIO_ID_RNG, 0);
+
+    vrng->vq = virtio_add_queue(vdev, 8, handle_input);
+    vrng->quota_remaining = vrng->conf.max_bytes;
+    vrng->rate_limit_timer =
+        timer_new_ms(QEMU_CLOCK_VIRTUAL, check_rate_limit, vrng);
+    vrng->activate_timer = true;
+
+    /* Be notified of VM run-state changes (handle removed at unrealize). */
+    vrng->vmstate =
+        qemu_add_vm_change_state_handler(virtio_rng_vm_state_change, vrng);
+}
+
+static void virtio_rng_device_unrealize(DeviceState *dev)
+{
+    VirtIORNG *rng = VIRTIO_RNG(dev);
+    VirtIODevice *virtio_dev = VIRTIO_DEVICE(dev);
+    /* Release what realize set up: state handler, timer, queue 0, core. */
+    qemu_del_vm_change_state_handler(rng->vmstate);
+    timer_free(rng->rate_limit_timer);
+    virtio_del_queue(virtio_dev, 0);
+    virtio_cleanup(virtio_dev);
+}
+
+static const VMStateDescription vmstate_virtio_rng = {
+    .name = "virtio-rng",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,  /* the only entry: no rng-specific fields */
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property virtio_rng_properties[] = {
+    /* Default rate limit: INT64_MAX bytes per 2^16 ms period (about 2^47
+     * bytes per millisecond).  If you have an entropy source capable of more
+     * than this and can pass it through via virtio-rng, then hats off to
+     * you.  Until then, this is unlimited for all practical purposes.
+     */
+    DEFINE_PROP_UINT64("max-bytes", VirtIORNG, conf.max_bytes, INT64_MAX),
+    DEFINE_PROP_UINT32("period", VirtIORNG, conf.period_ms, 1 << 16),
+    DEFINE_PROP_LINK("rng", VirtIORNG, conf.rng, TYPE_RNG_BACKEND, RngBackend *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_rng_class_init(ObjectClass *klass, void *data)
+{
+    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    /* Generic device setup first, then the virtio device callbacks. */
+    dev_class->vmsd = &vmstate_virtio_rng;
+    device_class_set_props(dev_class, virtio_rng_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+    virtio_class->realize = virtio_rng_device_realize;
+    virtio_class->unrealize = virtio_rng_device_unrealize;
+    virtio_class->set_status = virtio_rng_set_status;
+    virtio_class->get_features = get_features;
+}
+
+static const TypeInfo virtio_rng_info = {
+    .parent        = TYPE_VIRTIO_DEVICE,
+    .name          = TYPE_VIRTIO_RNG,
+    .class_init    = virtio_rng_class_init,
+    .instance_size = sizeof(VirtIORNG),
+};
+
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_rng_info);
+}
+/* type_init() is given the function that registers virtio_rng_info. */
+type_init(virtio_register_types)
diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c
new file mode 100644
index 00000000..e8e3442f
--- /dev/null
+++ b/hw/virtio/virtio-scsi-pci.c
@@ -0,0 +1,114 @@
+/*
+ * Virtio scsi PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "qemu/module.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VirtIOSCSIPCI VirtIOSCSIPCI;
+
+/*
+ * virtio-scsi-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIOSCSIPCI, VIRTIO_SCSI_PCI,
+                         TYPE_VIRTIO_SCSI_PCI)
+/* parent_obj is the PCI proxy; the SCSI device is embedded by value. */
+struct VirtIOSCSIPCI {
+    VirtIOPCIProxy parent_obj;
+    VirtIOSCSI vdev;
+};
+
+static Property virtio_scsi_pci_properties[] = { /* VirtIOPCIProxy fields */
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+                       DEV_NVECTORS_UNSPECIFIED), /* resolved at realize */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Realize the embedded virtio-scsi device after filling in defaults. */
+static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOSCSIPCI *scsi_pci = VIRTIO_SCSI_PCI(vpci_dev);
+    DeviceState *scsi_dev = DEVICE(&scsi_pci->vdev);
+    DeviceState *proxy = DEVICE(vpci_dev);
+    VirtIOSCSIConf *conf = &scsi_pci->vdev.parent_obj.conf;
+
+    if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) {
+        conf->num_queues =
+            virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED);
+    }
+
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = conf->num_queues + VIRTIO_SCSI_VQ_NUM_FIXED + 1;
+    }
+
+    /*
+     * Keep the historical "<id>.0" name for the virtio-scsi-device bus
+     * so that existing command lines continue to work.
+     */
+    if (proxy->id) {
+        char *bus_name = g_strdup_printf("%s.0", proxy->id);
+        virtio_device_set_child_bus_name(VIRTIO_DEVICE(scsi_dev), bus_name);
+        g_free(bus_name);
+    }
+
+    qdev_realize(scsi_dev, BUS(&vpci_dev->bus), errp);
+}
+
+static void virtio_scsi_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    /* Realize hook, properties and category, then the PCI identity. */
+    vpci_class->realize = virtio_scsi_pci_realize;
+    device_class_set_props(dev_class, virtio_scsi_pci_properties);
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_SCSI;
+    pci_class->class_id = PCI_CLASS_STORAGE_SCSI;
+    pci_class->revision = 0x00;
+}
+
+/* Instance init: hand the embedded vdev to virtio_instance_init_common(). */
+static void virtio_scsi_pci_instance_init(Object *obj)
+{
+    VirtIOSCSIPCI *scsi_pci = VIRTIO_SCSI_PCI(obj);
+    virtio_instance_init_common(obj, &scsi_pci->vdev, sizeof(scsi_pci->vdev),
+                                TYPE_VIRTIO_SCSI);
+}
+
+static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = {
+    .instance_size         = sizeof(VirtIOSCSIPCI),
+    .instance_init         = virtio_scsi_pci_instance_init,
+    .class_init            = virtio_scsi_pci_class_init,
+    .base_name             = TYPE_VIRTIO_SCSI_PCI,
+    .generic_name          = "virtio-scsi-pci",
+    .transitional_name     = "virtio-scsi-pci-transitional",
+    .non_transitional_name = "virtio-scsi-pci-non-transitional",
+};
+
+static void virtio_scsi_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_scsi_pci_info);
+}
+/* type_init() is given the function that registers virtio_scsi_pci_info. */
+type_init(virtio_scsi_pci_register)
diff --git a/hw/virtio/virtio-serial-pci.c b/hw/virtio/virtio-serial-pci.c
new file mode 100644
index 00000000..cea31adc
--- /dev/null
+++ b/hw/virtio/virtio-serial-pci.c
@@ -0,0 +1,117 @@
+/*
+ * Virtio serial PCI Bindings
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Paul Brook        <paul@codesourcery.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-serial.h"
+#include "qemu/module.h"
+#include "hw/virtio/virtio-pci.h"
+#include "qom/object.h"
+
+typedef struct VirtIOSerialPCI VirtIOSerialPCI;
+
+/*
+ * virtio-serial-pci: This extends VirtIOPCIProxy.
+ */
+#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base"
+DECLARE_INSTANCE_CHECKER(VirtIOSerialPCI, VIRTIO_SERIAL_PCI,
+                         TYPE_VIRTIO_SERIAL_PCI)
+/* parent_obj is the PCI proxy; the serial device is embedded by value. */
+struct VirtIOSerialPCI {
+    VirtIOPCIProxy parent_obj;
+    VirtIOSerial vdev;
+};
+
+/* Realize the embedded virtio-serial device after fixing up defaults. */
+static void virtio_serial_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOSerialPCI *serial_pci = VIRTIO_SERIAL_PCI(vpci_dev);
+    DeviceState *serial_dev = DEVICE(&serial_pci->vdev);
+    DeviceState *proxy = DEVICE(vpci_dev);
+
+    /* Only three class codes are accepted; anything else is replaced. */
+    if (vpci_dev->class_code != PCI_CLASS_COMMUNICATION_OTHER &&
+        vpci_dev->class_code != PCI_CLASS_DISPLAY_OTHER && /* qemu 0.10 */
+        vpci_dev->class_code != PCI_CLASS_OTHERS) {        /* qemu-kvm  */
+        vpci_dev->class_code = PCI_CLASS_COMMUNICATION_OTHER;
+    }
+
+    /* Backwards compatibility: unspecified means max ports plus one. */
+    if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+        vpci_dev->nvectors = serial_pci->vdev.serial.max_virtserial_ports + 1;
+    }
+
+    /*
+     * Keep the historical "<id>.0" name for the virtio-serial-device bus
+     * so that existing command lines continue to work.
+     */
+    if (proxy->id) {
+        char *bus_name = g_strdup_printf("%s.0", proxy->id);
+        virtio_device_set_child_bus_name(VIRTIO_DEVICE(serial_dev), bus_name);
+        g_free(bus_name);
+    }
+
+    qdev_realize(serial_dev, BUS(&vpci_dev->bus), errp);
+}
+
+static Property virtio_serial_pci_properties[] = { /* VirtIOPCIProxy fields */
+    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+    DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
+    DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_serial_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    vpci_class->realize = virtio_serial_pci_realize;
+    device_class_set_props(dev_class, virtio_serial_pci_properties);
+    set_bit(DEVICE_CATEGORY_INPUT, dev_class->categories);
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_CONSOLE;
+    pci_class->class_id = PCI_CLASS_COMMUNICATION_OTHER;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+}
+
+/* Instance init: hand the embedded vdev to virtio_instance_init_common(). */
+static void virtio_serial_pci_instance_init(Object *obj)
+{
+    VirtIOSerialPCI *serial_pci = VIRTIO_SERIAL_PCI(obj);
+    virtio_instance_init_common(obj, &serial_pci->vdev,
+                                sizeof(serial_pci->vdev), TYPE_VIRTIO_SERIAL);
+}
+
+static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = {
+    .instance_size         = sizeof(VirtIOSerialPCI),
+    .instance_init         = virtio_serial_pci_instance_init,
+    .class_init            = virtio_serial_pci_class_init,
+    .base_name             = TYPE_VIRTIO_SERIAL_PCI,
+    .generic_name          = "virtio-serial-pci",
+    .transitional_name     = "virtio-serial-pci-transitional",
+    .non_transitional_name = "virtio-serial-pci-non-transitional",
+};
+
+static void virtio_serial_pci_register(void)
+{
+    virtio_pci_types_register(&virtio_serial_pci_info);
+}
+/* type_init() is given the function registering virtio_serial_pci_info. */
+type_init(virtio_serial_pci_register)
diff --git a/hw/virtio/virtio-stub.c b/hw/virtio/virtio-stub.c
new file mode 100644
index 00000000..7ddb22cc
--- /dev/null
+++ b/hw/virtio/virtio-stub.c
@@ -0,0 +1,42 @@
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-virtio.h"
+
+static void *qmp_virtio_unsupported(Error **errp)
+{
+    error_setg(errp, "Virtio is disabled");
+    return NULL; /* void * so each stub below can return it directly */
+}
+
+VirtioInfoList *qmp_x_query_virtio(Error **errp)
+{
+    return qmp_virtio_unsupported(errp); /* stub: NULL, error set in errp */
+}
+
+VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
+{
+    return qmp_virtio_unsupported(errp); /* stub: path is ignored */
+}
+
+VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path,
+                                                            uint16_t queue,
+                                                            Error **errp)
+{
+    return qmp_virtio_unsupported(errp); /* stub: path/queue are ignored */
+}
+
+VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
+                                                 uint16_t queue,
+                                                 Error **errp)
+{
+    return qmp_virtio_unsupported(errp); /* stub: path/queue are ignored */
+}
+
+VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
+                                                     uint16_t queue,
+                                                     bool has_index,
+                                                     uint16_t index,
+                                                     Error **errp)
+{
+    return qmp_virtio_unsupported(errp); /* stub: all inputs are ignored */
+}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
new file mode 100644
index 00000000..eb6347ab
--- /dev/null
+++ b/hw/virtio/virtio.c
@@ -0,0 +1,4991 @@
+/*
+ * Virtio Support
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-virtio.h"
+#include "qapi/qapi-commands-qom.h"
+#include "qapi/qapi-visit-virtio.h"
+#include "qapi/qmp/qjson.h"
+#include "cpu.h"
+#include "trace.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qom/object_interfaces.h"
+#include "hw/virtio/virtio.h"
+#include "migration/qemu-file-types.h"
+#include "qemu/atomic.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-access.h"
+#include "sysemu/dma.h"
+#include "sysemu/runstate.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "standard-headers/linux/vhost_types.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "standard-headers/linux/virtio_console.h"
+#include "standard-headers/linux/virtio_gpu.h"
+#include "standard-headers/linux/virtio_net.h"
+#include "standard-headers/linux/virtio_scsi.h"
+#include "standard-headers/linux/virtio_i2c.h"
+#include "standard-headers/linux/virtio_balloon.h"
+#include "standard-headers/linux/virtio_iommu.h"
+#include "standard-headers/linux/virtio_mem.h"
+#include "standard-headers/linux/virtio_vsock.h"
+#include CONFIG_DEVICES
+
+/* Tail queue (QTAILQ) of realized VirtIODevices, kept for QAPI */
+static QTAILQ_HEAD(, VirtIODevice) virtio_list;
+
+/*
+ * Maximum size of virtio device config space (presumably bytes - confirm)
+ */
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+
+#define FEATURE_ENTRY(name, desc) (qmp_virtio_feature_map_t) \
+    { .virtio_bit = name, .feature_desc = desc } /* one table entry */
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ                   = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD            = 1,
+    VHOST_USER_PROTOCOL_F_RARP                 = 2,
+    VHOST_USER_PROTOCOL_F_REPLY_ACK            = 3,
+    VHOST_USER_PROTOCOL_F_NET_MTU              = 4,
+    VHOST_USER_PROTOCOL_F_SLAVE_REQ            = 5,
+    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN         = 6,
+    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION       = 7,
+    VHOST_USER_PROTOCOL_F_PAGEFAULT            = 8,
+    VHOST_USER_PROTOCOL_F_CONFIG               = 9,
+    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD        = 10,
+    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER        = 11,
+    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       = 12,
+    VHOST_USER_PROTOCOL_F_RESET_DEVICE         = 13,
+    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
+    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS  = 15,
+    VHOST_USER_PROTOCOL_F_MAX /* one past the last feature listed above */
+};
+
+/* Transport-level feature bits paired with their descriptions */
+static qmp_virtio_feature_map_t virtio_transport_map[] = {
+    /* Device transport features */
+#ifndef VIRTIO_CONFIG_NO_LEGACY
+    FEATURE_ENTRY(VIRTIO_F_NOTIFY_ON_EMPTY,
+        "VIRTIO_F_NOTIFY_ON_EMPTY: Notify when device runs out of avail. "
+        "descs. on VQ"),
+    FEATURE_ENTRY(VIRTIO_F_ANY_LAYOUT,
+        "VIRTIO_F_ANY_LAYOUT: Device accepts arbitrary desc. layouts"),
+#endif /* !VIRTIO_CONFIG_NO_LEGACY */
+    FEATURE_ENTRY(VIRTIO_F_VERSION_1,
+        "VIRTIO_F_VERSION_1: Device compliant for v1 spec (legacy)"),
+    FEATURE_ENTRY(VIRTIO_F_IOMMU_PLATFORM,
+        "VIRTIO_F_IOMMU_PLATFORM: Device can be used on IOMMU platform"),
+    FEATURE_ENTRY(VIRTIO_F_RING_PACKED,
+        "VIRTIO_F_RING_PACKED: Device supports packed VQ layout"),
+    FEATURE_ENTRY(VIRTIO_F_IN_ORDER,
+        "VIRTIO_F_IN_ORDER: Device uses buffers in same order as made "
+        "available by driver"),
+    FEATURE_ENTRY(VIRTIO_F_ORDER_PLATFORM,
+        "VIRTIO_F_ORDER_PLATFORM: Memory accesses ordered by platform"),
+    FEATURE_ENTRY(VIRTIO_F_SR_IOV,
+        "VIRTIO_F_SR_IOV: Device supports single root I/O virtualization"),
+    /* Ring transport features */
+    FEATURE_ENTRY(VIRTIO_RING_F_INDIRECT_DESC,
+        "VIRTIO_RING_F_INDIRECT_DESC: Indirect descriptors supported"),
+    FEATURE_ENTRY(VIRTIO_RING_F_EVENT_IDX,
+        "VIRTIO_RING_F_EVENT_IDX: Used & avail. event fields enabled"),
+    { -1, "" }
+};
+
+/* Vhost-user protocol feature numbers paired with their descriptions */
+static qmp_virtio_feature_map_t vhost_user_protocol_map[] = {
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_MQ,
+        "VHOST_USER_PROTOCOL_F_MQ: Multiqueue protocol supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_LOG_SHMFD,
+        "VHOST_USER_PROTOCOL_F_LOG_SHMFD: Shared log memory fd supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RARP,
+        "VHOST_USER_PROTOCOL_F_RARP: Vhost-user back-end RARP broadcasting "
+        "supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_REPLY_ACK,
+        "VHOST_USER_PROTOCOL_F_REPLY_ACK: Requested operation status ack. "
+        "supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU,
+        "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ,
+        "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated "
+        "requests supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN,
+        "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy "
+        "devices supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CRYPTO_SESSION,
+        "VHOST_USER_PROTOCOL_F_CRYPTO_SESSION: Session creation for crypto "
+        "operations supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_PAGEFAULT,
+        "VHOST_USER_PROTOCOL_F_PAGEFAULT: Request servicing on userfaultfd "
+        "for accessed pages supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG,
+        "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio "
+        "device configuration space supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD,
+        "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication "
+        "channel supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER,
+        "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified "
+        "VQs supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD,
+        "VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD: Shared inflight I/O buffers "
+        "supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_RESET_DEVICE,
+        "VHOST_USER_PROTOCOL_F_RESET_DEVICE: Disabling all rings and "
+        "resetting internal device state supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS,
+        "VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS: In-band messaging "
+        "supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS,
+        "VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: Configuration for "
+        "memory slots supported"),
+    { -1, "" }
+};
+
+/* Device status values (VIRTIO_CONFIG_S_*) paired with descriptions */
+static qmp_virtio_feature_map_t virtio_config_status_map[] = {
+    FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER_OK,
+        "VIRTIO_CONFIG_S_DRIVER_OK: Driver setup and ready"),
+    FEATURE_ENTRY(VIRTIO_CONFIG_S_FEATURES_OK,
+        "VIRTIO_CONFIG_S_FEATURES_OK: Feature negotiation complete"),
+    FEATURE_ENTRY(VIRTIO_CONFIG_S_DRIVER,
+        "VIRTIO_CONFIG_S_DRIVER: Guest OS compatible with device"),
+    FEATURE_ENTRY(VIRTIO_CONFIG_S_NEEDS_RESET,
+        "VIRTIO_CONFIG_S_NEEDS_RESET: Irrecoverable error, device needs "
+        "reset"),
+    FEATURE_ENTRY(VIRTIO_CONFIG_S_FAILED,
+        "VIRTIO_CONFIG_S_FAILED: Error in guest, device failed"),
+    FEATURE_ENTRY(VIRTIO_CONFIG_S_ACKNOWLEDGE,
+        "VIRTIO_CONFIG_S_ACKNOWLEDGE: Valid virtio device found"),
+    { -1, "" }
+};
+
+/* virtio-blk feature bits paired with their descriptions */
+qmp_virtio_feature_map_t virtio_blk_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_BLK_F_SIZE_MAX,
+        "VIRTIO_BLK_F_SIZE_MAX: Max segment size is size_max"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_SEG_MAX,
+        "VIRTIO_BLK_F_SEG_MAX: Max segments in a request is seg_max"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_GEOMETRY,
+        "VIRTIO_BLK_F_GEOMETRY: Legacy geometry available"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_RO,
+        "VIRTIO_BLK_F_RO: Device is read-only"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_BLK_SIZE,
+        "VIRTIO_BLK_F_BLK_SIZE: Block size of disk available"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_TOPOLOGY,
+        "VIRTIO_BLK_F_TOPOLOGY: Topology information available"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_MQ,
+        "VIRTIO_BLK_F_MQ: Multiqueue supported"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_DISCARD,
+        "VIRTIO_BLK_F_DISCARD: Discard command supported"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_WRITE_ZEROES,
+        "VIRTIO_BLK_F_WRITE_ZEROES: Write zeroes command supported"),
+#ifndef VIRTIO_BLK_NO_LEGACY
+    FEATURE_ENTRY(VIRTIO_BLK_F_BARRIER,
+        "VIRTIO_BLK_F_BARRIER: Request barriers supported"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_SCSI,
+        "VIRTIO_BLK_F_SCSI: SCSI packet commands supported"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_FLUSH,
+        "VIRTIO_BLK_F_FLUSH: Flush command supported"),
+    FEATURE_ENTRY(VIRTIO_BLK_F_CONFIG_WCE,
+        "VIRTIO_BLK_F_CONFIG_WCE: Cache writeback and writethrough modes "
+        "supported"),
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+        "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+        "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+        "negotiation supported"),
+    { -1, "" }
+};
+
+/* virtio-serial (console) feature bits paired with their descriptions */
+qmp_virtio_feature_map_t virtio_serial_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_CONSOLE_F_SIZE,
+        "VIRTIO_CONSOLE_F_SIZE: Host providing console size"),
+    FEATURE_ENTRY(VIRTIO_CONSOLE_F_MULTIPORT,
+        "VIRTIO_CONSOLE_F_MULTIPORT: Multiple ports for device supported"),
+    FEATURE_ENTRY(VIRTIO_CONSOLE_F_EMERG_WRITE,
+        "VIRTIO_CONSOLE_F_EMERG_WRITE: Emergency write supported"),
+    { -1, "" }
+};
+
+/* virtio-gpu feature bits paired with their descriptions */
+qmp_virtio_feature_map_t virtio_gpu_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_GPU_F_VIRGL,
+        "VIRTIO_GPU_F_VIRGL: Virgl 3D mode supported"),
+    FEATURE_ENTRY(VIRTIO_GPU_F_EDID,
+        "VIRTIO_GPU_F_EDID: EDID metadata supported"),
+    FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_UUID,
+        "VIRTIO_GPU_F_RESOURCE_UUID: Resource UUID assigning supported"),
+    FEATURE_ENTRY(VIRTIO_GPU_F_RESOURCE_BLOB,
+        "VIRTIO_GPU_F_RESOURCE_BLOB: Size-based blob resources supported"),
+    FEATURE_ENTRY(VIRTIO_GPU_F_CONTEXT_INIT,
+        "VIRTIO_GPU_F_CONTEXT_INIT: Context types and synchronization "
+        "timelines supported"),
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+        "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+        "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+        "negotiation supported"),
+    { -1, "" }
+};
+
+/* virtio-input feature bits (only vhost entries) with descriptions */
+qmp_virtio_feature_map_t virtio_input_feature_map[] = {
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+        "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+        "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+        "negotiation supported"),
+    { -1, "" }
+};
+
+/* virtio-net feature bits paired with their descriptions */
+qmp_virtio_feature_map_t virtio_net_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_NET_F_CSUM,
+        "VIRTIO_NET_F_CSUM: Device handling packets with partial checksum "
+        "supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_CSUM,
+        "VIRTIO_NET_F_GUEST_CSUM: Driver handling packets with partial "
+        "checksum supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
+        "VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: Control channel offloading "
+        "reconfig. supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_MTU,
+        "VIRTIO_NET_F_MTU: Device max MTU reporting supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_MAC,
+        "VIRTIO_NET_F_MAC: Device has given MAC address"),
+    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO4,
+        "VIRTIO_NET_F_GUEST_TSO4: Driver can receive TSOv4"),
+    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_TSO6,
+        "VIRTIO_NET_F_GUEST_TSO6: Driver can receive TSOv6"),
+    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ECN,
+        "VIRTIO_NET_F_GUEST_ECN: Driver can receive TSO with ECN"),
+    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UFO,
+        "VIRTIO_NET_F_GUEST_UFO: Driver can receive UFO"),
+    FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO4,
+        "VIRTIO_NET_F_HOST_TSO4: Device can receive TSOv4"),
+    FEATURE_ENTRY(VIRTIO_NET_F_HOST_TSO6,
+        "VIRTIO_NET_F_HOST_TSO6: Device can receive TSOv6"),
+    FEATURE_ENTRY(VIRTIO_NET_F_HOST_ECN,
+        "VIRTIO_NET_F_HOST_ECN: Device can receive TSO with ECN"),
+    FEATURE_ENTRY(VIRTIO_NET_F_HOST_UFO,
+        "VIRTIO_NET_F_HOST_UFO: Device can receive UFO"),
+    FEATURE_ENTRY(VIRTIO_NET_F_MRG_RXBUF,
+        "VIRTIO_NET_F_MRG_RXBUF: Driver can merge receive buffers"),
+    FEATURE_ENTRY(VIRTIO_NET_F_STATUS,
+        "VIRTIO_NET_F_STATUS: Configuration status field available"),
+    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VQ,
+        "VIRTIO_NET_F_CTRL_VQ: Control channel available"),
+    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX,
+        "VIRTIO_NET_F_CTRL_RX: Control channel RX mode supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_VLAN,
+        "VIRTIO_NET_F_CTRL_VLAN: Control channel VLAN filtering supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_RX_EXTRA,
+        "VIRTIO_NET_F_CTRL_RX_EXTRA: Extra RX mode control supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_GUEST_ANNOUNCE,
+        "VIRTIO_NET_F_GUEST_ANNOUNCE: Driver sending gratuitous packets "
+        "supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_MQ,
+        "VIRTIO_NET_F_MQ: Multiqueue with automatic receive steering "
+        "supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_CTRL_MAC_ADDR,
+        "VIRTIO_NET_F_CTRL_MAC_ADDR: MAC address set through control "
+        "channel"),
+    FEATURE_ENTRY(VIRTIO_NET_F_HASH_REPORT,
+        "VIRTIO_NET_F_HASH_REPORT: Hash reporting supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_RSS,
+        "VIRTIO_NET_F_RSS: RSS RX steering supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_RSC_EXT,
+        "VIRTIO_NET_F_RSC_EXT: Extended coalescing info supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_STANDBY,
+        "VIRTIO_NET_F_STANDBY: Device acting as standby for primary "
+        "device with same MAC addr. supported"),
+    FEATURE_ENTRY(VIRTIO_NET_F_SPEED_DUPLEX,
+        "VIRTIO_NET_F_SPEED_DUPLEX: Device set linkspeed and duplex"),
+#ifndef VIRTIO_NET_NO_LEGACY
+    FEATURE_ENTRY(VIRTIO_NET_F_GSO,
+        "VIRTIO_NET_F_GSO: Handling GSO-type packets supported"),
+#endif /* !VIRTIO_NET_NO_LEGACY */
+    FEATURE_ENTRY(VHOST_NET_F_VIRTIO_NET_HDR,
+        "VHOST_NET_F_VIRTIO_NET_HDR: Virtio-net headers for RX and TX "
+        "packets supported"),
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+        "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+        "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+        "negotiation supported"),
+    { -1, "" }
+};
+
+/*
+ * virtio-scsi features mapping.
+ * Pairs each virtio-scsi (and generic vhost) feature constant with the
+ * description string reported for it; terminated by the { -1, "" } sentinel.
+ */
+qmp_virtio_feature_map_t virtio_scsi_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_SCSI_F_INOUT, \
+            "VIRTIO_SCSI_F_INOUT: Requests including read and writable data "
+            "buffers supported"),
+    FEATURE_ENTRY(VIRTIO_SCSI_F_HOTPLUG, \
+            "VIRTIO_SCSI_F_HOTPLUG: Reporting and handling hot-plug events "
+            "supported"),
+    FEATURE_ENTRY(VIRTIO_SCSI_F_CHANGE, \
+            "VIRTIO_SCSI_F_CHANGE: Reporting and handling LUN changes "
+            "supported"),
+    FEATURE_ENTRY(VIRTIO_SCSI_F_T10_PI, \
+            "VIRTIO_SCSI_F_T10_PI: T10 info included in request header"),
+    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
+            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
+            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+            "negotiation supported"),
+    /* End-of-table sentinel. */
+    { -1, "" }
+};
+
+/*
+ * Feature table for virtio/vhost-user-fs.
+ * Only the generic vhost feature constants are listed; the { -1, "" }
+ * entry terminates the table.
+ */
+qmp_virtio_feature_map_t virtio_fs_feature_map[] = {
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+            "negotiation supported"),
+    { -1, "" }
+};
+
+/*
+ * virtio/vhost-user-i2c features mapping.
+ * Pairs each feature constant with the description string reported for it;
+ * terminated by the { -1, "" } sentinel.
+ */
+qmp_virtio_feature_map_t virtio_i2c_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, \
+            "VIRTIO_I2C_F_ZERO_LENGTH_REQUEST: Zero length requests supported"),
+    FEATURE_ENTRY(VHOST_F_LOG_ALL, \
+            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
+            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+            "negotiation supported"),
+    /* End-of-table sentinel. */
+    { -1, "" }
+};
+
+/*
+ * Feature table for virtio/vhost-vsock.
+ * Each entry pairs a feature constant with its description string; the
+ * { -1, "" } entry terminates the table.
+ */
+qmp_virtio_feature_map_t virtio_vsock_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_VSOCK_F_SEQPACKET,
+            "VIRTIO_VSOCK_F_SEQPACKET: SOCK_SEQPACKET supported"),
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+            "negotiation supported"),
+    { -1, "" }
+};
+
+/*
+ * Feature table for virtio-balloon.
+ * Each entry pairs a feature constant with its description string; the
+ * { -1, "" } entry terminates the table.
+ */
+qmp_virtio_feature_map_t virtio_balloon_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_BALLOON_F_MUST_TELL_HOST,
+            "VIRTIO_BALLOON_F_MUST_TELL_HOST: Tell host before reclaiming "
+            "pages"),
+    FEATURE_ENTRY(VIRTIO_BALLOON_F_STATS_VQ,
+            "VIRTIO_BALLOON_F_STATS_VQ: Guest memory stats VQ available"),
+    FEATURE_ENTRY(VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+            "VIRTIO_BALLOON_F_DEFLATE_ON_OOM: Deflate balloon when guest OOM"),
+    FEATURE_ENTRY(VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+            "VIRTIO_BALLOON_F_FREE_PAGE_HINT: VQ reporting free pages enabled"),
+    FEATURE_ENTRY(VIRTIO_BALLOON_F_PAGE_POISON,
+            "VIRTIO_BALLOON_F_PAGE_POISON: Guest page poisoning enabled"),
+    FEATURE_ENTRY(VIRTIO_BALLOON_F_REPORTING,
+            "VIRTIO_BALLOON_F_REPORTING: Page reporting VQ enabled"),
+    { -1, "" }
+};
+
+/*
+ * Feature table for virtio-crypto.
+ * Lists only VHOST_F_LOG_ALL; the { -1, "" } entry terminates the table.
+ */
+qmp_virtio_feature_map_t virtio_crypto_feature_map[] = {
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    { -1, "" }
+};
+
+/*
+ * Feature table for virtio-iommu.
+ * Each entry pairs a feature constant with its description string; the
+ * { -1, "" } entry terminates the table.
+ */
+qmp_virtio_feature_map_t virtio_iommu_feature_map[] = {
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_INPUT_RANGE,
+            "VIRTIO_IOMMU_F_INPUT_RANGE: Range of available virtual addrs. "
+            "available"),
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_DOMAIN_RANGE,
+            "VIRTIO_IOMMU_F_DOMAIN_RANGE: Number of supported domains "
+            "available"),
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_MAP_UNMAP,
+            "VIRTIO_IOMMU_F_MAP_UNMAP: Map and unmap requests available"),
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS,
+            "VIRTIO_IOMMU_F_BYPASS: Endpoints not attached to domains are in "
+            "bypass mode"),
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_PROBE,
+            "VIRTIO_IOMMU_F_PROBE: Probe requests available"),
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_MMIO,
+            "VIRTIO_IOMMU_F_MMIO: VIRTIO_IOMMU_MAP_F_MMIO flag available"),
+    FEATURE_ENTRY(VIRTIO_IOMMU_F_BYPASS_CONFIG,
+            "VIRTIO_IOMMU_F_BYPASS_CONFIG: Bypass field of IOMMU config "
+            "available"),
+    { -1, "" }
+};
+
+/*
+ * virtio-mem features mapping.
+ * Pairs each feature constant with the description string reported for it;
+ * terminated by the { -1, "" } sentinel.
+ *
+ * NOTE(review): the VIRTIO_MEM_F_ACPI_PXM entry is compiled in only when
+ * CONFIG_ACPI is *not* defined, which looks inverted for an ACPI-specific
+ * feature -- confirm whether "#ifdef CONFIG_ACPI" was intended.
+ */
+qmp_virtio_feature_map_t virtio_mem_feature_map[] = {
+#ifndef CONFIG_ACPI
+    FEATURE_ENTRY(VIRTIO_MEM_F_ACPI_PXM, \
+            "VIRTIO_MEM_F_ACPI_PXM: node_id is an ACPI PXM and is valid"),
+#endif /* !CONFIG_ACPI */
+    FEATURE_ENTRY(VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, \
+            "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: Unplugged memory cannot be "
+            "accessed"),
+    /* End-of-table sentinel. */
+    { -1, "" }
+};
+
+/*
+ * Feature table for virtio-rng.
+ * Only the generic vhost feature constants are listed; the { -1, "" }
+ * entry terminates the table.
+ */
+qmp_virtio_feature_map_t virtio_rng_feature_map[] = {
+    FEATURE_ENTRY(VHOST_F_LOG_ALL,
+            "VHOST_F_LOG_ALL: Logging write descriptors supported"),
+    FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES,
+            "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
+            "negotiation supported"),
+    { -1, "" }
+};
+
+/*
+ * The alignment, in bytes, to use between the consumer and producer parts
+ * of the vring (the x86 page size). This is the default, used by transports
+ * like PCI which don't provide a means for the guest to tell the host the
+ * alignment.
+ */
+#define VIRTIO_PCI_VRING_ALIGN         4096
+
+/*
+ * One split-ring descriptor. vring_split_desc_read() copies it out of the
+ * descriptor cache as a whole and byte-swaps every field, so the field
+ * order and widths here define the in-memory layout and must not change.
+ */
+typedef struct VRingDesc
+{
+    uint64_t addr;
+    uint32_t len;
+    uint16_t flags;
+    uint16_t next;
+} VRingDesc;
+
+/*
+ * One packed-ring descriptor. The vring_packed_desc_*() helpers access the
+ * individual fields at their offsetof() positions inside the descriptor
+ * cache, so the field order and widths define the in-memory layout.
+ */
+typedef struct VRingPackedDesc {
+    uint64_t addr;
+    uint32_t len;
+    uint16_t id;
+    uint16_t flags;
+} VRingPackedDesc;
+
+/*
+ * Layout of the split-ring avail area. The vring_avail_*() helpers read
+ * flags, idx and ring[i] at their offsetof() positions in the avail cache.
+ */
+typedef struct VRingAvail
+{
+    uint16_t flags;
+    uint16_t idx;
+    /* vring_get_used_event() additionally reads the slot at ring[num]. */
+    uint16_t ring[];
+} VRingAvail;
+
+/*
+ * One entry of the split-ring used area, written as a whole by
+ * vring_used_write() after byte-swapping both fields.
+ */
+typedef struct VRingUsedElem
+{
+    uint32_t id;
+    uint32_t len;
+} VRingUsedElem;
+
+/*
+ * Layout of the split-ring used area. The vring_used_*() helpers access
+ * flags, idx and ring[i] at their offsetof() positions in the used cache.
+ */
+typedef struct VRingUsed
+{
+    uint16_t flags;
+    uint16_t idx;
+    /* vring_set_avail_event() additionally writes the slot at ring[num]. */
+    VRingUsedElem ring[];
+} VRingUsed;
+
+/*
+ * The three memory region caches of one virtqueue. Instances are published
+ * through VRing.caches with qatomic_rcu_set() and released via call_rcu()
+ * into virtio_free_region_cache().
+ */
+typedef struct VRingMemoryRegionCaches {
+    /* Handle passed to call_rcu() (field name "rcu"). */
+    struct rcu_head rcu;
+    MemoryRegionCache desc;
+    MemoryRegionCache avail;
+    MemoryRegionCache used;
+} VRingMemoryRegionCaches;
+
+/*
+ * Per-queue ring state: the ring size and alignment, the addresses of the
+ * three ring areas, and the RCU-protected caches that map them.
+ */
+typedef struct VRing
+{
+    unsigned int num;
+    unsigned int num_default;
+    unsigned int align;
+    /* A zero desc address is treated as "ring not set up" by the callers. */
+    hwaddr desc;
+    hwaddr avail;
+    hwaddr used;
+    /* Read with qatomic_rcu_read(); NULL when no caches are installed. */
+    VRingMemoryRegionCaches *caches;
+} VRing;
+
+/*
+ * Packed-ring event structure; vring_packed_event_read() and the
+ * vring_packed_*_write() helpers access both fields at their offsetof()
+ * positions, so the layout must not change.
+ */
+typedef struct VRingPackedDescEvent {
+    uint16_t off_wrap;
+    uint16_t flags;
+} VRingPackedDescEvent ;
+
+/* Device-side state of a single virtqueue. */
+struct VirtQueue
+{
+    VRing vring;
+    /* Filled by virtqueue_packed_fill() for packed rings. */
+    VirtQueueElement *used_elems;
+
+    /* Next head to pop */
+    uint16_t last_avail_idx;
+    bool last_avail_wrap_counter;
+
+    /* Last avail_idx read from VQ. */
+    uint16_t shadow_avail_idx;
+    bool shadow_avail_wrap_counter;
+
+    uint16_t used_idx;
+    bool used_wrap_counter;
+
+    /* Last used index value we have signalled on */
+    uint16_t signalled_used;
+
+    /*
+     * Presumably whether signalled_used still holds a meaningful value;
+     * virtqueue_split_flush() clears it when the used index moves past it.
+     */
+    bool signalled_used_valid;
+
+    /* Notification enabled? */
+    bool notification;
+
+    uint16_t queue_index;
+
+    /* Elements currently popped and not yet flushed, detached or rewound. */
+    unsigned int inuse;
+
+    uint16_t vector;
+    VirtIOHandleOutput handle_output;
+    VirtIODevice *vdev;
+    EventNotifier guest_notifier;
+    EventNotifier host_notifier;
+    bool host_notifier_enabled;
+    QLIST_ENTRY(VirtQueue) node;
+};
+
+/*
+ * Device name for each VIRTIO_ID_* value, indexed by the id itself.
+ * Ids without a designated initializer leave NULL slots, which
+ * virtio_id_to_name() asserts against.
+ */
+const char *virtio_device_names[] = {
+    [VIRTIO_ID_NET] = "virtio-net",
+    [VIRTIO_ID_BLOCK] = "virtio-blk",
+    [VIRTIO_ID_CONSOLE] = "virtio-serial",
+    [VIRTIO_ID_RNG] = "virtio-rng",
+    [VIRTIO_ID_BALLOON] = "virtio-balloon",
+    [VIRTIO_ID_IOMEM] = "virtio-iomem",
+    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
+    [VIRTIO_ID_SCSI] = "virtio-scsi",
+    [VIRTIO_ID_9P] = "virtio-9p",
+    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
+    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
+    [VIRTIO_ID_CAIF] = "virtio-caif",
+    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
+    [VIRTIO_ID_GPU] = "virtio-gpu",
+    [VIRTIO_ID_CLOCK] = "virtio-clk",
+    [VIRTIO_ID_INPUT] = "virtio-input",
+    [VIRTIO_ID_VSOCK] = "vhost-vsock",
+    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
+    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
+    [VIRTIO_ID_PSTORE] = "virtio-pstore",
+    [VIRTIO_ID_IOMMU] = "virtio-iommu",
+    [VIRTIO_ID_MEM] = "virtio-mem",
+    [VIRTIO_ID_SOUND] = "virtio-sound",
+    [VIRTIO_ID_FS] = "virtio-user-fs",
+    [VIRTIO_ID_PMEM] = "virtio-pmem",
+    [VIRTIO_ID_RPMB] = "virtio-rpmb",
+    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
+    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
+    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
+    [VIRTIO_ID_SCMI] = "virtio-scmi",
+    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
+    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
+    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
+    [VIRTIO_ID_CAN] = "virtio-can",
+    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
+    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
+    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
+    [VIRTIO_ID_BT] = "virtio-bluetooth",
+    [VIRTIO_ID_GPIO] = "virtio-gpio"
+};
+
+/*
+ * Look up the name registered for @device_id in virtio_device_names[].
+ * Aborts via assert() when the id lies outside the table or maps to an
+ * empty (NULL) slot, so the return value is never NULL.
+ */
+static const char *virtio_id_to_name(uint16_t device_id)
+{
+    const char *name;
+
+    /* The id must index inside the table... */
+    assert(device_id < G_N_ELEMENTS(virtio_device_names));
+
+    /* ...and the slot must have been given a name. */
+    name = virtio_device_names[device_id];
+    assert(name != NULL);
+
+    return name;
+}
+
+/*
+ * Destroy the desc, avail and used caches of @caches and free the
+ * container itself. @caches must not be NULL.
+ * Called within call_rcu().
+ */
+static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
+{
+    MemoryRegionCache *regions[3];
+    size_t k;
+
+    assert(caches != NULL);
+
+    /* Same teardown order as the fields are torn down one by one. */
+    regions[0] = &caches->desc;
+    regions[1] = &caches->avail;
+    regions[2] = &caches->used;
+    for (k = 0; k < G_N_ELEMENTS(regions); k++) {
+        address_space_cache_destroy(regions[k]);
+    }
+
+    g_free(caches);
+}
+
+/*
+ * Unpublish the region caches of @vq: the pointer is replaced by NULL and
+ * any previously installed caches are handed to call_rcu() for release by
+ * virtio_free_region_cache().
+ */
+static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *stale = qatomic_read(&vq->vring.caches);
+
+    qatomic_rcu_set(&vq->vring.caches, NULL);
+
+    if (!stale) {
+        /* Nothing was installed, so there is nothing to reclaim. */
+        return;
+    }
+    call_rcu(stale, virtio_free_region_cache, rcu);
+}
+
+/*
+ * (Re)create the region caches for queue @n of @vdev.
+ *
+ * When the descriptor address is zero, or any of the three areas cannot be
+ * mapped to its full size, the queue is left without caches (see
+ * virtio_virtqueue_reset_region_cache()); mapping failures are also
+ * reported through virtio_error(). On success the new caches are published
+ * and the previous ones, if any, are released via call_rcu().
+ */
+static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+{
+    VirtQueue *vq = &vdev->vq[n];
+    VRingMemoryRegionCaches *prev = vq->vring.caches;
+    VRingMemoryRegionCaches *fresh = NULL;
+    hwaddr addr, size;
+    int64_t len;
+    bool packed;
+
+    addr = vq->vring.desc;
+    if (!addr) {
+        goto out_no_cache;
+    }
+
+    fresh = g_new0(VRingMemoryRegionCaches, 1);
+
+    /* Descriptor area; the last argument follows the packed-ring feature. */
+    size = virtio_queue_get_desc_size(vdev, n);
+    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
+    len = address_space_cache_init(&fresh->desc, vdev->dma_as,
+                                   addr, size, packed);
+    if (len < size) {
+        virtio_error(vdev, "Cannot map desc");
+        goto err_desc;
+    }
+
+    /* Used area. */
+    size = virtio_queue_get_used_size(vdev, n);
+    len = address_space_cache_init(&fresh->used, vdev->dma_as,
+                                   vq->vring.used, size, true);
+    if (len < size) {
+        virtio_error(vdev, "Cannot map used");
+        goto err_used;
+    }
+
+    /* Avail area. */
+    size = virtio_queue_get_avail_size(vdev, n);
+    len = address_space_cache_init(&fresh->avail, vdev->dma_as,
+                                   vq->vring.avail, size, false);
+    if (len < size) {
+        virtio_error(vdev, "Cannot map avail");
+        goto err_avail;
+    }
+
+    /* Publish the new caches before scheduling the old ones for release. */
+    qatomic_rcu_set(&vq->vring.caches, fresh);
+    if (prev) {
+        call_rcu(prev, virtio_free_region_cache, rcu);
+    }
+    return;
+
+    /* Unwind in reverse order of initialisation. */
+err_avail:
+    address_space_cache_destroy(&fresh->avail);
+err_used:
+    address_space_cache_destroy(&fresh->used);
+err_desc:
+    address_space_cache_destroy(&fresh->desc);
+out_no_cache:
+    g_free(fresh);
+    virtio_virtqueue_reset_region_cache(vq);
+}
+
+/* virt queue functions */
+
+/*
+ * Recompute the avail and used addresses of queue @n from its descriptor
+ * address, size and alignment, then rebuild the region caches. Does
+ * nothing while any of num, desc or align is still zero.
+ */
+void virtio_queue_update_rings(VirtIODevice *vdev, int n)
+{
+    VRing *ring = &vdev->vq[n].vring;
+    bool configured = ring->num && ring->desc && ring->align;
+
+    if (!configured) {
+        /* not yet setup -> nothing to do */
+        return;
+    }
+
+    /* The avail area directly follows the descriptor table... */
+    ring->avail = ring->desc + ring->num * sizeof(VRingDesc);
+    /* ...and the used area follows it, rounded up by vring_align(). */
+    ring->used = vring_align(ring->avail +
+                             offsetof(VRingAvail, ring[ring->num]),
+                             ring->align);
+
+    virtio_init_region_cache(vdev, n);
+}
+
+/*
+ * Copy split-ring descriptor @i out of @cache into @desc and pass every
+ * field through the matching virtio_tswap*s() helper.
+ * Called within rcu_read_lock().
+ */
+static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
+                                  MemoryRegionCache *cache, int i)
+{
+    hwaddr off = i * sizeof(VRingDesc);
+
+    address_space_read_cached(cache, off, desc, sizeof(*desc));
+
+    virtio_tswap64s(vdev, &desc->addr);
+    virtio_tswap32s(vdev, &desc->len);
+    virtio_tswap16s(vdev, &desc->flags);
+    virtio_tswap16s(vdev, &desc->next);
+}
+
+/*
+ * Read the packed-ring event structure from @cache into @e.
+ *
+ * Both fields are loaded with virtio_lduw_phys_cached(), whose result is
+ * used directly by every other caller in this file, so no further byte
+ * swap is applied here. (Swapping flags a second time, as this function
+ * used to, treated flags differently from off_wrap and would corrupt it
+ * whenever virtio_tswap16s() is not a no-op.)
+ *
+ * flags is read first, with a read barrier before off_wrap.
+ */
+static void vring_packed_event_read(VirtIODevice *vdev,
+                                    MemoryRegionCache *cache,
+                                    VRingPackedDescEvent *e)
+{
+    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
+    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
+
+    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
+    /* Make sure flags is seen before off_wrap */
+    smp_rmb();
+    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
+}
+
+/*
+ * Store @off_wrap into the off_wrap field of the packed event structure
+ * behind @cache and invalidate the written range.
+ */
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+                                        MemoryRegionCache *cache,
+                                        uint16_t off_wrap)
+{
+    const hwaddr field = offsetof(VRingPackedDescEvent, off_wrap);
+
+    virtio_stw_phys_cached(vdev, cache, field, off_wrap);
+    address_space_cache_invalidate(cache, field, sizeof(uint16_t));
+}
+
+/*
+ * Store @flags into the flags field of the packed event structure behind
+ * @cache and invalidate the written range.
+ */
+static void vring_packed_flags_write(VirtIODevice *vdev,
+                                     MemoryRegionCache *cache, uint16_t flags)
+{
+    const hwaddr field = offsetof(VRingPackedDescEvent, flags);
+
+    virtio_stw_phys_cached(vdev, cache, field, flags);
+    address_space_cache_invalidate(cache, field, sizeof(uint16_t));
+}
+
+/*
+ * Fetch the currently published region caches of @vq; may be NULL.
+ * Called within rcu_read_lock().
+ */
+static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *current = qatomic_rcu_read(&vq->vring.caches);
+
+    return current;
+}
+
+/*
+ * Read the flags field of the avail ring of @vq.
+ * Returns 0 when no region caches are installed.
+ * Called within rcu_read_lock().
+ */
+static inline uint16_t vring_avail_flags(VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+
+    if (caches) {
+        return virtio_lduw_phys_cached(vq->vdev, &caches->avail,
+                                       offsetof(VRingAvail, flags));
+    }
+    return 0;
+}
+
+/*
+ * Read the idx field of the avail ring of @vq and remember it in
+ * vq->shadow_avail_idx. Returns 0, leaving the shadow copy untouched,
+ * when no region caches are installed.
+ * Called within rcu_read_lock().
+ */
+static inline uint16_t vring_avail_idx(VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+    uint16_t idx;
+
+    if (!caches) {
+        return 0;
+    }
+
+    idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail,
+                                  offsetof(VRingAvail, idx));
+    vq->shadow_avail_idx = idx;
+    return idx;
+}
+
+/*
+ * Read entry @i of the avail ring of @vq.
+ * Returns 0 when no region caches are installed.
+ * Called within rcu_read_lock().
+ */
+static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+
+    if (caches) {
+        hwaddr slot = offsetof(VRingAvail, ring[i]);
+
+        return virtio_lduw_phys_cached(vq->vdev, &caches->avail, slot);
+    }
+    return 0;
+}
+
+/*
+ * Read the used_event value, which is stored in the avail ring slot just
+ * past the last regular entry (index vring.num).
+ * Called within rcu_read_lock().
+ */
+static inline uint16_t vring_get_used_event(VirtQueue *vq)
+{
+    int past_last = vq->vring.num;
+
+    return vring_avail_ring(vq, past_last);
+}
+
+/*
+ * Write @uelem into entry @i of the used ring of @vq and invalidate the
+ * written range. Both fields of @uelem are byte-swapped in place first.
+ * Does nothing (and leaves @uelem untouched) without region caches.
+ * Called within rcu_read_lock().
+ */
+static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
+                                    int i)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+    const size_t elem_size = sizeof(VRingUsedElem);
+    hwaddr slot;
+
+    if (!caches) {
+        return;
+    }
+
+    slot = offsetof(VRingUsed, ring[i]);
+
+    virtio_tswap32s(vq->vdev, &uelem->id);
+    virtio_tswap32s(vq->vdev, &uelem->len);
+    address_space_write_cached(&caches->used, slot, uelem, elem_size);
+    address_space_cache_invalidate(&caches->used, slot, elem_size);
+}
+
+/*
+ * Read the flags field of the used ring of @vq.
+ * Returns 0 when no region caches are installed.
+ * Called within rcu_read_lock().
+ */
+static inline uint16_t vring_used_flags(VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+
+    if (caches) {
+        return virtio_lduw_phys_cached(vq->vdev, &caches->used,
+                                       offsetof(VRingUsed, flags));
+    }
+    return 0;
+}
+
+/*
+ * Read the idx field of the used ring of @vq.
+ * Returns 0 when no region caches are installed.
+ * Called within rcu_read_lock().
+ */
+static uint16_t vring_used_idx(VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+
+    if (caches) {
+        return virtio_lduw_phys_cached(vq->vdev, &caches->used,
+                                       offsetof(VRingUsed, idx));
+    }
+    return 0;
+}
+
+/*
+ * Set the used index of @vq to @val. The value is written to the used
+ * ring only when region caches are installed; vq->used_idx is updated in
+ * either case.
+ * Called within rcu_read_lock().
+ */
+static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+
+    if (caches) {
+        const hwaddr field = offsetof(VRingUsed, idx);
+
+        virtio_stw_phys_cached(vq->vdev, &caches->used, field, val);
+        address_space_cache_invalidate(&caches->used, field,
+                                       sizeof(uint16_t));
+    }
+
+    vq->used_idx = val;
+}
+
+/*
+ * OR @mask into the flags field of the used ring of @vq (read, modify,
+ * write back, invalidate). Does nothing without region caches.
+ * Called within rcu_read_lock().
+ */
+static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+    const hwaddr field = offsetof(VRingUsed, flags);
+    uint16_t current;
+
+    if (!caches) {
+        return;
+    }
+
+    current = virtio_lduw_phys_cached(vq->vdev, &caches->used, field);
+    virtio_stw_phys_cached(vq->vdev, &caches->used, field, current | mask);
+    address_space_cache_invalidate(&caches->used, field, sizeof(current));
+}
+
+/*
+ * Clear @mask in the flags field of the used ring of @vq (read, modify,
+ * write back, invalidate). Does nothing without region caches.
+ * Called within rcu_read_lock().
+ */
+static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
+{
+    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+    const hwaddr field = offsetof(VRingUsed, flags);
+    uint16_t current;
+
+    if (!caches) {
+        return;
+    }
+
+    current = virtio_lduw_phys_cached(vq->vdev, &caches->used, field);
+    virtio_stw_phys_cached(vq->vdev, &caches->used, field, current & ~mask);
+    address_space_cache_invalidate(&caches->used, field, sizeof(current));
+}
+
+/*
+ * Write @val into the avail_event slot, which lives in the used ring just
+ * past the last regular entry (index vring.num), and invalidate it.
+ * Skipped while notifications are disabled or no region caches exist.
+ * Called within rcu_read_lock().
+ */
+static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
+{
+    VRingMemoryRegionCaches *caches;
+    hwaddr slot;
+
+    if (!vq->notification) {
+        return;
+    }
+
+    caches = vring_get_region_caches(vq);
+    if (caches) {
+        slot = offsetof(VRingUsed, ring[vq->vring.num]);
+        virtio_stw_phys_cached(vq->vdev, &caches->used, slot, val);
+        address_space_cache_invalidate(&caches->used, slot,
+                                       sizeof(uint16_t));
+    }
+}
+
+/*
+ * Apply the notification setting @enable to a split ring.
+ *
+ * With VIRTIO_RING_F_EVENT_IDX the avail event is refreshed from the
+ * current avail index; otherwise VRING_USED_F_NO_NOTIFY is cleared or set
+ * in the used flags. A full barrier follows when enabling.
+ */
+static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
+{
+    RCU_READ_LOCK_GUARD();
+
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vring_avail_idx(vq));
+    } else {
+        if (enable) {
+            vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
+        } else {
+            vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+        }
+    }
+
+    if (!enable) {
+        return;
+    }
+    /* Expose avail event/used flags before caller checks the avail idx. */
+    smp_mb();
+}
+
+/*
+ * Apply the notification setting @enable to a packed ring by rewriting
+ * the event structure held in the "used" cache.
+ *
+ * Disabling writes VRING_PACKED_EVENT_FLAG_DISABLE. Enabling writes
+ * VRING_PACKED_EVENT_FLAG_DESC (after publishing off_wrap) when
+ * VIRTIO_RING_F_EVENT_IDX is negotiated, VRING_PACKED_EVENT_FLAG_ENABLE
+ * otherwise, and is followed by a full barrier.
+ * Does nothing when no region caches are installed.
+ */
+static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
+{
+    VRingMemoryRegionCaches *caches;
+    VRingPackedDescEvent e;
+    uint16_t off_wrap;
+
+    RCU_READ_LOCK_GUARD();
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        return;
+    }
+
+    vring_packed_event_read(vq->vdev, &caches->used, &e);
+
+    if (enable &&
+        virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
+        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
+        /* Make sure off_wrap is wrote before flags */
+        smp_wmb();
+        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
+    } else if (enable) {
+        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
+    } else {
+        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
+    }
+
+    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
+
+    if (!enable) {
+        return;
+    }
+    /* Expose avail event/used flags before caller checks the avail idx. */
+    smp_mb();
+}
+
+/*
+ * Return the notification setting last stored for @vq by
+ * virtio_queue_set_notification().
+ */
+bool virtio_queue_get_notification(VirtQueue *vq)
+{
+    return vq->notification;
+}
+
+/*
+ * Record @enable as the notification setting of @vq and, once the ring
+ * has a descriptor address, push it to the ring through the packed or
+ * split variant depending on VIRTIO_F_RING_PACKED.
+ */
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+    vq->notification = enable;
+
+    if (vq->vring.desc) {
+        if (!virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+            virtio_queue_split_set_notification(vq, enable);
+        } else {
+            virtio_queue_packed_set_notification(vq, enable);
+        }
+    }
+}
+
+/* Return non-zero once the avail ring address of @vq is non-zero. */
+int virtio_queue_ready(VirtQueue *vq)
+{
+    return vq->vring.avail != 0;
+}
+
+/* Load only the flags field of packed descriptor @i from @cache. */
+static void vring_packed_desc_read_flags(VirtIODevice *vdev,
+                                         uint16_t *flags,
+                                         MemoryRegionCache *cache,
+                                         int i)
+{
+    hwaddr field = i * sizeof(VRingPackedDesc) +
+                   offsetof(VRingPackedDesc, flags);
+    uint16_t value = virtio_lduw_phys_cached(vdev, cache, field);
+
+    *flags = value;
+}
+
+/*
+ * Read packed descriptor @i from @cache into @desc.
+ *
+ * The flags field is fetched first; with @strict_order a read barrier
+ * separates it from the addr, id and len reads. Those three fields are
+ * copied raw and then byte-swapped with the virtio_tswap*s() helpers.
+ */
+static void vring_packed_desc_read(VirtIODevice *vdev,
+                                   VRingPackedDesc *desc,
+                                   MemoryRegionCache *cache,
+                                   int i, bool strict_order)
+{
+    const hwaddr base = i * sizeof(VRingPackedDesc);
+
+    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
+
+    if (strict_order) {
+        /* Make sure flags is read before the rest fields. */
+        smp_rmb();
+    }
+
+    address_space_read_cached(cache, base + offsetof(VRingPackedDesc, addr),
+                              &desc->addr, sizeof(desc->addr));
+    address_space_read_cached(cache, base + offsetof(VRingPackedDesc, id),
+                              &desc->id, sizeof(desc->id));
+    address_space_read_cached(cache, base + offsetof(VRingPackedDesc, len),
+                              &desc->len, sizeof(desc->len));
+
+    virtio_tswap64s(vdev, &desc->addr);
+    virtio_tswap16s(vdev, &desc->id);
+    virtio_tswap32s(vdev, &desc->len);
+}
+
+/*
+ * Write the id and len fields of @desc into packed descriptor @i of
+ * @cache, invalidating each written range. Both fields of @desc are
+ * byte-swapped in place before being written.
+ */
+static void vring_packed_desc_write_data(VirtIODevice *vdev,
+                                         VRingPackedDesc *desc,
+                                         MemoryRegionCache *cache,
+                                         int i)
+{
+    const hwaddr base = i * sizeof(VRingPackedDesc);
+    const hwaddr id_at = base + offsetof(VRingPackedDesc, id);
+    const hwaddr len_at = base + offsetof(VRingPackedDesc, len);
+
+    virtio_tswap32s(vdev, &desc->len);
+    virtio_tswap16s(vdev, &desc->id);
+
+    /* id first... */
+    address_space_write_cached(cache, id_at, &desc->id, sizeof(desc->id));
+    address_space_cache_invalidate(cache, id_at, sizeof(desc->id));
+
+    /* ...then len. */
+    address_space_write_cached(cache, len_at, &desc->len, sizeof(desc->len));
+    address_space_cache_invalidate(cache, len_at, sizeof(desc->len));
+}
+
+/*
+ * Store desc->flags into the flags field of packed descriptor @i of
+ * @cache and invalidate the written range.
+ */
+static void vring_packed_desc_write_flags(VirtIODevice *vdev,
+                                          VRingPackedDesc *desc,
+                                          MemoryRegionCache *cache,
+                                          int i)
+{
+    const hwaddr field = i * sizeof(VRingPackedDesc) +
+                         offsetof(VRingPackedDesc, flags);
+
+    virtio_stw_phys_cached(vdev, cache, field, desc->flags);
+    address_space_cache_invalidate(cache, field, sizeof(desc->flags));
+}
+
+/*
+ * Write @desc into packed descriptor @i of @cache: id/len first, flags
+ * last. With @strict_order a write barrier separates the two steps.
+ */
+static void vring_packed_desc_write(VirtIODevice *vdev,
+                                    VRingPackedDesc *desc,
+                                    MemoryRegionCache *cache,
+                                    int i, bool strict_order)
+{
+    /* Payload fields. */
+    vring_packed_desc_write_data(vdev, desc, cache, i);
+
+    if (strict_order) {
+        /* Make sure data is wrote before flags. */
+        smp_wmb();
+    }
+
+    /* Flags go out last. */
+    vring_packed_desc_write_flags(vdev, desc, cache, i);
+}
+
+/*
+ * Decide from a packed descriptor's @flags whether it is available: the
+ * AVAIL and USED bits must differ, and the AVAIL bit must equal
+ * @wrap_counter.
+ */
+static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
+{
+    const bool avail_bit = flags & (1 << VRING_PACKED_DESC_F_AVAIL);
+    const bool used_bit = flags & (1 << VRING_PACKED_DESC_F_USED);
+
+    if (avail_bit == used_bit) {
+        return false;
+    }
+    return avail_bit == wrap_counter;
+}
+
+/*
+ * Report whether the split ring of @vq has no buffers left to pop.
+ *
+ * A disabled device or an unset avail address counts as empty. The avail
+ * index is fetched from VQ memory only when the shadow copy does not
+ * already show that the guest has added some buffers.
+ * Called within rcu_read_lock().
+ */
+static int virtio_queue_empty_rcu(VirtQueue *vq)
+{
+    if (virtio_device_disabled(vq->vdev)) {
+        return 1;
+    }
+    if (unlikely(!vq->vring.avail)) {
+        return 1;
+    }
+
+    if (vq->shadow_avail_idx == vq->last_avail_idx) {
+        /* Shadow copy is inconclusive: consult the ring itself. */
+        return vring_avail_idx(vq) == vq->last_avail_idx;
+    }
+    return 0;
+}
+
+/*
+ * Same check as virtio_queue_empty_rcu(), for callers that do not hold
+ * the RCU read lock: the lock is taken here, and only around the read of
+ * the avail index from VQ memory.
+ */
+static int virtio_queue_split_empty(VirtQueue *vq)
+{
+    if (virtio_device_disabled(vq->vdev)) {
+        return 1;
+    }
+    if (unlikely(!vq->vring.avail)) {
+        return 1;
+    }
+
+    if (vq->shadow_avail_idx != vq->last_avail_idx) {
+        /* The shadow copy already shows pending buffers. */
+        return 0;
+    }
+
+    RCU_READ_LOCK_GUARD();
+    return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+/*
+ * Report whether the packed ring of @vq has no buffer to pop: the flags
+ * of the descriptor at last_avail_idx are tested with is_desc_avail().
+ * An unset descriptor address or missing region caches count as empty.
+ * Called within rcu_read_lock().
+ */
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *caches;
+    uint16_t flags;
+
+    if (unlikely(!vq->vring.desc)) {
+        return 1;
+    }
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        return 1;
+    }
+
+    vring_packed_desc_read_flags(vq->vdev, &flags, &caches->desc,
+                                 vq->last_avail_idx);
+
+    return !is_desc_avail(flags, vq->last_avail_wrap_counter);
+}
+
+/*
+ * Wrapper around virtio_queue_packed_empty_rcu() that takes the RCU read
+ * lock for the duration of the check.
+ */
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+    int is_empty;
+
+    RCU_READ_LOCK_GUARD();
+    is_empty = virtio_queue_packed_empty_rcu(vq);
+    return is_empty;
+}
+
+/*
+ * Report whether @vq has no buffers to pop, dispatching to the packed or
+ * split implementation according to VIRTIO_F_RING_PACKED.
+ */
+int virtio_queue_empty(VirtQueue *vq)
+{
+    if (!virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        return virtio_queue_split_empty(vq);
+    }
+    return virtio_queue_packed_empty(vq);
+}
+
+/*
+ * Unmap every scatter-gather buffer of @elem.
+ *
+ * For the "in" buffers the access length passed to dma_memory_unmap() is
+ * what remains of @len, capped at the buffer length; "out" buffers are
+ * unmapped with their full length.
+ */
+static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
+                               unsigned int len)
+{
+    AddressSpace *dma_as = vq->vdev->dma_as;
+    unsigned int consumed = 0;
+    int i;
+
+    for (i = 0; i < elem->in_num; i++) {
+        size_t size = MIN(len - consumed, elem->in_sg[i].iov_len);
+
+        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
+                         elem->in_sg[i].iov_len,
+                         DMA_DIRECTION_FROM_DEVICE, size);
+
+        consumed += size;
+    }
+
+    for (i = 0; i < elem->out_num; i++) {
+        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
+                         elem->out_sg[i].iov_len,
+                         DMA_DIRECTION_TO_DEVICE,
+                         elem->out_sg[i].iov_len);
+    }
+}
+
+/* virtqueue_detach_element:
+ * @vq: The #VirtQueue
+ * @elem: The #VirtQueueElement
+ * @len: number of bytes written
+ *
+ * Detach the element from the virtqueue.  This function is suitable for device
+ * reset or other situations where a #VirtQueueElement is simply freed and will
+ * not be pushed or discarded.
+ *
+ * The in-use count of @vq drops by the element's descriptor count and the
+ * element's scatter-gather buffers are unmapped.
+ */
+void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
+                              unsigned int len)
+{
+    vq->inuse -= elem->ndescs;
+    virtqueue_unmap_sg(vq, elem, len);
+}
+
+/*
+ * Move last_avail_idx of a split ring back by @num entries. The index is
+ * a uint16_t, so the subtraction wraps modulo 65536.
+ */
+static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
+{
+    vq->last_avail_idx -= num;
+}
+
+/*
+ * Move last_avail_idx of a packed ring back by @num entries. Stepping
+ * back past index 0 continues from the end of the ring (vring.num) and
+ * toggles last_avail_wrap_counter.
+ */
+static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
+{
+    if (vq->last_avail_idx >= num) {
+        /* No wrap: plain decrement. */
+        vq->last_avail_idx -= num;
+        return;
+    }
+
+    vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
+    vq->last_avail_wrap_counter ^= 1;
+}
+
+/* virtqueue_unpop:
+ * @vq: The #VirtQueue
+ * @elem: The #VirtQueueElement
+ * @len: number of bytes written
+ *
+ * Undo the most recent pop: the avail position is stepped back by one
+ * (packed or split flavour) and @elem is detached, so the next call to
+ * virtqueue_pop() will refetch the element.
+ */
+void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
+                     unsigned int len)
+{
+    if (!virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        virtqueue_split_rewind(vq, 1);
+    } else {
+        virtqueue_packed_rewind(vq, 1);
+    }
+
+    virtqueue_detach_element(vq, elem, len);
+}
+
+/* virtqueue_rewind:
+ * @vq: The #VirtQueue
+ * @num: Number of elements to push back
+ *
+ * Pretend that the last @num elements weren't popped from the virtqueue,
+ * so that the next virtqueue_pop() refetches the oldest of them.
+ *
+ * Use virtqueue_unpop() instead if you have a VirtQueueElement.
+ *
+ * Returns: true on success, false (with @vq left untouched) if @num is
+ * greater than the number of in use elements.
+ */
+bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
+{
+    bool packed;
+
+    if (num > vq->inuse) {
+        return false;
+    }
+    vq->inuse -= num;
+
+    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
+    if (!packed) {
+        virtqueue_split_rewind(vq, num);
+    } else {
+        virtqueue_packed_rewind(vq, num);
+    }
+
+    return true;
+}
+
+/*
+ * Write a used-ring entry for @elem (its index and @len) into the slot
+ * @idx entries past the current used index, modulo the ring size.
+ * Does nothing while the used ring address is unset.
+ */
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
+                    unsigned int len, unsigned int idx)
+{
+    VRingUsedElem entry;
+    unsigned int slot;
+
+    if (unlikely(!vq->vring.used)) {
+        return;
+    }
+
+    slot = (idx + vq->used_idx) % vq->vring.num;
+
+    entry.id = elem->index;
+    entry.len = len;
+    vring_used_write(vq, &entry, slot);
+}
+
+/*
+ * Record @elem in slot @idx of vq->used_elems: its index, the written
+ * length @len and its descriptor count.
+ */
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+                                  unsigned int len, unsigned int idx)
+{
+    VirtQueueElement *slot = &vq->used_elems[idx];
+
+    slot->index = elem->index;
+    slot->len = len;
+    slot->ndescs = elem->ndescs;
+}
+
+/*
+ * Write the used descriptor for @elem into the packed ring, @idx entries
+ * past the current used index.
+ *
+ * The target position wraps at vring.num, flipping the wrap counter used
+ * for this descriptor. The AVAIL and USED flag bits are both set when that
+ * wrap counter is 1 and both clear when it is 0. Nothing is written while
+ * the descriptor address is unset or no region caches are installed.
+ */
+static void virtqueue_packed_fill_desc(VirtQueue *vq,
+                                       const VirtQueueElement *elem,
+                                       unsigned int idx,
+                                       bool strict_order)
+{
+    VRingPackedDesc desc = {
+        .id = elem->index,
+        .len = elem->len,
+    };
+    bool wrap = vq->used_wrap_counter;
+    VRingMemoryRegionCaches *caches;
+    uint16_t slot;
+
+    if (unlikely(!vq->vring.desc)) {
+        return;
+    }
+
+    slot = vq->used_idx + idx;
+    if (slot >= vq->vring.num) {
+        slot -= vq->vring.num;
+        wrap ^= 1;
+    }
+
+    /* desc.flags starts out as zero, so only the "set" case needs work. */
+    if (wrap) {
+        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL) |
+                      (1 << VRING_PACKED_DESC_F_USED);
+    }
+
+    caches = vring_get_region_caches(vq);
+    if (caches) {
+        vring_packed_desc_write(vq->vdev, &desc, &caches->desc, slot,
+                                strict_order);
+    }
+}
+
+/* Called within rcu_read_lock().  */
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+                    unsigned int len, unsigned int idx)
+{
+    trace_virtqueue_fill(vq, elem, len, idx);
+
+    /* The buffers are unmapped even if the device turns out disabled. */
+    virtqueue_unmap_sg(vq, elem, len);
+    if (virtio_device_disabled(vq->vdev)) {
+        return;
+    }
+
+    if (!virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        virtqueue_split_fill(vq, elem, len, idx);
+        return;
+    }
+    virtqueue_packed_fill(vq, elem, len, idx);
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
+{
+    uint16_t prev_used, next_used;
+
+    if (unlikely(!vq->vring.used)) {
+        return;
+    }
+    smp_wmb(); /* used entries must land before the index publishing them */
+    trace_virtqueue_flush(vq, count);
+    prev_used = vq->used_idx;
+    next_used = prev_used + count;
+    vring_used_idx_set(vq, next_used);
+    vq->inuse -= count;
+    if (unlikely((int16_t)(next_used - vq->signalled_used) <
+                 (uint16_t)(next_used - prev_used))) {
+        vq->signalled_used_valid = false;
+    }
+}
+
+static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
+{
+    unsigned int i, ndescs;
+
+    if (unlikely(!vq->vring.desc)) {
+        return;
+    }
+    /* Element i sits after the descriptors of 0..i-1: offset by ndescs. */
+    ndescs = vq->used_elems[0].ndescs;
+    for (i = 1; i < count; i++) {
+        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
+        ndescs += vq->used_elems[i].ndescs;
+    }
+    /* The head goes last, with strict_order set, once the rest is written. */
+    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
+    vq->inuse -= ndescs;
+    vq->used_idx += ndescs;
+    if (vq->used_idx >= vq->vring.num) {
+        vq->used_idx -= vq->vring.num;
+        vq->used_wrap_counter ^= 1;
+        vq->signalled_used_valid = false;
+    }
+}
+
+void virtqueue_flush(VirtQueue *vq, unsigned int count)
+{
+    /* A disabled device only drops the in-flight count; no ring access. */
+    if (virtio_device_disabled(vq->vdev)) {
+        vq->inuse -= count;
+        return;
+    }
+    if (!virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        virtqueue_split_flush(vq, count);
+        return;
+    }
+    virtqueue_packed_flush(vq, count);
+}
+
+void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
+                    unsigned int len)
+{
+    RCU_READ_LOCK_GUARD();              /* virtqueue_fill() runs under RCU */
+    virtqueue_fill(vq, elem, len, 0);   /* stage @elem at offset 0 ... */
+    virtqueue_flush(vq, 1);             /* ... and flush that one entry */
+}
+
+/* Called within rcu_read_lock().  */
+static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
+{
+    uint16_t pending = vring_avail_idx(vq) - idx;
+
+    /* More pending heads than ring entries means a bogus avail index. */
+    if (pending > vq->vring.num) {
+        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
+                     idx, vq->shadow_avail_idx);
+        return -EINVAL;
+    }
+    if (pending == 0) {
+        return 0;
+    }
+    /* Callers go on to read a descriptor at vq->last_avail_idx; keep that
+     * read from being reordered ahead of the avail index read. */
+    smp_rmb();
+    return pending;
+}
+
+/* Called within rcu_read_lock().  */
+static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
+                               unsigned int *head)
+{
+    unsigned int ring_size = vq->vring.num;
+
+    /* Fetch the descriptor number advertised in avail slot @idx; advancing
+     * the index is left to the caller. */
+    *head = vring_avail_ring(vq, idx % ring_size);
+    if (*head < ring_size) {
+        return true;
+    }
+    /* A head number outside the ring is treated as a fatal guest error. */
+    virtio_error(vq->vdev, "Guest says index %u is available", *head);
+    return false;
+}
+
+enum {
+    VIRTQUEUE_READ_DESC_ERROR = -1, /* bad chain link, already reported */
+    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
+    VIRTQUEUE_READ_DESC_MORE = 1,   /* next descriptor was read into *desc */
+};
+
+static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
+                                          MemoryRegionCache *desc_cache,
+                                          unsigned int max, unsigned int *next)
+{
+    /* A descriptor without the NEXT flag terminates the chain. */
+    if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
+        return VIRTQUEUE_READ_DESC_DONE;
+    }
+
+    /* Latch the link once; the barrier keeps the compiler from fetching it
+     * again, so the value validated below is the value used. */
+    *next = desc->next;
+    smp_wmb();
+
+    if (*next < max) {
+        vring_split_desc_read(vdev, desc, desc_cache, *next);
+        return VIRTQUEUE_READ_DESC_MORE;
+    }
+    /* The link points beyond the descriptor table. */
+    virtio_error(vdev, "Desc next is %u", *next);
+    return VIRTQUEUE_READ_DESC_ERROR;
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+                            unsigned int *in_bytes, unsigned int *out_bytes,
+                            unsigned max_in_bytes, unsigned max_out_bytes,
+                            VRingMemoryRegionCaches *caches)
+{
+    VirtIODevice *vdev = vq->vdev;
+    unsigned int idx;
+    unsigned int total_bufs, in_total, out_total;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    int64_t len = 0;
+    int rc;
+
+    idx = vq->last_avail_idx;
+    total_bufs = in_total = out_total = 0;
+
+    /* Walk each available chain; stop early once both limits are reached. */
+    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
+        MemoryRegionCache *desc_cache = &caches->desc;
+        unsigned int max = vq->vring.num; /* reset for every chain */
+        unsigned int num_bufs;
+        VRingDesc desc;
+        unsigned int i;
+
+        num_bufs = total_bufs;
+
+        if (!virtqueue_get_head(vq, idx++, &i)) {
+            goto err;
+        }
+
+        vring_split_desc_read(vdev, &desc, desc_cache, i);
+
+        if (desc.flags & VRING_DESC_F_INDIRECT) {
+            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
+                virtio_error(vdev, "Invalid size for indirect buffer table");
+                goto err;
+            }
+
+            /* If we've got too many, that implies a descriptor loop. */
+            if (num_bufs >= max) {
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
+            }
+
+            /* loop over the indirect descriptor table */
+            len = address_space_cache_init(&indirect_desc_cache,
+                                           vdev->dma_as,
+                                           desc.addr, desc.len, false);
+            desc_cache = &indirect_desc_cache;
+            if (len < desc.len) {
+                virtio_error(vdev, "Cannot map indirect buffer");
+                goto err;
+            }
+
+            max = desc.len / sizeof(VRingDesc);
+            num_bufs = i = 0;
+            vring_split_desc_read(vdev, &desc, desc_cache, i);
+        }
+
+        do {
+            /* If we've got too many, that implies a descriptor loop. */
+            if (++num_bufs > max) {
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
+            }
+
+            if (desc.flags & VRING_DESC_F_WRITE) {
+                in_total += desc.len;
+            } else {
+                out_total += desc.len;
+            }
+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+                goto done;
+            }
+
+            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+            goto err;
+        }
+        /* An indirect table counts as a single descriptor of the ring. */
+        if (desc_cache == &indirect_desc_cache) {
+            address_space_cache_destroy(&indirect_desc_cache);
+            total_bufs++;
+        } else {
+            total_bufs = num_bufs;
+        }
+    }
+
+    if (rc < 0) {
+        goto err;
+    }
+
+done:
+    address_space_cache_destroy(&indirect_desc_cache);
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
+}
+
+static int virtqueue_packed_read_next_desc(VirtQueue *vq,
+                                           VRingPackedDesc *desc,
+                                           MemoryRegionCache *desc_cache,
+                                           unsigned int max,
+                                           unsigned int *next,
+                                           bool indirect)
+{
+    /* In the ring a chain ends at the first descriptor without NEXT; an
+     * indirect table is instead walked up to its last entry (@max). */
+    if (!indirect && (desc->flags & VRING_DESC_F_NEXT) == 0) {
+        return VIRTQUEUE_READ_DESC_DONE;
+    }
+
+    *next += 1;
+    if (*next == max && indirect) {
+        return VIRTQUEUE_READ_DESC_DONE;
+    }
+    if (*next == max) {
+        /* Ran off the end of the ring: continue from its start. */
+        *next -= vq->vring.num;
+    }
+
+    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
+    return VIRTQUEUE_READ_DESC_MORE;
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+                                             unsigned int *in_bytes,
+                                             unsigned int *out_bytes,
+                                             unsigned max_in_bytes,
+                                             unsigned max_out_bytes,
+                                             VRingMemoryRegionCaches *caches)
+{
+    VirtIODevice *vdev = vq->vdev;
+    unsigned int idx;
+    unsigned int total_bufs, in_total, out_total;
+    MemoryRegionCache *desc_cache;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    int64_t len = 0;
+    VRingPackedDesc desc;
+    bool wrap_counter;
+
+    idx = vq->last_avail_idx;
+    wrap_counter = vq->last_avail_wrap_counter;
+    total_bufs = in_total = out_total = 0;
+
+    /* Walk each available chain; stop early once both limits are reached. */
+    for (;;) {
+        unsigned int max = vq->vring.num; /* reset for every chain */
+        unsigned int num_bufs = total_bufs;
+        unsigned int i = idx;
+        int rc;
+
+        desc_cache = &caches->desc;
+        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
+        if (!is_desc_avail(desc.flags, wrap_counter)) {
+            break;
+        }
+
+        if (desc.flags & VRING_DESC_F_INDIRECT) {
+            /* As for the split ring: no empty or misaligned tables. */
+            if (!desc.len || (desc.len % sizeof(VRingPackedDesc))) {
+                virtio_error(vdev, "Invalid size for indirect buffer table");
+                goto err;
+            }
+            /* If we've got too many, that implies a descriptor loop. */
+            if (num_bufs >= max) {
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
+            }
+
+            /* loop over the indirect descriptor table */
+            len = address_space_cache_init(&indirect_desc_cache,
+                                           vdev->dma_as,
+                                           desc.addr, desc.len, false);
+            desc_cache = &indirect_desc_cache;
+            if (len < desc.len) {
+                virtio_error(vdev, "Cannot map indirect buffer");
+                goto err;
+            }
+
+            max = desc.len / sizeof(VRingPackedDesc);
+            num_bufs = i = 0;
+            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
+        }
+
+        do {
+            /* If we've got too many, that implies a descriptor loop. */
+            if (++num_bufs > max) {
+                virtio_error(vdev, "Looped descriptor");
+                goto err;
+            }
+
+            if (desc.flags & VRING_DESC_F_WRITE) {
+                in_total += desc.len;
+            } else {
+                out_total += desc.len;
+            }
+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+                goto done;
+            }
+
+            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
+                                                 &i, desc_cache ==
+                                                 &indirect_desc_cache);
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+        if (desc_cache == &indirect_desc_cache) {
+            address_space_cache_destroy(&indirect_desc_cache);
+            total_bufs++;
+            idx++;
+        } else {
+            idx += num_bufs - total_bufs;
+            total_bufs = num_bufs;
+        }
+
+        if (idx >= vq->vring.num) {
+            idx -= vq->vring.num;
+            wrap_counter ^= 1;
+        }
+    }
+
+    /* Record the index and wrap counter for a kick we want */
+    vq->shadow_avail_idx = idx;
+    vq->shadow_avail_wrap_counter = wrap_counter;
+done:
+    address_space_cache_destroy(&indirect_desc_cache);
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
+}
+
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes,
+                               unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    VRingMemoryRegionCaches *caches;
+    uint16_t desc_size;
+    bool packed;
+
+    RCU_READ_LOCK_GUARD();
+    /* Without a descriptor ring or its caches there is nothing to count. */
+    if (unlikely(!vq->vring.desc)) {
+        goto err;
+    }
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        goto err;
+    }
+
+    /* The cached mapping has to span the whole descriptor ring. */
+    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
+    desc_size = packed ? sizeof(VRingPackedDesc) : sizeof(VRingDesc);
+    if (caches->desc.len < vq->vring.num * desc_size) {
+        virtio_error(vq->vdev, "Cannot map descriptor ring");
+        goto err;
+    }
+
+    if (packed) {
+        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
+                                         max_in_bytes, max_out_bytes, caches);
+    } else {
+        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
+                                        max_in_bytes, max_out_bytes, caches);
+    }
+    return;
+
+err:
+    /* Report zero bytes through whichever out-pointers were supplied. */
+    if (in_bytes) {
+        *in_bytes = 0;
+    }
+    if (out_bytes) {
+        *out_bytes = 0;
+    }
+}
+
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                          unsigned int out_bytes)
+{
+    unsigned int avail_in, avail_out;
+    /* The requested sizes double as limits so the scan can stop early. */
+    virtqueue_get_avail_bytes(vq, &avail_in, &avail_out, in_bytes, out_bytes);
+    return avail_in >= in_bytes && avail_out >= out_bytes;
+}
+
+static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
+                               hwaddr *addr, struct iovec *iov,
+                               unsigned int max_num_sg, bool is_write,
+                               hwaddr pa, size_t sz)
+{
+    unsigned int filled = *p_num_sg;
+    bool mapped_all = false;
+
+    assert(filled <= max_num_sg);
+    if (sz == 0) {
+        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
+        goto out;
+    }
+
+    /* One mapping may cover less than was asked for, so keep adding iovec
+     * entries until the whole [pa, pa + sz) range is covered. */
+    for (; sz != 0; filled++) {
+        hwaddr chunk = sz;
+
+        if (filled == max_num_sg) {
+            virtio_error(vdev, "virtio: too many write descriptors in "
+                               "indirect table");
+            goto out;
+        }
+        iov[filled].iov_base = dma_memory_map(vdev->dma_as, pa, &chunk,
+                                              is_write ?
+                                              DMA_DIRECTION_FROM_DEVICE :
+                                              DMA_DIRECTION_TO_DEVICE,
+                                              MEMTXATTRS_UNSPECIFIED);
+        if (!iov[filled].iov_base) {
+            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
+            goto out;
+        }
+
+        iov[filled].iov_len = chunk;
+        addr[filled] = pa;
+        pa += chunk;
+        sz -= chunk;
+    }
+    mapped_all = true;
+
+out:
+    /* Report the entry count even on failure so the caller can unmap. */
+    *p_num_sg = filled;
+    return mapped_all;
+}
+
+/* Only used by error code paths before we have a VirtQueueElement (therefore
+ * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
+ * yet, so every unmap is passed an access length of 0.
+ */
+static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
+                                    struct iovec *iov)
+{
+    unsigned int total = out_num + in_num;
+    unsigned int n;
+
+    /* The first @out_num entries are unmapped as reads, the rest as writes. */
+    for (n = 0; n < total; n++) {
+        cpu_physical_memory_unmap(iov[n].iov_base, iov[n].iov_len,
+                                  n >= out_num, 0);
+    }
+}
+
+static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
+                                hwaddr *addr, unsigned int num_sg,
+                                bool is_write)
+{
+    unsigned int n;
+
+    /* Map each entry again from addr[]; a failed or short map is fatal. */
+    for (n = 0; n < num_sg; n++) {
+        struct iovec *seg = &sg[n];
+        hwaddr mapped = seg->iov_len;
+        seg->iov_base = dma_memory_map(vdev->dma_as, addr[n], &mapped,
+                                       is_write ? DMA_DIRECTION_FROM_DEVICE :
+                                       DMA_DIRECTION_TO_DEVICE,
+                                       MEMTXATTRS_UNSPECIFIED);
+        if (!seg->iov_base) {
+            error_report("virtio: error trying to map MMIO memory");
+            exit(1);
+        }
+        if (seg->iov_len != mapped) {
+            error_report("virtio: unexpected memory split");
+            exit(1);
+        }
+    }
+}
+
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
+{
+    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
+    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
+                        false); /* out entries map DMA_DIRECTION_TO_DEVICE */
+}
+
+static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
+{
+    VirtQueueElement *elem;
+    /* Single allocation: [sz bytes][in_addr][out_addr][in_sg][out_sg]. */
+    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
+    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
+    size_t addrs_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
+    size_t in_sg_ofs = QEMU_ALIGN_UP(addrs_end, __alignof__(elem->in_sg[0]));
+    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+    size_t alloc_size = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+    assert(sz >= sizeof(VirtQueueElement));
+    elem = g_malloc(alloc_size);
+    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
+    elem->in_num = in_num;
+    elem->in_addr = (void *)elem + in_addr_ofs;
+    elem->in_sg = (void *)elem + in_sg_ofs;
+    elem->out_num = out_num;
+    elem->out_addr = (void *)elem + out_addr_ofs;
+    elem->out_sg = (void *)elem + out_sg_ofs;
+    return elem;
+}
+
+static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
+{
+    unsigned int i, head, max;
+    VRingMemoryRegionCaches *caches;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    MemoryRegionCache *desc_cache;
+    int64_t len;
+    VirtIODevice *vdev = vq->vdev;
+    VirtQueueElement *elem = NULL;
+    unsigned out_num, in_num, elem_entries;
+    hwaddr addr[VIRTQUEUE_MAX_SIZE];
+    struct iovec iov[VIRTQUEUE_MAX_SIZE];
+    VRingDesc desc;
+    int rc;
+    /* Returns the next available chain as a mapped element, else NULL. */
+    RCU_READ_LOCK_GUARD();
+    if (virtio_queue_empty_rcu(vq)) {
+        goto done;
+    }
+    /* Needed after virtio_queue_empty(), see comment in
+     * virtqueue_num_heads(). */
+    smp_rmb();
+
+    /* Nothing collected yet: no out entries, no in entries, no descriptors. */
+    out_num = in_num = elem_entries = 0;
+
+    max = vq->vring.num;
+
+    if (vq->inuse >= vq->vring.num) {
+        virtio_error(vdev, "Virtqueue size exceeded");
+        goto done;
+    }
+    /* NB: last_avail_idx is advanced even if the head gets rejected. */
+    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
+        goto done;
+    }
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vq->last_avail_idx);
+    }
+
+    i = head;
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        virtio_error(vdev, "Region caches not initialized");
+        goto done;
+    }
+
+    if (caches->desc.len < max * sizeof(VRingDesc)) {
+        virtio_error(vdev, "Cannot map descriptor ring");
+        goto done;
+    }
+
+    desc_cache = &caches->desc;
+    vring_split_desc_read(vdev, &desc, desc_cache, i);
+    if (desc.flags & VRING_DESC_F_INDIRECT) {
+        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
+            virtio_error(vdev, "Invalid size for indirect buffer table");
+            goto done;
+        }
+
+        /* Switch to a cache over the indirect table and walk that instead. */
+        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+                                       desc.addr, desc.len, false);
+        desc_cache = &indirect_desc_cache;
+        if (len < desc.len) {
+            virtio_error(vdev, "Cannot map indirect buffer");
+            goto done;
+        }
+
+        max = desc.len / sizeof(VRingDesc);
+        i = 0;
+        vring_split_desc_read(vdev, &desc, desc_cache, i);
+    }
+
+    /* Map every descriptor of the chain; out entries must precede in ones. */
+    do {
+        bool map_ok;
+
+        if (desc.flags & VRING_DESC_F_WRITE) {
+            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+                                        iov + out_num,
+                                        VIRTQUEUE_MAX_SIZE - out_num, true,
+                                        desc.addr, desc.len);
+        } else {
+            if (in_num) {
+                virtio_error(vdev, "Incorrect order for descriptors");
+                goto err_undo_map;
+            }
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+                                        VIRTQUEUE_MAX_SIZE, false,
+                                        desc.addr, desc.len);
+        }
+        if (!map_ok) {
+            goto err_undo_map;
+        }
+
+        /* If we've got too many, that implies a descriptor loop. */
+        if (++elem_entries > max) {
+            virtio_error(vdev, "Looped descriptor");
+            goto err_undo_map;
+        }
+
+        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
+    } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+        goto err_undo_map;
+    }
+
+    /* Copy the collected addresses and mappings into a right-sized element. */
+    elem = virtqueue_alloc_element(sz, out_num, in_num);
+    elem->index = head;
+    elem->ndescs = 1;
+    for (i = 0; i < out_num; i++) {
+        elem->out_addr[i] = addr[i];
+        elem->out_sg[i] = iov[i];
+    }
+    for (i = 0; i < in_num; i++) {
+        elem->in_addr[i] = addr[out_num + i];
+        elem->in_sg[i] = iov[out_num + i];
+    }
+
+    vq->inuse++;
+
+    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
+done:
+    address_space_cache_destroy(&indirect_desc_cache);
+
+    return elem;
+
+err_undo_map:
+    virtqueue_undo_map_desc(out_num, in_num, iov);
+    goto done;
+}
+
+static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
+{
+    unsigned int i, max;
+    VRingMemoryRegionCaches *caches;
+    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    MemoryRegionCache *desc_cache;
+    int64_t len;
+    VirtIODevice *vdev = vq->vdev;
+    VirtQueueElement *elem = NULL;
+    unsigned out_num, in_num, elem_entries;
+    hwaddr addr[VIRTQUEUE_MAX_SIZE];
+    struct iovec iov[VIRTQUEUE_MAX_SIZE];
+    VRingPackedDesc desc;
+    uint16_t id;
+    int rc;
+    /* Returns the next available chain as a mapped element, else NULL. */
+    RCU_READ_LOCK_GUARD();
+    if (virtio_queue_packed_empty_rcu(vq)) {
+        goto done;
+    }
+
+    /* Nothing collected yet: no out entries, no in entries, no descriptors. */
+    out_num = in_num = elem_entries = 0;
+
+    max = vq->vring.num;
+
+    if (vq->inuse >= vq->vring.num) {
+        virtio_error(vdev, "Virtqueue size exceeded");
+        goto done;
+    }
+
+    i = vq->last_avail_idx;
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        virtio_error(vdev, "Region caches not initialized");
+        goto done;
+    }
+    /* This ring holds VRingPackedDesc entries, so size the check on those. */
+    if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
+        virtio_error(vdev, "Cannot map descriptor ring");
+        goto done;
+    }
+
+    desc_cache = &caches->desc;
+    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
+    id = desc.id;
+    if (desc.flags & VRING_DESC_F_INDIRECT) {
+        /* As for the split ring: no empty or misaligned tables. */
+        if (!desc.len || (desc.len % sizeof(VRingPackedDesc))) {
+            virtio_error(vdev, "Invalid size for indirect buffer table");
+            goto done;
+        }
+        /* loop over the indirect descriptor table */
+        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+                                       desc.addr, desc.len, false);
+        desc_cache = &indirect_desc_cache;
+        if (len < desc.len) {
+            virtio_error(vdev, "Cannot map indirect buffer");
+            goto done;
+        }
+
+        max = desc.len / sizeof(VRingPackedDesc);
+        i = 0;
+        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
+    }
+
+    /* Map every descriptor of the chain; out entries must precede in ones. */
+    do {
+        bool map_ok;
+
+        if (desc.flags & VRING_DESC_F_WRITE) {
+            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+                                        iov + out_num,
+                                        VIRTQUEUE_MAX_SIZE - out_num, true,
+                                        desc.addr, desc.len);
+        } else {
+            if (in_num) {
+                virtio_error(vdev, "Incorrect order for descriptors");
+                goto err_undo_map;
+            }
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+                                        VIRTQUEUE_MAX_SIZE, false,
+                                        desc.addr, desc.len);
+        }
+        if (!map_ok) {
+            goto err_undo_map;
+        }
+
+        /* If we've got too many, that implies a descriptor loop. */
+        if (++elem_entries > max) {
+            virtio_error(vdev, "Looped descriptor");
+            goto err_undo_map;
+        }
+
+        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
+                                             desc_cache ==
+                                             &indirect_desc_cache);
+    } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+    /* Copy the collected addresses and mappings into a right-sized element. */
+    elem = virtqueue_alloc_element(sz, out_num, in_num);
+    for (i = 0; i < out_num; i++) {
+        elem->out_addr[i] = addr[i];
+        elem->out_sg[i] = iov[i];
+    }
+    for (i = 0; i < in_num; i++) {
+        elem->in_addr[i] = addr[out_num + i];
+        elem->in_sg[i] = iov[out_num + i];
+    }
+
+    /* An indirect table takes one ring slot; a direct chain one per entry. */
+    elem->index = id;
+    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
+    vq->last_avail_idx += elem->ndescs;
+    vq->inuse += elem->ndescs;
+
+    if (vq->last_avail_idx >= vq->vring.num) {
+        vq->last_avail_idx -= vq->vring.num;
+        vq->last_avail_wrap_counter ^= 1;
+    }
+
+    vq->shadow_avail_idx = vq->last_avail_idx;
+    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
+
+    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
+done:
+    address_space_cache_destroy(&indirect_desc_cache);
+
+    return elem;
+
+err_undo_map:
+    virtqueue_undo_map_desc(out_num, in_num, iov);
+    goto done;
+}
+
+void *virtqueue_pop(VirtQueue *vq, size_t sz)
+{
+    /* A disabled device hands out no elements. */
+    if (virtio_device_disabled(vq->vdev)) {
+        return NULL;
+    }
+    /* Otherwise defer to the pop routine of the ring layout in use. */
+    if (!virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        return virtqueue_split_pop(vq, sz);
+    }
+    return virtqueue_packed_pop(vq, sz);
+}
+
+static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
+{
+    VRingMemoryRegionCaches *caches;
+    MemoryRegionCache *desc_cache;
+    unsigned int dropped = 0;
+    VirtQueueElement elem = {};
+    VirtIODevice *vdev = vq->vdev;
+    VRingPackedDesc desc;
+    /* Pushes every available chain back with length 0; returns the count. */
+    RCU_READ_LOCK_GUARD();
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        return 0;
+    }
+
+    desc_cache = &caches->desc;
+
+    virtio_queue_set_notification(vq, 0);
+
+    while (vq->inuse < vq->vring.num) {
+        unsigned int idx = vq->last_avail_idx;
+        /*
+         * Same walk as virtqueue_pop(), except that no buffer is mapped
+         * and no memory is allocated.
+         */
+        vring_packed_desc_read(vdev, &desc, desc_cache,
+                               vq->last_avail_idx , true);
+        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
+            break;
+        }
+        elem.index = desc.id;
+        elem.ndescs = 1; /* the head; the loop adds each chained descriptor */
+        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
+                                               vq->vring.num, &idx, false)) {
+            ++elem.ndescs;
+        }
+        /*
+         * Push the element right away: in_num and out_num are both 0,
+         * so there is nothing to unmap.
+         */
+        virtqueue_push(vq, &elem, 0);
+        dropped++;
+        vq->last_avail_idx += elem.ndescs;
+        if (vq->last_avail_idx >= vq->vring.num) {
+            vq->last_avail_idx -= vq->vring.num;
+            vq->last_avail_wrap_counter ^= 1;
+        }
+    }
+
+    return dropped;
+}
+
+static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
+{
+    VirtIODevice *vdev = vq->vdev;
+    bool event_idx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+    VirtQueueElement elem = {};
+    unsigned int dropped = 0;
+
+    /* Like virtqueue_pop(), minus mapping buffers or allocating memory. */
+    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
+        smp_rmb();
+        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
+            break;
+        }
+
+        vq->last_avail_idx++;
+        vq->inuse++;
+        if (event_idx) {
+            vring_set_avail_event(vq, vq->last_avail_idx);
+        }
+        /* in_num and out_num are both 0, so the push has nothing to unmap
+         * and the element can go straight back. */
+        virtqueue_push(vq, &elem, 0);
+        dropped++;
+    }
+
+    return dropped;
+}
+
+/* virtqueue_drop_all:
+ * @vq: The #VirtQueue
+ * Drops all queued buffers and indicates them to the guest
+ * as if they are done. Useful when buffers can not be
+ * processed but must be returned to the guest.
+ */
+unsigned int virtqueue_drop_all(VirtQueue *vq)
+{
+    VirtIODevice *vdev = vq->vdev;
+
+    /* Nothing is dropped while the device is disabled. */
+    if (virtio_device_disabled(vdev)) {
+        return 0;
+    }
+
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        return virtqueue_split_drop_all(vq);
+    }
+    return virtqueue_packed_drop_all(vq);
+}
+
+/* Reading and writing a structure directly to QEMUFile is *awful*, but
+ * it is what QEMU has always done by mistake.  We can change it sooner
+ * or later by bumping the version number of the affected vm states.
+ * In the meanwhile, since the in-memory layout of VirtQueueElement
+ * has changed, we need to marshal to and from the layout that was
+ * used before the change.
+ */
+typedef struct VirtQueueElementOld {
+    unsigned int index;
+    unsigned int out_num;                    /* used entries in out_addr/out_sg */
+    unsigned int in_num;                     /* used entries in in_addr/in_sg */
+    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
+    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
+    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];  /* only iov_len is read on load */
+    struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; /* only iov_len is read on load */
+} VirtQueueElementOld;
+
+/*
+ * qemu_get_virtqueue_element:
+ * @vdev: device the element belongs to
+ * @f: migration stream to read from
+ * @sz: size handed to virtqueue_alloc_element() for the new element
+ *
+ * Rebuild a VirtQueueElement from the stream representation produced by
+ * qemu_put_virtqueue_element() and map its buffers with virtqueue_map().
+ *
+ * Returns: the newly allocated element.
+ */
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
+{
+    VirtQueueElement *elem;
+    VirtQueueElementOld data;
+    bool packed = virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
+    uint32_t ndescs = 0;
+    int i;
+
+    /*
+     * qemu_put_virtqueue_element() emits ndescs *before* the legacy blob,
+     * so it has to be consumed first; reading it afterwards would parse
+     * the blob from the wrong stream offset.
+     */
+    if (packed) {
+        qemu_get_be32s(f, &ndescs);
+    }
+
+    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
+
+    /* TODO: teach all callers that this can fail, and return failure instead
+     * of asserting here.
+     * This is just one thing (there are probably more) that must be
+     * fixed before we can allow NDEBUG compilation.
+     */
+    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
+    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
+
+    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
+    elem->index = data.index;
+
+    for (i = 0; i < elem->in_num; i++) {
+        elem->in_addr[i] = data.in_addr[i];
+        /* Base is overwritten by virtqueue_map.  */
+        elem->in_sg[i].iov_base = 0;
+        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
+    }
+
+    for (i = 0; i < elem->out_num; i++) {
+        elem->out_addr[i] = data.out_addr[i];
+        /* Base is overwritten by virtqueue_map.  */
+        elem->out_sg[i].iov_base = 0;
+        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
+    }
+
+    /* Only touch ndescs when the stream actually carried it. */
+    if (packed) {
+        elem->ndescs = ndescs;
+    }
+
+    virtqueue_map(vdev, elem);
+    return elem;
+}
+
+/*
+ * qemu_put_virtqueue_element:
+ * @vdev: device the element belongs to
+ * @f: migration stream to write to
+ * @elem: element to serialize
+ *
+ * Write @elem to the stream: first ndescs (only when the host offers
+ * VIRTIO_F_RING_PACKED), then a zero-initialised VirtQueueElementOld
+ * carrying the index, the counts, the guest addresses and the lengths.
+ */
+void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
+                                VirtQueueElement *elem)
+{
+    VirtQueueElementOld legacy;
+    int n;
+
+    /* Zero everything so unused slots and iov_base never reach the stream. */
+    memset(&legacy, 0, sizeof(legacy));
+    legacy.index = elem->index;
+    legacy.out_num = elem->out_num;
+    legacy.in_num = elem->in_num;
+
+    for (n = 0; n < elem->in_num; n++) {
+        legacy.in_addr[n] = elem->in_addr[n];
+        /* Base is overwritten by virtqueue_map when loading.  Do not
+         * save it, as it would leak the QEMU address space layout.  */
+        legacy.in_sg[n].iov_len = elem->in_sg[n].iov_len;
+    }
+
+    for (n = 0; n < elem->out_num; n++) {
+        legacy.out_addr[n] = elem->out_addr[n];
+        /* Do not save iov_base as above.  */
+        legacy.out_sg[n].iov_len = elem->out_sg[n].iov_len;
+    }
+
+    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        qemu_put_be32s(f, &elem->ndescs);
+    }
+
+    qemu_put_buffer(f, (uint8_t *)&legacy, sizeof(legacy));
+}
+
+/* virtio device */
+
+/*
+ * Forward an interrupt for @vector to the transport through the bus
+ * class notify hook.  Does nothing when the device is disabled or the
+ * bus class provides no hook.
+ */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+
+    if (!virtio_device_disabled(vdev) && vbc->notify) {
+        vbc->notify(bus->parent, vector);
+    }
+}
+
+/* Ask the transport to notify with VIRTIO_NO_VECTOR as the vector. */
+void virtio_update_irq(VirtIODevice *vdev)
+{
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+}
+
+/*
+ * Check the negotiated feature set.
+ *
+ * Returns: -EFAULT when the host offers VIRTIO_F_IOMMU_PLATFORM but the
+ * guest did not accept it, otherwise the result of the device class
+ * validate_features hook, or 0 when there is no hook.
+ */
+static int virtio_validate_features(VirtIODevice *vdev)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
+        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+        return -EFAULT;
+    }
+
+    return vdc->validate_features ? vdc->validate_features(vdev) : 0;
+}
+
+/*
+ * virtio_set_status:
+ * @vdev: device being updated
+ * @val: new status byte
+ *
+ * For VIRTIO_F_VERSION_1 devices, validate features when FEATURES_OK is
+ * being set for the first time.  Update the started state whenever the
+ * DRIVER_OK bit changes, call the class set_status hook and finally
+ * store @val.
+ *
+ * Returns: 0 on success, or the feature validation error, in which case
+ * the status is left untouched.
+ */
+int virtio_set_status(VirtIODevice *vdev, uint8_t val)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    trace_virtio_set_status(vdev, val);
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+        !(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
+        (val & VIRTIO_CONFIG_S_FEATURES_OK)) {
+        int err = virtio_validate_features(vdev);
+
+        if (err) {
+            return err;
+        }
+    }
+
+    /* The XOR isolates bits that differ between old and new status. */
+    if ((vdev->status ^ val) & VIRTIO_CONFIG_S_DRIVER_OK) {
+        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
+    }
+
+    if (vdc->set_status) {
+        vdc->set_status(vdev, val);
+    }
+    vdev->status = val;
+
+    return 0;
+}
+
+/* Device endianness that matches what target_words_bigendian() reports. */
+static enum virtio_device_endian virtio_default_endian(void)
+{
+    return target_words_bigendian() ? VIRTIO_DEVICE_ENDIAN_BIG
+                                    : VIRTIO_DEVICE_ENDIAN_LITTLE;
+}
+
+/* Device endianness that matches what current_cpu reports for virtio. */
+static enum virtio_device_endian virtio_current_cpu_endian(void)
+{
+    return cpu_virtio_is_big_endian(current_cpu) ? VIRTIO_DEVICE_ENDIAN_BIG
+                                                 : VIRTIO_DEVICE_ENDIAN_LITTLE;
+}
+
+/*
+ * Put queue @i of @vdev back into its initial state: ring addresses and
+ * indices cleared, wrap counters set, no vector assigned, notifications
+ * enabled, ring size restored to its default and the region cache reset.
+ */
+static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
+{
+    VirtQueue *vq = &vdev->vq[i];
+
+    vq->vring.desc = 0;
+    vq->vring.avail = 0;
+    vq->vring.used = 0;
+    vq->last_avail_idx = 0;
+    vq->shadow_avail_idx = 0;
+    vq->used_idx = 0;
+    vq->last_avail_wrap_counter = true;
+    vq->shadow_avail_wrap_counter = true;
+    vq->used_wrap_counter = true;
+    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
+    vq->signalled_used = 0;
+    vq->signalled_used_valid = false;
+    vq->notification = true;
+    vq->vring.num = vq->vring.num_default;
+    vq->inuse = 0;
+    virtio_virtqueue_reset_region_cache(vq);
+}
+
+/*
+ * Reset a single queue: give the device class a chance to react through
+ * its queue_reset hook (if any), then restore the generic queue state.
+ */
+void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    if (k->queue_reset) {
+        k->queue_reset(vdev, queue_index);
+    }
+
+    __virtio_queue_reset(vdev, queue_index);
+}
+
+/*
+ * Enable a single queue by delegating to the device class queue_enable
+ * hook; a no-op when the class does not provide one.
+ */
+void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    /*
+     * TODO: Seabios is currently out of spec and triggering this error.
+     * So this needs to be fixed in Seabios, then this can
+     * be re-enabled for new machine types only, and also after
+     * being converted to LOG_GUEST_ERROR.
+     *
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        error_report("queue_enable is only supported in devices of virtio "
+                     "1.0 or later.");
+    }
+    */
+
+    if (k->queue_enable) {
+        k->queue_enable(vdev, queue_index);
+    }
+}
+
+/*
+ * virtio_reset:
+ * @opaque: the VirtIODevice to reset
+ *
+ * Clear the status, pick the device endianness, run the device class
+ * reset hook, return the generic device fields to their defaults and
+ * reset every queue slot.
+ */
+void virtio_reset(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    int q;
+
+    virtio_set_status(vdev, 0);
+
+    /*
+     * With a current CPU this is a guest initiated reset and the CPU's
+     * endianness is used; without one it is a system reset and the
+     * default applies.
+     */
+    vdev->device_endian = current_cpu ? virtio_current_cpu_endian()
+                                      : virtio_default_endian();
+
+    if (vdc->reset) {
+        vdc->reset(vdev);
+    }
+
+    vdev->start_on_kick = false;
+    vdev->started = false;
+    vdev->broken = false;
+    vdev->guest_features = 0;
+    vdev->queue_sel = 0;
+    vdev->status = 0;
+    vdev->disabled = false;
+    qatomic_set(&vdev->isr, 0);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
+
+    for (q = 0; q < VIRTIO_QUEUE_MAX; q++) {
+        __virtio_queue_reset(vdev, q);
+    }
+}
+
+/*
+ * virtio_config_readb:
+ * @vdev: device whose config space is read
+ * @addr: byte offset into the config space
+ *
+ * Refresh vdev->config through the class get_config hook and read one
+ * byte at @addr with ldub_p().
+ *
+ * Returns: the value read, or (uint32_t)-1 when the access would extend
+ * past config_len.
+ */
+uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint8_t val;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return (uint32_t)-1;
+    }
+
+    k->get_config(vdev, vdev->config);
+
+    val = ldub_p(vdev->config + addr);
+    return val;
+}
+
+/*
+ * virtio_config_readw:
+ * @vdev: device whose config space is read
+ * @addr: byte offset into the config space
+ *
+ * Refresh vdev->config through the class get_config hook and read a
+ * 16-bit value at @addr with lduw_p().
+ *
+ * Returns: the value read, or (uint32_t)-1 when the access would extend
+ * past config_len.
+ */
+uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint16_t val;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return (uint32_t)-1;
+    }
+
+    k->get_config(vdev, vdev->config);
+
+    val = lduw_p(vdev->config + addr);
+    return val;
+}
+
+/*
+ * virtio_config_readl:
+ * @vdev: device whose config space is read
+ * @addr: byte offset into the config space
+ *
+ * Refresh vdev->config through the class get_config hook and read a
+ * 32-bit value at @addr with ldl_p().
+ *
+ * Returns: the value read, or (uint32_t)-1 when the access would extend
+ * past config_len.
+ */
+uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint32_t val;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return (uint32_t)-1;
+    }
+
+    k->get_config(vdev, vdev->config);
+
+    val = ldl_p(vdev->config + addr);
+    return val;
+}
+
+/*
+ * virtio_config_writeb:
+ * @vdev: device whose config space is written
+ * @addr: byte offset into the config space
+ * @data: value to store; truncated to 8 bits
+ *
+ * Store one byte at @addr with stb_p() and then hand the updated config
+ * to the class set_config hook, if there is one.  Accesses that would
+ * extend past config_len are silently ignored.
+ */
+void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint8_t val = data;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return;
+    }
+
+    stb_p(vdev->config + addr, val);
+
+    if (k->set_config) {
+        k->set_config(vdev, vdev->config);
+    }
+}
+
+/*
+ * virtio_config_writew:
+ * @vdev: device whose config space is written
+ * @addr: byte offset into the config space
+ * @data: value to store; truncated to 16 bits
+ *
+ * Store a 16-bit value at @addr with stw_p() and then hand the updated
+ * config to the class set_config hook, if there is one.  Accesses that
+ * would extend past config_len are silently ignored.
+ */
+void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint16_t val = data;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return;
+    }
+
+    stw_p(vdev->config + addr, val);
+
+    if (k->set_config) {
+        k->set_config(vdev, vdev->config);
+    }
+}
+
+/*
+ * virtio_config_writel:
+ * @vdev: device whose config space is written
+ * @addr: byte offset into the config space
+ * @data: 32-bit value to store
+ *
+ * Store a 32-bit value at @addr with stl_p() and then hand the updated
+ * config to the class set_config hook, if there is one.  Accesses that
+ * would extend past config_len are silently ignored.
+ */
+void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint32_t val = data;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return;
+    }
+
+    stl_p(vdev->config + addr, val);
+
+    if (k->set_config) {
+        k->set_config(vdev, vdev->config);
+    }
+}
+
+/*
+ * virtio_config_modern_readb:
+ * @vdev: device whose config space is read
+ * @addr: byte offset into the config space
+ *
+ * Refresh vdev->config through the class get_config hook and read one
+ * byte at @addr with ldub_p().
+ *
+ * Returns: the value read, or (uint32_t)-1 when the access would extend
+ * past config_len.
+ */
+uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint8_t val;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return (uint32_t)-1;
+    }
+
+    k->get_config(vdev, vdev->config);
+
+    val = ldub_p(vdev->config + addr);
+    return val;
+}
+
+/*
+ * virtio_config_modern_readw:
+ * @vdev: device whose config space is read
+ * @addr: byte offset into the config space
+ *
+ * Refresh vdev->config through the class get_config hook and read a
+ * 16-bit value at @addr with lduw_le_p().
+ *
+ * Returns: the value read, or (uint32_t)-1 when the access would extend
+ * past config_len.
+ */
+uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint16_t val;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return (uint32_t)-1;
+    }
+
+    k->get_config(vdev, vdev->config);
+
+    val = lduw_le_p(vdev->config + addr);
+    return val;
+}
+
+/*
+ * virtio_config_modern_readl:
+ * @vdev: device whose config space is read
+ * @addr: byte offset into the config space
+ *
+ * Refresh vdev->config through the class get_config hook and read a
+ * 32-bit value at @addr with ldl_le_p().
+ *
+ * Returns: the value read, or (uint32_t)-1 when the access would extend
+ * past config_len.
+ */
+uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint32_t val;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return (uint32_t)-1;
+    }
+
+    k->get_config(vdev, vdev->config);
+
+    val = ldl_le_p(vdev->config + addr);
+    return val;
+}
+
+/*
+ * virtio_config_modern_writeb:
+ * @vdev: device whose config space is written
+ * @addr: byte offset into the config space
+ * @data: value to store; truncated to 8 bits
+ *
+ * Store one byte at @addr with stb_p() and then hand the updated config
+ * to the class set_config hook, if there is one.  Accesses that would
+ * extend past config_len are silently ignored.
+ */
+void virtio_config_modern_writeb(VirtIODevice *vdev,
+                                 uint32_t addr, uint32_t data)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint8_t val = data;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return;
+    }
+
+    stb_p(vdev->config + addr, val);
+
+    if (k->set_config) {
+        k->set_config(vdev, vdev->config);
+    }
+}
+
+/*
+ * virtio_config_modern_writew:
+ * @vdev: device whose config space is written
+ * @addr: byte offset into the config space
+ * @data: value to store; truncated to 16 bits
+ *
+ * Store a 16-bit value at @addr with stw_le_p() and then hand the
+ * updated config to the class set_config hook, if there is one.
+ * Accesses that would extend past config_len are silently ignored.
+ */
+void virtio_config_modern_writew(VirtIODevice *vdev,
+                                 uint32_t addr, uint32_t data)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint16_t val = data;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return;
+    }
+
+    stw_le_p(vdev->config + addr, val);
+
+    if (k->set_config) {
+        k->set_config(vdev, vdev->config);
+    }
+}
+
+/*
+ * virtio_config_modern_writel:
+ * @vdev: device whose config space is written
+ * @addr: byte offset into the config space
+ * @data: 32-bit value to store
+ *
+ * Store a 32-bit value at @addr with stl_le_p() and then hand the
+ * updated config to the class set_config hook, if there is one.
+ * Accesses that would extend past config_len are silently ignored.
+ */
+void virtio_config_modern_writel(VirtIODevice *vdev,
+                                 uint32_t addr, uint32_t data)
+{
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint32_t val = data;
+
+    /* Add in 64 bits so a huge @addr cannot wrap where size_t is 32 bits. */
+    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
+        return;
+    }
+
+    stl_le_p(vdev->config + addr, val);
+
+    if (k->set_config) {
+        k->set_config(vdev, vdev->config);
+    }
+}
+
+/*
+ * Set the descriptor area address of queue @n and recompute the ring
+ * layout.  Ignored for queues whose ring size is 0.
+ */
+void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
+{
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (vq->vring.num) {
+        vq->vring.desc = addr;
+        virtio_queue_update_rings(vdev, n);
+    }
+}
+
+/* Return the descriptor area address currently stored for queue @n. */
+hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
+{
+    return vdev->vq[n].vring.desc;
+}
+
+/*
+ * Set all three ring addresses of queue @n at once and rebuild its
+ * region cache.  Ignored for queues whose ring size is 0.
+ */
+void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
+                            hwaddr avail, hwaddr used)
+{
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (vq->vring.num == 0) {
+        return;
+    }
+
+    vq->vring.desc = desc;
+    vq->vring.avail = avail;
+    vq->vring.used = used;
+    virtio_init_region_cache(vdev, n);
+}
+
+/*
+ * Change the ring size of queue @n to @num.
+ *
+ * The request is dropped when it would flip the queue between existent
+ * (non-zero size) and nonexistent (zero size), or when @num is negative
+ * or larger than VIRTQUEUE_MAX_SIZE.
+ */
+void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
+{
+    bool exists_now = vdev->vq[n].vring.num != 0;
+    bool exists_after = num != 0;
+
+    if (exists_after != exists_now ||
+        num > VIRTQUEUE_MAX_SIZE ||
+        num < 0) {
+        return;
+    }
+    vdev->vq[n].vring.num = num;
+}
+
+/* Return the head of the list of queues attached to @vector. */
+VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
+{
+    return QLIST_FIRST(&vdev->vector_queues[vector]);
+}
+
+/* Return the queue that follows @vq on its per-vector list. */
+VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
+{
+    return QLIST_NEXT(vq, node);
+}
+
+/* Return the current ring size of queue @n (0 marks an unused slot). */
+int virtio_queue_get_num(VirtIODevice *vdev, int n)
+{
+    return vdev->vq[n].vring.num;
+}
+
+/* Return the default ring size recorded for queue @n. */
+int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
+{
+    return vdev->vq[n].vring.num_default;
+}
+
+/*
+ * Count the queues of @vdev: the index of the first slot whose ring size
+ * is 0, or VIRTIO_QUEUE_MAX when every slot is in use.
+ */
+int virtio_get_num_queues(VirtIODevice *vdev)
+{
+    int count = 0;
+
+    while (count < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, count)) {
+        count++;
+    }
+
+    return count;
+}
+
+/*
+ * Change the vring alignment of queue @n and recompute the ring layout.
+ *
+ * Refused with an error report for VIRTIO_F_VERSION_1 devices.  The
+ * transport must have declared has_variable_vring_alignment; an @align
+ * of 0 leaves the queue untouched.
+ */
+void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
+{
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+
+    /* virtio-1 compliant devices cannot change the alignment */
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        error_report("tried to modify queue alignment for virtio-1 device");
+        return;
+    }
+
+    /*
+     * The transport has to announce up front that it may do this, so a
+     * buggy transport asserts right away instead of silently failing to
+     * migrate this state.
+     */
+    assert(vbc->has_variable_vring_alignment);
+
+    if (!align) {
+        return;
+    }
+
+    vdev->vq[n].vring.align = align;
+    virtio_queue_update_rings(vdev, n);
+}
+
+/*
+ * Run the output handler of @vq.  Nothing happens when the queue has no
+ * descriptor area or handler, or when the device is marked broken.  A
+ * device flagged start_on_kick is marked started after the handler ran.
+ */
+static void virtio_queue_notify_vq(VirtQueue *vq)
+{
+    VirtIODevice *vdev;
+
+    if (!vq->vring.desc || !vq->handle_output) {
+        return;
+    }
+
+    vdev = vq->vdev;
+    if (unlikely(vdev->broken)) {
+        return;
+    }
+
+    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+    vq->handle_output(vdev, vq);
+
+    if (unlikely(vdev->start_on_kick)) {
+        virtio_set_started(vdev, true);
+    }
+}
+
+/*
+ * Handle a guest kick on queue @n.
+ *
+ * Ignored when the queue has no descriptor area or the device is broken.
+ * With an enabled host notifier the kick is forwarded by signalling it;
+ * otherwise the output handler, if any, is called directly and a device
+ * flagged start_on_kick is marked started.
+ */
+void virtio_queue_notify(VirtIODevice *vdev, int n)
+{
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (unlikely(!vq->vring.desc || vdev->broken)) {
+        return;
+    }
+
+    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+
+    if (vq->host_notifier_enabled) {
+        event_notifier_set(&vq->host_notifier);
+        return;
+    }
+
+    if (!vq->handle_output) {
+        return;
+    }
+
+    vq->handle_output(vdev, vq);
+
+    if (unlikely(vdev->start_on_kick)) {
+        virtio_set_started(vdev, true);
+    }
+}
+
+/*
+ * Return the vector assigned to queue @n, or VIRTIO_NO_VECTOR when @n is
+ * not below VIRTIO_QUEUE_MAX.
+ */
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    if (n < VIRTIO_QUEUE_MAX) {
+        return vdev->vq[n].vector;
+    }
+
+    return VIRTIO_NO_VECTOR;
+}
+
+/*
+ * virtio_queue_set_vector:
+ * @vdev: device owning the queue
+ * @n: queue index; out-of-range values are ignored
+ * @vector: vector to assign, or VIRTIO_NO_VECTOR to detach
+ *
+ * Assign @vector to queue @n.  When the device keeps per-vector queue
+ * lists, the queue is unlinked from the list of its previous vector and
+ * linked at the head of the list of the new one.
+ */
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    VirtQueue *vq;
+
+    /* Validate the index before forming a pointer into the queue array. */
+    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
+        return;
+    }
+
+    vq = &vdev->vq[n];
+
+    if (vdev->vector_queues &&
+        vq->vector != VIRTIO_NO_VECTOR) {
+        QLIST_REMOVE(vq, node);
+    }
+    vq->vector = vector;
+    if (vdev->vector_queues &&
+        vector != VIRTIO_NO_VECTOR) {
+        QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
+    }
+}
+
+/*
+ * virtio_add_queue:
+ * @vdev: device to add the queue to
+ * @queue_size: ring size, used both as current and as default size
+ * @handle_output: handler called when the guest kicks the queue
+ *
+ * Claim the first queue slot whose ring size is 0 and initialise it.
+ * Aborts when every slot is taken or @queue_size exceeds
+ * VIRTQUEUE_MAX_SIZE.
+ *
+ * Returns: the initialised queue.
+ */
+VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
+                            VirtIOHandleOutput handle_output)
+{
+    VirtQueue *vq;
+    int slot = 0;
+
+    while (slot < VIRTIO_QUEUE_MAX && vdev->vq[slot].vring.num != 0) {
+        slot++;
+    }
+
+    if (slot == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
+        abort();
+    }
+
+    vq = &vdev->vq[slot];
+    vq->vring.num = queue_size;
+    vq->vring.num_default = queue_size;
+    vq->vring.align = VIRTIO_PCI_VRING_ALIGN;
+    vq->handle_output = handle_output;
+    vq->used_elems = g_new0(VirtQueueElement, queue_size);
+
+    return vq;
+}
+
+/*
+ * Release a queue slot: zero its sizes (which marks the slot unused),
+ * drop the handler, free the used_elems array and reset the region
+ * cache.
+ */
+void virtio_delete_queue(VirtQueue *vq)
+{
+    vq->vring.num = 0;
+    vq->vring.num_default = 0;
+    vq->handle_output = NULL;
+    g_free(vq->used_elems);
+    /* Clear the pointer so the freed array cannot be reused by mistake. */
+    vq->used_elems = NULL;
+    virtio_virtqueue_reset_region_cache(vq);
+}
+
+/*
+ * Index based variant of virtio_delete_queue(); aborts when @n is not a
+ * valid queue index.
+ */
+void virtio_del_queue(VirtIODevice *vdev, int n)
+{
+    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
+        abort();
+    }
+
+    virtio_delete_queue(&vdev->vq[n]);
+}
+
+/* Atomically OR the bits of @value into the ISR of @vdev. */
+static void virtio_set_isr(VirtIODevice *vdev, int value)
+{
+    uint8_t current = qatomic_read(&vdev->isr);
+
+    /*
+     * Skip the write when all requested bits are already set, so that the
+     * ISR cacheline stays shared in the common case where the guest does
+     * not read it.
+     */
+    if ((current & value) == value) {
+        return;
+    }
+
+    qatomic_or(&vdev->isr, value);
+}
+
+/*
+ * Decide whether the guest has to be interrupted for a split ring.
+ *
+ * Called within rcu_read_lock().
+ *
+ * Side effect: when VIRTIO_RING_F_EVENT_IDX is negotiated, signalled_used
+ * and signalled_used_valid are updated to the current used_idx.
+ */
+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+    uint16_t old, new;
+    bool v;
+    /* We need to expose used array entries before checking used event. */
+    smp_mb();
+    /* Always notify when queue is empty (when feature acknowledge) */
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+        !vq->inuse && virtio_queue_empty(vq)) {
+        return true;
+    }
+
+    /* Without event index the guest controls interrupts through a flag. */
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+    }
+
+    /* Remember the previous signalling state, then record the new one. */
+    v = vq->signalled_used_valid;
+    vq->signalled_used_valid = true;
+    old = vq->signalled_used;
+    new = vq->signalled_used = vq->used_idx;
+    /* No valid previous value means we must notify unconditionally. */
+    return !v || vring_need_event(vring_get_used_event(vq), new, old);
+}
+
+/*
+ * Packed ring flavour of vring_need_event().
+ *
+ * @off_wrap carries an offset in its low 15 bits and a wrap bit in bit
+ * 15.  When that wrap bit differs from @wrap, the offset is moved back
+ * by one ring size before the comparison.
+ */
+static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
+                                    uint16_t off_wrap, uint16_t new,
+                                    uint16_t old)
+{
+    bool event_wrap = off_wrap >> 15;
+    int event_off = off_wrap & 0x7fff;
+
+    if (wrap != event_wrap) {
+        event_off -= vq->vring.num;
+    }
+
+    return vring_need_event(event_off, new, old);
+}
+
+/*
+ * Decide whether the guest has to be interrupted for a packed ring.
+ *
+ * Called within rcu_read_lock().
+ *
+ * Returns false when the region caches are not available.  Otherwise
+ * signalled_used and signalled_used_valid are updated as a side effect.
+ */
+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VRingPackedDescEvent e;
+    uint16_t old, new;
+    bool v;
+    VRingMemoryRegionCaches *caches;
+
+    caches = vring_get_region_caches(vq);
+    if (!caches) {
+        return false;
+    }
+
+    /* Fetch the event suppression structure through the avail cache. */
+    vring_packed_event_read(vdev, &caches->avail, &e);
+
+    /* Remember the previous signalling state, then record the new one. */
+    old = vq->signalled_used;
+    new = vq->signalled_used = vq->used_idx;
+    v = vq->signalled_used_valid;
+    vq->signalled_used_valid = true;
+
+    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
+        return false;
+    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
+        return true;
+    }
+
+    /* Any other flags value: decide from the offset/wrap in off_wrap. */
+    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
+                                         e.off_wrap, new, old);
+}
+
+/*
+ * Dispatch to the packed or split notification check, depending on
+ * whether VIRTIO_F_RING_PACKED was negotiated.
+ *
+ * Called within rcu_read_lock().
+ */
+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+    return virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)
+        ? virtio_packed_should_notify(vdev, vq)
+        : virtio_split_should_notify(vdev, vq);
+}
+
+/*
+ * Notify the guest about @vq through the queue's guest notifier, if the
+ * ring state says a notification is due.  ISR bit 0 is set as well; see
+ * the comment in the body for the reason.
+ */
+void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
+{
+    WITH_RCU_READ_LOCK_GUARD() {
+        if (!virtio_should_notify(vdev, vq)) {
+            return;
+        }
+    }
+
+    trace_virtio_notify_irqfd(vdev, vq);
+
+    /*
+     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
+     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
+     * incorrectly polling this bit during crashdump and hibernation
+     * in MSI mode, causing a hang if this bit is never updated.
+     * Recent releases of Windows do not really shut down, but rather
+     * log out and hibernate to make the next startup faster.  Hence,
+     * this manifested as a more serious hang during shutdown with
+     * the affected drivers.
+     *
+     * Next driver release from 2016 fixed this problem, so working around it
+     * is not a must, but it's easy to do so let's do it here.
+     *
+     * Note: it's safe to update ISR from any thread as it was switched
+     * to an atomic operation.
+     */
+    virtio_set_isr(vq->vdev, 0x1);
+    event_notifier_set(&vq->guest_notifier);
+}
+
+/* Set ISR bit 0 and notify the transport with the vector of @vq. */
+static void virtio_irq(VirtQueue *vq)
+{
+    virtio_set_isr(vq->vdev, 0x1);
+    virtio_notify_vector(vq->vdev, vq->vector);
+}
+
+/*
+ * Interrupt the guest for @vq, unless the ring state says that no
+ * notification is due.
+ */
+void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /* The notification check must run inside an RCU read section. */
+    WITH_RCU_READ_LOCK_GUARD() {
+        if (!virtio_should_notify(vdev, vq)) {
+            return;
+        }
+    }
+
+    trace_virtio_notify(vdev, vq);
+    virtio_irq(vq);
+}
+
+/*
+ * Signal a configuration change: set ISR bits 0 and 1, bump the config
+ * generation and notify on the config vector.  Does nothing until the
+ * driver has set DRIVER_OK.
+ */
+void virtio_notify_config(VirtIODevice *vdev)
+{
+    if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+        virtio_set_isr(vdev, 0x3);
+        vdev->generation++;
+        virtio_notify_vector(vdev, vdev->config_vector);
+    }
+}
+
+/*
+ * Subsection predicate: the endianness only needs migrating when it
+ * differs from what the destination would assume, i.e. little endian
+ * for VIRTIO 1.0 or later devices and the target default for legacy
+ * ones.
+ */
+static bool virtio_device_endian_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        /* Devices conforming to VIRTIO 1.0 or later are always LE. */
+        return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
+    }
+
+    return vdev->device_endian != virtio_default_endian();
+}
+
+/* Subsection predicate: true when any host feature bit above 31 is set. */
+static bool virtio_64bit_features_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return (vdev->host_features >> 32) != 0;
+}
+
+/* Subsection predicate: true when the host offers VIRTIO_F_VERSION_1. */
+static bool virtio_virtqueue_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
+}
+
+/* Subsection predicate: true when the host offers VIRTIO_F_RING_PACKED. */
+static bool virtio_packed_virtqueue_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
+}
+
+/*
+ * Subsection predicate: true as soon as one queue slot has a ring size
+ * that differs from its default.
+ */
+static bool virtio_ringsize_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+    bool resized = false;
+    int idx = 0;
+
+    while (!resized && idx < VIRTIO_QUEUE_MAX) {
+        resized = vdev->vq[idx].vring.num != vdev->vq[idx].vring.num_default;
+        idx++;
+    }
+
+    return resized;
+}
+
+/*
+ * Subsection predicate: true when the bus class implements
+ * has_extra_state and that hook reports extra state for the transport.
+ */
+static bool virtio_extra_state_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+    return k->has_extra_state &&
+        k->has_extra_state(qbus->parent);
+}
+
+/* Subsection predicate: only migrate the broken flag when it is set. */
+static bool virtio_broken_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return vdev->broken;
+}
+
+/* Subsection predicate: only migrate the started flag when it is set. */
+static bool virtio_started_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return vdev->started;
+}
+
+/* Subsection predicate: only migrate the disabled flag when it is set. */
+static bool virtio_disabled_needed(void *opaque)
+{
+    VirtIODevice *vdev = opaque;
+
+    return vdev->disabled;
+}
+
+/* Per-queue state: the avail and used ring addresses. */
+static const VMStateDescription vmstate_virtqueue = {
+    .name = "virtqueue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(vring.avail, struct VirtQueue),
+        VMSTATE_UINT64(vring.used, struct VirtQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Per-queue state for packed rings: indices, wrap counters and inuse. */
+static const VMStateDescription vmstate_packed_virtqueue = {
+    .name = "packed_virtqueue_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
+        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
+        VMSTATE_UINT16(used_idx, struct VirtQueue),
+        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
+        VMSTATE_UINT32(inuse, struct VirtQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Subsection carrying vmstate_virtqueue for all VIRTIO_QUEUE_MAX queue
+ * slots; sent when virtio_virtqueue_needed() says so.
+ */
+static const VMStateDescription vmstate_virtio_virtqueues = {
+    .name = "virtio/virtqueues",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_virtqueue_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Subsection carrying vmstate_packed_virtqueue for all VIRTIO_QUEUE_MAX
+ * queue slots; sent when virtio_packed_virtqueue_needed() says so.
+ */
+static const VMStateDescription vmstate_virtio_packed_virtqueues = {
+    .name = "virtio/packed_virtqueues",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_packed_virtqueue_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Per-queue state: the default ring size. */
+static const VMStateDescription vmstate_ringsize = {
+    .name = "ringsize_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Subsection carrying vmstate_ringsize for all VIRTIO_QUEUE_MAX queue
+ * slots; sent when virtio_ringsize_needed() says so.
+ */
+static const VMStateDescription vmstate_virtio_ringsize = {
+    .name = "virtio/ringsize",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_ringsize_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * VMState .get callback for the transport's extra state.
+ *
+ * Returns: the result of the bus class load_extra_state hook, or -1 when
+ * the bus class has no such hook.
+ */
+static int get_extra_state(QEMUFile *f, void *pv, size_t size,
+                           const VMStateField *field)
+{
+    VirtIODevice *vdev = pv;
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+
+    if (vbc->load_extra_state) {
+        return vbc->load_extra_state(bus->parent, f);
+    }
+
+    return -1;
+}
+
+/*
+ * VMState .put callback for the transport's extra state: hands the
+ * stream to the bus class save_extra_state hook and always returns 0.
+ *
+ * NOTE(review): the hook is called without a NULL check; presumably
+ * has_extra_state implies save_extra_state is implemented - confirm.
+ */
+static int put_extra_state(QEMUFile *f, void *pv, size_t size,
+                           const VMStateField *field, JSONWriter *vmdesc)
+{
+    VirtIODevice *vdev = pv;
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+    k->save_extra_state(qbus->parent, f);
+    return 0;
+}
+
+/* Binds the extra-state load/save callbacks into a VMStateInfo. */
+static const VMStateInfo vmstate_info_extra_state = {
+    .name = "virtqueue_extra_state",
+    .get = get_extra_state,
+    .put = put_extra_state,
+};
+
+/*
+ * Subsection for transport specific extra state.  The single field has
+ * offset 0 and size 0 and is handled through vmstate_info_extra_state.
+ */
+static const VMStateDescription vmstate_virtio_extra_state = {
+    .name = "virtio/extra_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_extra_state_needed,
+    .fields = (VMStateField[]) {
+        {
+            .name         = "extra_state",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &vmstate_info_extra_state,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Subsection carrying device_endian as a single byte. */
+static const VMStateDescription vmstate_virtio_device_endian = {
+    .name = "virtio/device_endian",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_device_endian_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(device_endian, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Subsection carrying the full 64-bit guest_features value. */
+static const VMStateDescription vmstate_virtio_64bit_features = {
+    .name = "virtio/64bit_features",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_64bit_features_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(guest_features, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Subsection carrying the broken flag. */
+static const VMStateDescription vmstate_virtio_broken = {
+    .name = "virtio/broken",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_broken_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(broken, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Subsection carrying the started flag. */
+static const VMStateDescription vmstate_virtio_started = {
+    .name = "virtio/started",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_started_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(started, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/* Subsection carrying the disabled flag. */
+static const VMStateDescription vmstate_virtio_disabled = {
+    .name = "virtio/disabled",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = &virtio_disabled_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(disabled, VirtIODevice),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Top-level description used by virtio_save() for the subsections.  It
+ * has no fields of its own; all state lives in the optional subsections
+ * listed below.
+ */
+static const VMStateDescription vmstate_virtio = {
+    .name = "virtio",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_virtio_device_endian,
+        &vmstate_virtio_64bit_features,
+        &vmstate_virtio_virtqueues,
+        &vmstate_virtio_ringsize,
+        &vmstate_virtio_broken,
+        &vmstate_virtio_extra_state,
+        &vmstate_virtio_started,
+        &vmstate_virtio_packed_virtqueues,
+        &vmstate_virtio_disabled,
+        NULL
+    }
+};
+
+/*
+ * virtio_save:
+ * @vdev: device to save
+ * @f: migration stream to write to
+ *
+ * Stream layout, in order: transport config (bus class save_config hook),
+ * status, isr, queue_sel, low 32 bits of guest_features, config_len and
+ * the config bytes, the number of queues, the per-queue data, the device
+ * class save hook, the device class vmsd, and finally the vmstate_virtio
+ * subsections.
+ *
+ * Returns: 0 on success, or the error from vmstate_save_state().
+ */
+int virtio_save(VirtIODevice *vdev, QEMUFile *f)
+{
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    /* The upper feature bits travel in the 64bit_features subsection. */
+    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
+    int i;
+
+    if (k->save_config) {
+        k->save_config(qbus->parent, f);
+    }
+
+    qemu_put_8s(f, &vdev->status);
+    qemu_put_8s(f, &vdev->isr);
+    qemu_put_be16s(f, &vdev->queue_sel);
+    qemu_put_be32s(f, &guest_features_lo);
+    qemu_put_be32(f, vdev->config_len);
+    qemu_put_buffer(f, vdev->config, vdev->config_len);
+
+    /* Count the queues: the first slot with ring size 0 ends the list. */
+    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+        if (vdev->vq[i].vring.num == 0)
+            break;
+    }
+
+    qemu_put_be32(f, i);
+
+    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+        if (vdev->vq[i].vring.num == 0)
+            break;
+
+        qemu_put_be32(f, vdev->vq[i].vring.num);
+        /* The alignment is only part of the stream for some transports. */
+        if (k->has_variable_vring_alignment) {
+            qemu_put_be32(f, vdev->vq[i].vring.align);
+        }
+        /*
+         * Save desc now, the rest of the ring addresses are saved in
+         * subsections for VIRTIO-1 devices.
+         */
+        qemu_put_be64(f, vdev->vq[i].vring.desc);
+        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (k->save_queue) {
+            k->save_queue(qbus->parent, i, f);
+        }
+    }
+
+    if (vdc->save != NULL) {
+        vdc->save(vdev, f);
+    }
+
+    if (vdc->vmsd) {
+        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    /* Subsections */
+    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
+}
+
+/*
+ * A wrapper for use as a VMState .put function: saves the device with
+ * virtio_save(); @size, @field and @vmdesc are not used.
+ */
+static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
+                              const VMStateField *field, JSONWriter *vmdesc)
+{
+    return virtio_save(VIRTIO_DEVICE(opaque), f);
+}
+
+/*
+ * A wrapper for use as a VMState .get function: loads the device with
+ * virtio_load(), passing the version_id of the device class vmsd.
+ * @size and @field are not used.
+ */
+static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
+                             const VMStateField *field)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
+
+    return virtio_load(vdev, f, dc->vmsd->version_id);
+}
+
+/* VMStateInfo that saves and loads a whole virtio device. */
+const VMStateInfo  virtio_vmstate_info = {
+    .name = "virtio",
+    .get = virtio_device_get,
+    .put = virtio_device_put,
+};
+
+/*
+ * Apply the guest's feature selection without checking the status.
+ *
+ * Bits the host does not offer are masked out before the class
+ * set_features hook is called and guest_features is stored.
+ *
+ * Returns: -1 when @val contained bits the host does not offer, else 0.
+ */
+static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    uint64_t accepted = val & vdev->host_features;
+    /* Any bit lost by the masking was not offered by the host. */
+    bool rejected_bits = accepted != val;
+
+    if (vdc->set_features) {
+        vdc->set_features(vdev, accepted);
+    }
+    vdev->guest_features = accepted;
+
+    return rejected_bits ? -1 : 0;
+}
+
+/*
+ * virtio_set_features:
+ * @vdev: device being configured
+ * @val: feature bits selected by the guest
+ *
+ * Returns: -EINVAL when FEATURES_OK is already set, otherwise the result
+ * of virtio_set_features_nocheck().
+ */
+int virtio_set_features(VirtIODevice *vdev, uint64_t val)
+{
+    int rc;
+
+    /*
+     * Once feature negotiation has finished the driver must not try to
+     * change the features any more.
+     */
+    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
+        return -EINVAL;
+    }
+
+    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
+                      __func__, vdev->name);
+    }
+
+    rc = virtio_set_features_nocheck(vdev, val);
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
+        int q;
+
+        for (q = 0; q < VIRTIO_QUEUE_MAX; q++) {
+            if (vdev->vq[q].vring.num == 0) {
+                continue;
+            }
+            virtio_init_region_cache(vdev, q);
+        }
+    }
+
+    /* Not started, pre-1.0 devices get started by the first kick. */
+    if (!rc &&
+        !virtio_device_started(vdev, vdev->status) &&
+        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        vdev->start_on_kick = true;
+    }
+
+    return rc;
+}
+
+/*
+ * Compute the config space size implied by @host_features.
+ *
+ * Starts from params->min_size and grows to the largest .end of any entry
+ * in params->feature_sizes whose flags intersect @host_features.  The table
+ * is terminated by an entry with flags == 0.  Asserts that the result does
+ * not exceed params->max_size.
+ */
+size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
+                              uint64_t host_features)
+{
+    const VirtIOFeature *entry;
+    size_t size = params->min_size;
+
+    for (entry = params->feature_sizes; entry->flags != 0; entry++) {
+        if (host_features & entry->flags) {
+            size = MAX(entry->end, size);
+        }
+    }
+
+    assert(size <= params->max_size);
+    return size;
+}
+
+/*
+ * Load the state of @vdev from migration stream @f.
+ *
+ * Reads, in order: transport config (bus load_config hook), status, isr,
+ * queue_sel, the low 32 feature bits, the config space, the per-queue ring
+ * state (plus the bus load_queue hook), the device-specific state (class
+ * load hook or vmsd) and finally the virtio subsections.  Afterwards the
+ * features are validated against host_features and the per-queue runtime
+ * indices are rebuilt from guest memory.
+ *
+ * @version_id is forwarded to the device-specific load hook / vmsd.
+ *
+ * Returns 0 on success, a non-zero value (from a hook, or -1) on failure.
+ */
+int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
+{
+    int i, ret;
+    int32_t config_len;
+    uint32_t num;
+    uint32_t features;
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+    /*
+     * We poison the endianness to ensure it does not get used before
+     * subsections have been loaded.
+     */
+    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
+
+    if (k->load_config) {
+        ret = k->load_config(qbus->parent, f);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    qemu_get_8s(f, &vdev->status);
+    qemu_get_8s(f, &vdev->isr);
+    qemu_get_be16s(f, &vdev->queue_sel);
+    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
+        return -1;
+    }
+    qemu_get_be32s(f, &features);
+
+    /*
+     * Temporarily set guest_features low bits - needed by
+     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
+     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
+     *
+     * Note: devices should always test host features in future - don't create
+     * new dependencies like this.
+     */
+    vdev->guest_features = features;
+
+    config_len = qemu_get_be32(f);
+
+    /*
+     * config_len is signed; a negative value would be promoted to a huge
+     * unsigned quantity in the comparisons against vdev->config_len below
+     * and make the skip loop run for billions of iterations.  No valid
+     * stream carries a negative length, so reject it up front.
+     */
+    if (config_len < 0) {
+        error_report("Invalid config length: %" PRId32, config_len);
+        return -1;
+    }
+
+    /*
+     * There are cases where the incoming config can be bigger or smaller
+     * than what we have; so load what we have space for, and skip
+     * any excess that's in the stream.
+     */
+    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
+
+    while (config_len > vdev->config_len) {
+        qemu_get_byte(f);
+        config_len--;
+    }
+
+    num = qemu_get_be32(f);
+
+    if (num > VIRTIO_QUEUE_MAX) {
+        error_report("Invalid number of virtqueues: 0x%x", num);
+        return -1;
+    }
+
+    for (i = 0; i < num; i++) {
+        vdev->vq[i].vring.num = qemu_get_be32(f);
+        if (k->has_variable_vring_alignment) {
+            vdev->vq[i].vring.align = qemu_get_be32(f);
+        }
+        vdev->vq[i].vring.desc = qemu_get_be64(f);
+        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
+        vdev->vq[i].signalled_used_valid = false;
+        vdev->vq[i].notification = true;
+
+        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
+            error_report("VQ %d address 0x0 "
+                         "inconsistent with Host index 0x%x",
+                         i, vdev->vq[i].last_avail_idx);
+            return -1;
+        }
+        if (k->load_queue) {
+            ret = k->load_queue(qbus->parent, i, f);
+            if (ret) {
+                return ret;
+            }
+        }
+    }
+
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+
+    if (vdc->load != NULL) {
+        ret = vdc->load(vdev, f, version_id);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    if (vdc->vmsd) {
+        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    /* Subsections */
+    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
+    if (ret) {
+        return ret;
+    }
+
+    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
+        vdev->device_endian = virtio_default_endian();
+    }
+
+    if (virtio_64bit_features_needed(vdev)) {
+        /*
+         * Subsection load filled vdev->guest_features.  Run them
+         * through virtio_set_features to sanity-check them against
+         * host_features.
+         */
+        uint64_t features64 = vdev->guest_features;
+        if (virtio_set_features_nocheck(vdev, features64) < 0) {
+            error_report("Features 0x%" PRIx64 " unsupported. "
+                         "Allowed features: 0x%" PRIx64,
+                         features64, vdev->host_features);
+            return -1;
+        }
+    } else {
+        if (virtio_set_features_nocheck(vdev, features) < 0) {
+            error_report("Features 0x%x unsupported. "
+                         "Allowed features: 0x%" PRIx64,
+                         features, vdev->host_features);
+            return -1;
+        }
+    }
+
+    if (!virtio_device_started(vdev, vdev->status) &&
+        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        vdev->start_on_kick = true;
+    }
+
+    RCU_READ_LOCK_GUARD();
+    for (i = 0; i < num; i++) {
+        if (vdev->vq[i].vring.desc) {
+            uint16_t nheads;
+
+            /*
+             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
+             * only the region cache needs to be set up.  Legacy devices need
+             * to calculate used and avail ring addresses based on the desc
+             * address.
+             */
+            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+                virtio_init_region_cache(vdev, i);
+            } else {
+                virtio_queue_update_rings(vdev, i);
+            }
+
+            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
+                vdev->vq[i].shadow_avail_wrap_counter =
+                                        vdev->vq[i].last_avail_wrap_counter;
+                continue;
+            }
+
+            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
+            /* Check it isn't doing strange things with descriptor numbers. */
+            if (nheads > vdev->vq[i].vring.num) {
+                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
+                             "inconsistent with Host index 0x%x: delta 0x%x",
+                             i, vdev->vq[i].vring.num,
+                             vring_avail_idx(&vdev->vq[i]),
+                             vdev->vq[i].last_avail_idx, nheads);
+                vdev->vq[i].used_idx = 0;
+                vdev->vq[i].shadow_avail_idx = 0;
+                vdev->vq[i].inuse = 0;
+                continue;
+            }
+            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
+            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
+
+            /*
+             * Some devices migrate VirtQueueElements that have been popped
+             * from the avail ring but not yet returned to the used ring.
+             * Since max ring size < UINT16_MAX it's safe to use modulo
+             * UINT16_MAX + 1 subtraction.
+             */
+            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
+                                vdev->vq[i].used_idx);
+            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
+                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
+                             "used_idx 0x%x",
+                             i, vdev->vq[i].vring.num,
+                             vdev->vq[i].last_avail_idx,
+                             vdev->vq[i].used_idx);
+                return -1;
+            }
+        }
+    }
+
+    if (vdc->post_load) {
+        ret = vdc->post_load(vdev);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* Remove the VM change-state handler stored in vdev->vmstate. */
+void virtio_cleanup(VirtIODevice *vdev)
+{
+    VMChangeStateEntry *handler = vdev->vmstate;
+
+    qemu_del_vm_change_state_handler(handler);
+}
+
+/*
+ * VM run-state change handler; @opaque is the VirtIODevice.
+ *
+ * The backend is considered running only when the VM is running and the
+ * device has been started.  The status is re-applied before the bus
+ * vmstate_change hook when the backend starts, and after it when the
+ * backend stops.
+ */
+static void virtio_vmstate_change(void *opaque, bool running, RunState state)
+{
+    VirtIODevice *vdev = opaque;
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+    bool backend_run = false;
+
+    if (running) {
+        backend_run = virtio_device_started(vdev, vdev->status);
+    }
+    vdev->vm_running = running;
+
+    if (backend_run) {
+        virtio_set_status(vdev, vdev->status);
+    }
+
+    if (vbc->vmstate_change) {
+        vbc->vmstate_change(bus->parent, backend_run);
+    }
+
+    if (!backend_run) {
+        virtio_set_status(vdev, vdev->status);
+    }
+}
+
+/*
+ * Initialize the backend object stored at @data (of @vdev_size bytes and
+ * QOM type @vdev_name) as the "virtio-backend" child of @proxy_obj, then
+ * alias all of the backend's properties onto the proxy.
+ */
+void virtio_instance_init_common(Object *proxy_obj, void *data,
+                                 size_t vdev_size, const char *vdev_name)
+{
+    DeviceState *backend = data;
+
+    object_initialize_child_with_props(proxy_obj, "virtio-backend",
+                                       backend, vdev_size, vdev_name,
+                                       &error_abort, NULL);
+    qdev_alias_all_properties(backend, proxy_obj);
+}
+
+/*
+ * Initialize the common state of @vdev.
+ *
+ * Allocates the per-vector queue lists (when the bus reports vectors), the
+ * VIRTIO_QUEUE_MAX virtqueue array and a zeroed config space of
+ * @config_size bytes (NULL when @config_size is 0), resets the status and
+ * bookkeeping fields, and registers the VM change-state handler.
+ */
+void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
+{
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+    int nvectors = 0;
+    int n;
+
+    if (vbc->query_nvectors) {
+        nvectors = vbc->query_nvectors(bus->parent);
+    }
+    if (nvectors) {
+        vdev->vector_queues =
+            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
+    }
+
+    /* Lifecycle flags */
+    vdev->start_on_kick = false;
+    vdev->started = false;
+    vdev->vhost_started = false;
+
+    /* Identity and register state */
+    vdev->device_id = device_id;
+    vdev->status = 0;
+    qatomic_set(&vdev->isr, 0);
+    vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+
+    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
+    vdev->vm_running = runstate_is_running();
+    vdev->broken = false;
+    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
+        VirtQueue *vq = &vdev->vq[n];
+
+        vq->vector = VIRTIO_NO_VECTOR;
+        vq->vdev = vdev;
+        vq->queue_index = n;
+        vq->host_notifier_enabled = false;
+    }
+
+    vdev->name = virtio_id_to_name(device_id);
+    vdev->config_len = config_size;
+    vdev->config = vdev->config_len ? g_malloc0(config_size) : NULL;
+
+    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
+                                                     virtio_vmstate_change,
+                                                     vdev);
+    vdev->device_endian = virtio_default_endian();
+    vdev->use_guest_notifier_mask = true;
+}
+
+/*
+ * Legacy mode is only supported by devices that existed before the virtio
+ * standard was defined, including devices the standard does not specify.
+ * Every newer device conforms to the virtio standard only.
+ *
+ * Returns true if vdev->device_id is one of those pre-standard device IDs.
+ */
+bool virtio_legacy_allowed(VirtIODevice *vdev)
+{
+    static const uint16_t legacy_ids[] = {
+        VIRTIO_ID_NET,
+        VIRTIO_ID_BLOCK,
+        VIRTIO_ID_CONSOLE,
+        VIRTIO_ID_RNG,
+        VIRTIO_ID_BALLOON,
+        VIRTIO_ID_RPMSG,
+        VIRTIO_ID_SCSI,
+        VIRTIO_ID_9P,
+        VIRTIO_ID_RPROC_SERIAL,
+        VIRTIO_ID_CAIF,
+    };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(legacy_ids) / sizeof(legacy_ids[0]); idx++) {
+        if (vdev->device_id == legacy_ids[idx]) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Report the value of the device's disable_legacy_check flag. */
+bool virtio_legacy_check_disabled(VirtIODevice *vdev)
+{
+    bool disabled = vdev->disable_legacy_check;
+
+    return disabled;
+}
+
+/* Return the descriptor area address (vring.desc) of queue @n. */
+hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
+{
+    const VirtQueue *vq = &vdev->vq[n];
+
+    return vq->vring.desc;
+}
+
+/* Legacy rule: queue @n counts as enabled when its desc address is non-zero. */
+bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
+{
+    hwaddr desc = virtio_queue_get_desc_addr(vdev, n);
+
+    return desc != 0;
+}
+
+/*
+ * Return whether queue @n is enabled.  The bus queue_enabled hook decides
+ * when present; otherwise the legacy non-zero desc address rule is used.
+ */
+bool virtio_queue_enabled(VirtIODevice *vdev, int n)
+{
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+
+    if (!vbc->queue_enabled) {
+        return virtio_queue_enabled_legacy(vdev, n);
+    }
+    return vbc->queue_enabled(bus->parent, n);
+}
+
+/* Return the vring.avail address of queue @n. */
+hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
+{
+    const VirtQueue *vq = &vdev->vq[n];
+
+    return vq->vring.avail;
+}
+
+/* Return the vring.used address of queue @n. */
+hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
+{
+    const VirtQueue *vq = &vdev->vq[n];
+
+    return vq->vring.used;
+}
+
+/* Size in bytes of the descriptor table of queue @n: one VRingDesc per entry. */
+hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
+{
+    const VirtQueue *vq = &vdev->vq[n];
+
+    return sizeof(VRingDesc) * vq->vring.num;
+}
+
+/*
+ * Size in bytes of the avail area of queue @n.
+ *
+ * With VIRTIO_F_RING_PACKED this is one VRingPackedDescEvent.  Otherwise it
+ * is the VRingAvail header plus one uint16_t per ring entry, plus two extra
+ * bytes when VIRTIO_RING_F_EVENT_IDX is negotiated.
+ */
+hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
+{
+    size_t size;
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        return sizeof(struct VRingPackedDescEvent);
+    }
+
+    size = offsetof(VRingAvail, ring) +
+        sizeof(uint16_t) * vdev->vq[n].vring.num;
+    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        size += 2;
+    }
+    return size;
+}
+
+/*
+ * Size in bytes of the used area of queue @n.
+ *
+ * With VIRTIO_F_RING_PACKED this is one VRingPackedDescEvent.  Otherwise it
+ * is the VRingUsed header plus one VRingUsedElem per ring entry, plus two
+ * extra bytes when VIRTIO_RING_F_EVENT_IDX is negotiated.
+ */
+hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
+{
+    size_t size;
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        return sizeof(struct VRingPackedDescEvent);
+    }
+
+    size = offsetof(VRingUsed, ring) +
+        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        size += 2;
+    }
+    return size;
+}
+
+/*
+ * Pack the packed-ring position of queue @n into one 32-bit value:
+ * low half  = last_avail_idx with last_avail_wrap_counter in bit 15,
+ * high half = used_idx with used_wrap_counter in bit 15.
+ */
+static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
+                                                           int n)
+{
+    const VirtQueue *vq = &vdev->vq[n];
+    unsigned int avail_half;
+    unsigned int used_half;
+
+    avail_half = vq->last_avail_idx |
+                 (((uint16_t)vq->last_avail_wrap_counter) << 15);
+    used_half = vq->used_idx |
+                (((uint16_t)vq->used_wrap_counter) << 15);
+
+    return (used_half << 16) | avail_half;
+}
+
+/* Split ring: return last_avail_idx of queue @n. */
+static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
+                                                      int n)
+{
+    const VirtQueue *vq = &vdev->vq[n];
+
+    return vq->last_avail_idx;
+}
+
+/*
+ * Return the last-avail position of queue @n, using the packed encoding
+ * when VIRTIO_F_RING_PACKED is negotiated and the plain index otherwise.
+ */
+unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+{
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        return virtio_queue_split_get_last_avail_idx(vdev, n);
+    }
+    return virtio_queue_packed_get_last_avail_idx(vdev, n);
+}
+
+/*
+ * Restore the packed-ring position of queue @n from the 32-bit @idx:
+ *
+ *   bits  0..14  last_avail_idx (also copied to shadow_avail_idx)
+ *   bit   15     avail wrap counter (last and shadow)
+ *   bits 16..30  used_idx
+ *   bit   31     used wrap counter
+ *
+ * This is the inverse of virtio_queue_packed_get_last_avail_idx().
+ */
+static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
+                                                   int n, unsigned int idx)
+{
+    struct VirtQueue *vq = &vdev->vq[n];
+
+    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
+    vq->last_avail_wrap_counter =
+        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
+    idx >>= 16;
+    /*
+     * The index is 15 bits wide; bit 15 is the wrap counter and must not
+     * leak into used_idx (the previous 0x7ffff mask let it through).
+     */
+    vq->used_idx = idx & 0x7fff;
+    vq->used_wrap_counter = !!(idx & 0x8000);
+}
+
+/* Split ring: set both last_avail_idx and shadow_avail_idx of queue @n. */
+static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
+                                                  int n, unsigned int idx)
+{
+    VirtQueue *vq = &vdev->vq[n];
+
+    vq->last_avail_idx = idx;
+    vq->shadow_avail_idx = idx;
+}
+
+/*
+ * Set the last-avail position of queue @n from @idx, interpreting it in the
+ * packed encoding when VIRTIO_F_RING_PACKED is negotiated.
+ */
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
+                                     unsigned int idx)
+{
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
+        return;
+    }
+    virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
+}
+
+/*
+ * Packed ring: intentionally a no-op, because there is no reference like
+ * the avail idx in shared memory to restore from.
+ */
+static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
+                                                       int n)
+{
+}
+
+/*
+ * Split ring: when queue @n has a descriptor table, reset last_avail_idx
+ * and shadow_avail_idx to the used index read via vring_used_idx().
+ */
+static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
+                                                      int n)
+{
+    RCU_READ_LOCK_GUARD();
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (!vq->vring.desc) {
+        return;
+    }
+    vq->last_avail_idx = vring_used_idx(vq);
+    vq->shadow_avail_idx = vq->last_avail_idx;
+}
+
+/* Dispatch to the packed or split restore helper for queue @n. */
+void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
+{
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        virtio_queue_split_restore_last_avail_idx(vdev, n);
+        return;
+    }
+    virtio_queue_packed_restore_last_avail_idx(vdev, n);
+}
+
+/*
+ * Packed ring: intentionally a no-op, the used idx was already updated
+ * through set_last_avail_idx().
+ */
+static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
+{
+}
+
+/*
+ * Split ring: when queue @n has a descriptor table, refresh the cached
+ * used_idx from vring_used_idx().
+ */
+static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
+{
+    RCU_READ_LOCK_GUARD();
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (!vq->vring.desc) {
+        return;
+    }
+    vq->used_idx = vring_used_idx(vq);
+}
+
+/*
+ * Refresh the cached used index of queue @n, dispatching to the packed or
+ * split helper depending on VIRTIO_F_RING_PACKED.
+ */
+void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
+{
+    /*
+     * The helpers return void; call them as plain statements rather than
+     * "return f();", which ISO C does not permit in a void function.
+     */
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        virtio_queue_packed_update_used_idx(vdev, n);
+    } else {
+        virtio_split_packed_update_used_idx(vdev, n);
+    }
+}
+
+/* Clear signalled_used_valid on queue @n. */
+void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
+{
+    VirtQueue *vq = &vdev->vq[n];
+
+    vq->signalled_used_valid = false;
+}
+
+/* Return a pointer to virtqueue @n of @vdev (no bounds check is done). */
+VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
+{
+    return &vdev->vq[n];
+}
+
+/* Return the queue_index stored in @vq. */
+uint16_t virtio_get_queue_index(VirtQueue *vq)
+{
+    uint16_t index = vq->queue_index;
+
+    return index;
+}
+
+/*
+ * Guest notifier handler: if the notifier was pending (test-and-clear),
+ * raise the interrupt for the owning virtqueue.
+ */
+static void virtio_queue_guest_notifier_read(EventNotifier *n)
+{
+    VirtQueue *vq;
+
+    if (!event_notifier_test_and_clear(n)) {
+        return;
+    }
+    vq = container_of(n, VirtQueue, guest_notifier);
+    virtio_irq(vq);
+}
+
+/*
+ * Install or remove the guest notifier handler of @vq.
+ *
+ * The read handler is installed only when @assign is set and @with_irqfd
+ * is not; in every other case the handler is cleared.  On de-assign the
+ * notifier is tested and cleared one last time.
+ */
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                bool with_irqfd)
+{
+    void (*handler)(EventNotifier *) = NULL;
+
+    if (assign && !with_irqfd) {
+        handler = virtio_queue_guest_notifier_read;
+    }
+    event_notifier_set_handler(&vq->guest_notifier, handler);
+
+    if (!assign) {
+        /*
+         * Test and clear the notifier before it gets closed, in case the
+         * poll callback didn't have time to run.
+         */
+        virtio_queue_guest_notifier_read(&vq->guest_notifier);
+    }
+}
+
+/* Return the guest notifier embedded in @vq. */
+EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
+{
+    EventNotifier *notifier = &vq->guest_notifier;
+
+    return notifier;
+}
+
+/* Poll-begin hook: turn virtqueue notifications off (argument 0). */
+static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
+{
+    virtio_queue_set_notification(container_of(n, VirtQueue, host_notifier),
+                                  0);
+}
+
+/*
+ * Poll hook; @opaque is the host EventNotifier.  Returns true when the
+ * owning virtqueue has a descriptor table and is not empty.
+ */
+static bool virtio_queue_host_notifier_aio_poll(void *opaque)
+{
+    EventNotifier *notifier = opaque;
+    VirtQueue *vq = container_of(notifier, VirtQueue, host_notifier);
+
+    if (!vq->vring.desc) {
+        return false;
+    }
+    return !virtio_queue_empty(vq);
+}
+
+/* Poll-ready hook: notify the virtqueue that owns host notifier @n. */
+static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
+{
+    virtio_queue_notify_vq(container_of(n, VirtQueue, host_notifier));
+}
+
+/* Poll-end hook: turn virtqueue notifications back on (argument 1). */
+static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
+{
+    /* The caller polls once more afterwards to catch racing requests. */
+    virtio_queue_set_notification(container_of(n, VirtQueue, host_notifier),
+                                  1);
+}
+
+/*
+ * Attach the host notifier of @vq to @ctx with the read handler, the poll
+ * and poll-ready callbacks, and the poll begin/end hooks.
+ */
+void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
+{
+    EventNotifier *notifier = &vq->host_notifier;
+
+    aio_set_event_notifier(ctx, notifier, true,
+                           virtio_queue_host_notifier_read,
+                           virtio_queue_host_notifier_aio_poll,
+                           virtio_queue_host_notifier_aio_poll_ready);
+    aio_set_event_notifier_poll(ctx, notifier,
+                                virtio_queue_host_notifier_aio_poll_begin,
+                                virtio_queue_host_notifier_aio_poll_end);
+}
+
+/*
+ * Variant of virtio_queue_aio_attach_host_notifier() that installs no
+ * polling callbacks.  Intended for rx virtqueues and similar cases where
+ * the handler does not pop every element: polling a virtqueue that is left
+ * non-empty only burns CPU cycles.
+ */
+void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
+{
+    EventNotifier *notifier = &vq->host_notifier;
+
+    aio_set_event_notifier(ctx, notifier, true,
+                           virtio_queue_host_notifier_read, NULL, NULL);
+}
+
+/*
+ * Detach the host notifier of @vq from @ctx by clearing all of its
+ * callbacks, then process a notification that may still be pending.
+ */
+void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
+{
+    EventNotifier *notifier = &vq->host_notifier;
+
+    aio_set_event_notifier(ctx, notifier, true, NULL, NULL, NULL);
+    /*
+     * Test and clear the notifier after disabling the event, in case the
+     * poll callback didn't have time to run.
+     */
+    virtio_queue_host_notifier_read(notifier);
+}
+
+/*
+ * Host notifier handler: if the notifier was pending (test-and-clear),
+ * notify the owning virtqueue.
+ */
+void virtio_queue_host_notifier_read(EventNotifier *n)
+{
+    VirtQueue *vq;
+
+    if (!event_notifier_test_and_clear(n)) {
+        return;
+    }
+    vq = container_of(n, VirtQueue, host_notifier);
+    virtio_queue_notify_vq(vq);
+}
+
+/* Return the host notifier embedded in @vq. */
+EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
+{
+    EventNotifier *notifier = &vq->host_notifier;
+
+    return notifier;
+}
+
+/* Record in @vq whether its host notifier is enabled. */
+void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
+{
+    vq->host_notifier_enabled = enabled;
+}
+
+/*
+ * Forward to the bus set_host_notifier_mr hook for queue @n.
+ * Returns the hook's result, or -1 when the bus provides no such hook.
+ */
+int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
+                                      MemoryRegion *mr, bool assign)
+{
+    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *vbc = VIRTIO_BUS_GET_CLASS(bus);
+
+    if (!vbc->set_host_notifier_mr) {
+        return -1;
+    }
+    return vbc->set_host_notifier_mr(bus->parent, n, mr, assign);
+}
+
+/*
+ * Replace vdev->bus_name with a private copy of @bus_name; the previous
+ * string is freed first.
+ */
+void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
+{
+    char *copy;
+
+    g_free(vdev->bus_name);
+    copy = g_strdup(bus_name);
+    vdev->bus_name = copy;
+}
+
+/*
+ * Report a printf-style error message and mark @vdev as broken.
+ *
+ * When VIRTIO_F_VERSION_1 is negotiated, NEEDS_RESET is also set in the
+ * device status and a config change notification is sent.
+ */
+void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    error_vreport(fmt, args);
+    va_end(args);
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+        vdev->status |= VIRTIO_CONFIG_S_NEEDS_RESET;
+        virtio_notify_config(vdev);
+    }
+
+    vdev->broken = true;
+}
+
+/*
+ * MemoryListener commit hook: re-initialize the region cache of each queue,
+ * stopping at the first queue whose ring size is 0.
+ */
+static void virtio_memory_listener_commit(MemoryListener *listener)
+{
+    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
+    int n;
+
+    for (n = 0; n < VIRTIO_QUEUE_MAX && vdev->vq[n].vring.num != 0; n++) {
+        virtio_init_region_cache(vdev, n);
+    }
+}
+
+/*
+ * DeviceClass realize hook for virtio devices.
+ *
+ * Runs the device-specific realize hook (if any), plugs the device into its
+ * virtio bus, then registers the memory listener and adds the device to
+ * virtio_list.  On failure the error is propagated through @errp; if the
+ * bus plug step fails, the device-specific unrealize hook is invoked to
+ * undo the realize step.
+ */
+static void virtio_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
+    Error *err = NULL;
+
+    /* Devices should either use vmsd or the load/save methods */
+    assert(!vdc->vmsd || !vdc->load);
+
+    if (vdc->realize != NULL) {
+        vdc->realize(dev, &err);
+        if (err != NULL) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+
+    virtio_bus_device_plugged(vdev, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        /*
+         * unrealize is an optional hook (virtio_device_unrealize() checks
+         * it for NULL as well), so do not call it blindly.
+         */
+        if (vdc->unrealize != NULL) {
+            vdc->unrealize(dev);
+        }
+        return;
+    }
+
+    vdev->listener.commit = virtio_memory_listener_commit;
+    vdev->listener.name = "virtio";
+    memory_listener_register(&vdev->listener, vdev->dma_as);
+    QTAILQ_INSERT_TAIL(&virtio_list, vdev, next);
+}
+
+/*
+ * DeviceClass unrealize hook for virtio devices: unregister the memory
+ * listener, unplug from the virtio bus, run the device-specific unrealize
+ * hook (if any), remove the device from virtio_list and free bus_name.
+ */
+static void virtio_device_unrealize(DeviceState *dev)
+{
+    VirtioDeviceClass *klass = VIRTIO_DEVICE_GET_CLASS(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    memory_listener_unregister(&vdev->listener);
+    virtio_bus_device_unplugged(vdev);
+
+    if (klass->unrealize) {
+        klass->unrealize(dev);
+    }
+
+    QTAILQ_REMOVE(&virtio_list, vdev, next);
+
+    g_free(vdev->bus_name);
+    vdev->bus_name = NULL;
+}
+
+/*
+ * Reset the region cache of each queue, stopping at the first queue whose
+ * ring size is 0, then free the virtqueue array.  Does nothing when the
+ * array was never allocated.
+ */
+static void virtio_device_free_virtqueues(VirtIODevice *vdev)
+{
+    int n;
+
+    if (vdev->vq == NULL) {
+        return;
+    }
+
+    for (n = 0; n < VIRTIO_QUEUE_MAX && vdev->vq[n].vring.num != 0; n++) {
+        virtio_virtqueue_reset_region_cache(&vdev->vq[n]);
+    }
+    g_free(vdev->vq);
+}
+
+/*
+ * QOM instance finalizer: release the virtqueues, the per-vector queue
+ * lists and the config space buffer.
+ */
+static void virtio_device_instance_finalize(Object *obj)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
+
+    virtio_device_free_virtqueues(vdev);
+
+    g_free(vdev->vector_queues);
+    g_free(vdev->config);
+}
+
+/*
+ * qdev properties common to all virtio devices; installed on the device
+ * class by virtio_device_class_init().
+ */
+static Property virtio_properties[] = {
+    /* Common feature bits, stored in host_features */
+    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
+    /* Boolean switches, both defaulting to true */
+    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
+    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
+    /* Backs virtio_legacy_check_disabled(); off by default */
+    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
+                     disable_legacy_check, false),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Default start_ioeventfd implementation (installed by
+ * virtio_device_class_init()).
+ *
+ * Assigns a host notifier and read handler to every queue with a non-zero
+ * size, then kicks each of those notifiers once.  If assigning a notifier
+ * fails, the queues set up so far are rolled back and the error code from
+ * virtio_bus_set_host_notifier() is returned.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
+{
+    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    int i, n, r, err;
+
+    /*
+     * Batch all the host notifiers in a single transaction to avoid
+     * quadratic time complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
+        VirtQueue *vq = &vdev->vq[n];
+        /* Skip queues that are not configured (size 0) */
+        if (!virtio_queue_get_num(vdev, n)) {
+            continue;
+        }
+        r = virtio_bus_set_host_notifier(qbus, n, true);
+        if (r < 0) {
+            err = r;
+            goto assign_error;
+        }
+        event_notifier_set_handler(&vq->host_notifier,
+                                   virtio_queue_host_notifier_read);
+    }
+
+    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
+        /* Kick right away to begin processing requests already in vring */
+        VirtQueue *vq = &vdev->vq[n];
+        if (!vq->vring.num) {
+            continue;
+        }
+        event_notifier_set(&vq->host_notifier);
+    }
+    memory_region_transaction_commit();
+    return 0;
+
+assign_error:
+    /* n is the index of the queue that failed; undo queues [0, n) */
+    i = n; /* save n for a second iteration after transaction is committed. */
+    while (--n >= 0) {
+        VirtQueue *vq = &vdev->vq[n];
+        if (!virtio_queue_get_num(vdev, n)) {
+            continue;
+        }
+
+        event_notifier_set_handler(&vq->host_notifier, NULL);
+        r = virtio_bus_set_host_notifier(qbus, n, false);
+        assert(r >= 0);
+    }
+    /*
+     * The transaction expects the ioeventfds to be open when it
+     * commits. Do it now, before the cleanup loop.
+     */
+    memory_region_transaction_commit();
+
+    /* Second pass over the same queues, now that the transaction is done */
+    while (--i >= 0) {
+        if (!virtio_queue_get_num(vdev, i)) {
+            continue;
+        }
+        virtio_bus_cleanup_host_notifier(qbus, i);
+    }
+    return err;
+}
+
+/* Start ioeventfd on the virtio bus @vdev is plugged into. */
+int virtio_device_start_ioeventfd(VirtIODevice *vdev)
+{
+    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+
+    return virtio_bus_start_ioeventfd(vbus);
+}
+
+/*
+ * Default stop_ioeventfd implementation (installed by
+ * virtio_device_class_init()).
+ *
+ * For every queue with a non-zero size: clear the host notifier handler
+ * and de-assign the notifier inside one memory transaction, then clean up
+ * the notifiers once the transaction has been committed.
+ */
+static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
+{
+    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    int idx;
+    int rc;
+
+    /*
+     * A single transaction for all host notifiers avoids quadratic time
+     * complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+    for (idx = 0; idx < VIRTIO_QUEUE_MAX; idx++) {
+        if (virtio_queue_get_num(vdev, idx)) {
+            event_notifier_set_handler(&vdev->vq[idx].host_notifier, NULL);
+            rc = virtio_bus_set_host_notifier(vbus, idx, false);
+            assert(rc >= 0);
+        }
+    }
+    /*
+     * The ioeventfds must still be open when the transaction commits, so
+     * commit first and run the cleanup loop afterwards.
+     */
+    memory_region_transaction_commit();
+
+    for (idx = 0; idx < VIRTIO_QUEUE_MAX; idx++) {
+        if (virtio_queue_get_num(vdev, idx)) {
+            virtio_bus_cleanup_host_notifier(vbus, idx);
+        }
+    }
+}
+
+/* Forward to virtio_bus_grab_ioeventfd() for the bus @vdev is plugged into. */
+int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
+{
+    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+
+    return virtio_bus_grab_ioeventfd(vbus);
+}
+
+/* Forward to virtio_bus_release_ioeventfd() for the bus @vdev is plugged into. */
+void virtio_device_release_ioeventfd(VirtIODevice *vdev)
+{
+    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+
+    virtio_bus_release_ioeventfd(vbus);
+}
+
+/*
+ * Class initializer for the base virtio device type: installs the
+ * realize/unrealize hooks, bus type and properties, the default ioeventfd
+ * start/stop implementations and the legacy feature mask, and initializes
+ * virtio_list.
+ */
+static void virtio_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    /* Generic device hooks */
+    dc->realize = virtio_device_realize;
+    dc->unrealize = virtio_device_unrealize;
+    dc->bus_type = TYPE_VIRTIO_BUS;
+    device_class_set_props(dc, virtio_properties);
+
+    /* Virtio defaults */
+    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
+    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
+    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
+
+    QTAILQ_INIT(&virtio_list);
+}
+
+/* Return whether ioeventfd is enabled on the bus @vdev is plugged into. */
+bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
+{
+    VirtioBusState *vbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+
+    return virtio_bus_ioeventfd_enabled(vbus);
+}
+
+/*
+ * QMP x-query-virtio: build a list with the QOM path and name of every
+ * realized device on virtio_list.
+ *
+ * Devices whose "realized" property reads false are dropped from
+ * virtio_list as a side effect.  Devices whose property lookup fails are
+ * skipped.  @errp is not written to.
+ *
+ * Returns the list (NULL when empty); it is returned to the caller, which
+ * is presumably responsible for freeing it.
+ */
+VirtioInfoList *qmp_x_query_virtio(Error **errp)
+{
+    VirtioInfoList *list = NULL;
+    VirtIODevice *vdev, *next_vdev;
+
+    /* Entries may be removed while walking, so use the _SAFE iterator. */
+    QTAILQ_FOREACH_SAFE(vdev, &virtio_list, next, next_vdev) {
+        DeviceState *dev = DEVICE(vdev);
+        Error *err = NULL;
+        QObject *obj = qmp_qom_get(dev->canonical_path, "realized", &err);
+
+        if (err == NULL) {
+            GString *is_realized = qobject_to_json_pretty(obj, true);
+            /* virtio device is NOT realized, remove it from list */
+            if (!strncmp(is_realized->str, "false", strlen("false"))) {
+                QTAILQ_REMOVE(&virtio_list, vdev, next);
+            } else {
+                /*
+                 * QAPI_LIST_PREPEND() allocates the list node itself, so
+                 * only the payload is allocated here.
+                 */
+                VirtioInfo *info = g_new0(VirtioInfo, 1);
+
+                info->path = g_strdup(dev->canonical_path);
+                info->name = g_strdup(vdev->name);
+                QAPI_LIST_PREPEND(list, info);
+            }
+            g_string_free(is_realized, true);
+        } else {
+            /* Lookup failed: skip the device, but do not leak the error. */
+            error_free(err);
+        }
+        qobject_unref(obj);
+    }
+
+    return list;
+}
+
+/*
+ * Look up the realized virtio device whose QOM canonical path equals @path.
+ *
+ * If the matching device cannot be queried through QOM or is not realized,
+ * it is removed from virtio_list and NULL is returned.  NULL is also
+ * returned when no device on virtio_list matches @path.
+ */
+static VirtIODevice *virtio_device_find(const char *path)
+{
+    VirtIODevice *vdev;
+
+    QTAILQ_FOREACH(vdev, &virtio_list, next) {
+        DeviceState *dev = DEVICE(vdev);
+        Error *err = NULL;
+        QObject *obj;
+        GString *is_realized;
+        bool realized;
+
+        if (strcmp(dev->canonical_path, path) != 0) {
+            continue;
+        }
+
+        obj = qmp_qom_get(dev->canonical_path, "realized", &err);
+        if (err != NULL) {
+            /* virtio device doesn't exist in QOM tree */
+            error_free(err);
+            qobject_unref(obj);
+            QTAILQ_REMOVE(&virtio_list, vdev, next);
+            return NULL;
+        }
+
+        is_realized = qobject_to_json_pretty(obj, true);
+        realized = strncmp(is_realized->str, "false", strlen("false")) != 0;
+        g_string_free(is_realized, true);
+        qobject_unref(obj);
+
+        if (!realized) {
+            /* virtio device is NOT realized, remove it from list */
+            QTAILQ_REMOVE(&virtio_list, vdev, next);
+            return NULL;
+        }
+
+        /* device exists in QOM tree & is realized */
+        return vdev;
+    }
+    return NULL;
+}
+
+/*
+ * CONVERT_FEATURES(type, map, is_status, bitmap)
+ *
+ * Statement expression that walks @map (terminated by an entry whose
+ * virtio_bit is -1) and evaluates to a singly linked list of @type nodes,
+ * one per entry whose bit is set in @bitmap; each node's value is a
+ * g_strdup() copy of the entry's feature_desc.
+ *
+ * When @is_status is non-zero, virtio_bit is used directly as the mask;
+ * otherwise it is a bit position and the mask is 1ULL << virtio_bit.
+ *
+ * Caller requirements:
+ *  - variables named 'i' and 'bit' must be declared in the calling scope;
+ *  - @bitmap must be a modifiable lvalue: every matched bit is cleared from
+ *    it, so whatever remains afterwards is the set of bits not in @map.
+ */
+#define CONVERT_FEATURES(type, map, is_status, bitmap)   \
+    ({                                                   \
+        type *list = NULL;                               \
+        type *node;                                      \
+        for (i = 0; map[i].virtio_bit != -1; i++) {      \
+            if (is_status) {                             \
+                bit = map[i].virtio_bit;                 \
+            }                                            \
+            else {                                       \
+                bit = 1ULL << map[i].virtio_bit;         \
+            }                                            \
+            if ((bitmap & bit) == 0) {                   \
+                continue;                                \
+            }                                            \
+            node = g_new0(type, 1);                      \
+            node->value = g_strdup(map[i].feature_desc); \
+            node->next = list;                           \
+            list = node;                                 \
+            bitmap ^= bit;                               \
+        }                                                \
+        list;                                            \
+    })
+
+/*
+ * Translate a device status byte into a newly allocated VirtioDeviceStatus.
+ *
+ * Status values listed in virtio_config_status_map are returned by name in
+ * ->statuses; any bits left over are reported through ->unknown_statuses.
+ */
+static VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap)
+{
+    /* 'i' and 'bit' are referenced by name inside CONVERT_FEATURES() */
+    int i;
+    uint8_t bit;
+    VirtioDeviceStatus *decoded = g_new0(VirtioDeviceStatus, 1);
+
+    decoded->statuses = CONVERT_FEATURES(strList, virtio_config_status_map,
+                                         1, bitmap);
+
+    /* The struct starts out zeroed, so only leftover bits need recording */
+    if (bitmap != 0) {
+        decoded->has_unknown_statuses = true;
+        decoded->unknown_statuses = bitmap;
+    }
+
+    return decoded;
+}
+
+/*
+ * Translate a vhost-user protocol feature bitmap into a newly allocated
+ * VhostDeviceProtocols.
+ *
+ * Bits listed in vhost_user_protocol_map are returned by name in
+ * ->protocols; any bits left over are reported through
+ * ->unknown_protocols.
+ */
+static VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap)
+{
+    /* 'i' and 'bit' are referenced by name inside CONVERT_FEATURES() */
+    int i;
+    uint64_t bit;
+    VhostDeviceProtocols *decoded = g_new0(VhostDeviceProtocols, 1);
+
+    decoded->protocols = CONVERT_FEATURES(strList, vhost_user_protocol_map,
+                                          0, bitmap);
+
+    /* The struct starts out zeroed, so only leftover bits need recording */
+    if (bitmap != 0) {
+        decoded->has_unknown_protocols = true;
+        decoded->unknown_protocols = bitmap;
+    }
+
+    return decoded;
+}
+
+/*
+ * Translate a virtio feature bitmap into a newly allocated
+ * VirtioDeviceFeatures.
+ *
+ * @device_id: virtio device ID, selects the device-specific feature table
+ * @bitmap: feature bits to decode
+ *
+ * Transport features are decoded first and removed from the bitmap; the
+ * remainder is then matched against the table of the given device type.
+ * Whatever is still set after both passes is reported through
+ * ->unknown_dev_features.
+ *
+ * A device ID without a feature table in this build (never had one, its
+ * table is compiled out, or the ID is not known here) is not an error: its
+ * device-specific bits simply end up in ->unknown_dev_features.  This
+ * function is reached from monitor queries, so it must not abort the
+ * process just because a table is missing.
+ */
+static VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id,
+                                                 uint64_t bitmap)
+{
+    VirtioDeviceFeatures *features;
+    /* 'i' and 'bit' are referenced by name inside CONVERT_FEATURES() */
+    uint64_t bit;
+    int i;
+
+    features = g_new0(VirtioDeviceFeatures, 1);
+    features->has_dev_features = true;
+
+    /* transport features */
+    features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0,
+                                            bitmap);
+
+    /* device features */
+    switch (device_id) {
+#ifdef CONFIG_VIRTIO_SERIAL
+    case VIRTIO_ID_CONSOLE:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_BLK
+    case VIRTIO_ID_BLOCK:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_GPU
+    case VIRTIO_ID_GPU:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_NET
+    case VIRTIO_ID_NET:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_SCSI
+    case VIRTIO_ID_SCSI:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_BALLOON
+    case VIRTIO_ID_BALLOON:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_IOMMU
+    case VIRTIO_ID_IOMMU:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_INPUT
+    case VIRTIO_ID_INPUT:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VHOST_USER_FS
+    case VIRTIO_ID_FS:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VHOST_VSOCK
+    case VIRTIO_ID_VSOCK:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_CRYPTO
+    case VIRTIO_ID_CRYPTO:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_MEM
+    case VIRTIO_ID_MEM:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_I2C_ADAPTER
+    case VIRTIO_ID_I2C_ADAPTER:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap);
+        break;
+#endif
+#ifdef CONFIG_VIRTIO_RNG
+    case VIRTIO_ID_RNG:
+        features->dev_features =
+            CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap);
+        break;
+#endif
+    /* No features */
+    case VIRTIO_ID_9P:
+    case VIRTIO_ID_PMEM:
+    case VIRTIO_ID_IOMEM:
+    case VIRTIO_ID_RPMSG:
+    case VIRTIO_ID_CLOCK:
+    case VIRTIO_ID_MAC80211_WLAN:
+    case VIRTIO_ID_MAC80211_HWSIM:
+    case VIRTIO_ID_RPROC_SERIAL:
+    case VIRTIO_ID_MEMORY_BALLOON:
+    case VIRTIO_ID_CAIF:
+    case VIRTIO_ID_SIGNAL_DIST:
+    case VIRTIO_ID_PSTORE:
+    case VIRTIO_ID_SOUND:
+    case VIRTIO_ID_BT:
+    case VIRTIO_ID_RPMB:
+    case VIRTIO_ID_VIDEO_ENCODER:
+    case VIRTIO_ID_VIDEO_DECODER:
+    case VIRTIO_ID_SCMI:
+    case VIRTIO_ID_NITRO_SEC_MOD:
+    case VIRTIO_ID_WATCHDOG:
+    case VIRTIO_ID_CAN:
+    case VIRTIO_ID_DMABUF:
+    case VIRTIO_ID_PARAM_SERV:
+    case VIRTIO_ID_AUDIO_POLICY:
+    case VIRTIO_ID_GPIO:
+        break;
+    default:
+        /*
+         * Either an ID this code does not know, or one whose case above
+         * was compiled out.  Leave the device-specific bits undecoded so
+         * they are reported as unknown below.
+         */
+        break;
+    }
+
+    features->has_unknown_dev_features = bitmap != 0;
+    if (features->has_unknown_dev_features) {
+        features->unknown_dev_features = bitmap;
+    }
+
+    return features;
+}
+
+/*
+ * QMP handler: report the state of the virtio device at QOM path @path.
+ *
+ * Returns a newly allocated VirtioStatus describing the device, including
+ * decoded feature sets and, when vhost has been started for the device,
+ * a VhostStatus sub-object.  Sets @errp and returns NULL if no virtio
+ * device is found for @path.
+ */
+VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
+{
+    VirtIODevice *vdev = virtio_device_find(path);
+    VirtioStatus *status;
+    const char *endian;
+
+    if (!vdev) {
+        error_setg(errp, "Path %s is not a VirtIODevice", path);
+        return NULL;
+    }
+
+    status = g_new0(VirtioStatus, 1);
+
+    /* identification */
+    status->name = g_strdup(vdev->name);
+    status->bus_name = g_strdup(vdev->bus_name);
+    status->device_id = vdev->device_id;
+
+    /* feature sets, all decoded against this device's type */
+    status->guest_features = qmp_decode_features(vdev->device_id,
+                                                 vdev->guest_features);
+    status->host_features = qmp_decode_features(vdev->device_id,
+                                                vdev->host_features);
+    status->backend_features = qmp_decode_features(vdev->device_id,
+                                                   vdev->backend_features);
+
+    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE) {
+        endian = "little";
+    } else if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG) {
+        endian = "big";
+    } else {
+        endian = "unknown";
+    }
+    status->device_endian = g_strdup(endian);
+
+    /* plain copies of the device's runtime state */
+    status->num_vqs = virtio_get_num_queues(vdev);
+    status->status = qmp_decode_status(vdev->status);
+    status->isr = vdev->isr;
+    status->queue_sel = vdev->queue_sel;
+    status->vm_running = vdev->vm_running;
+    status->broken = vdev->broken;
+    status->disabled = vdev->disabled;
+    status->use_started = vdev->use_started;
+    status->started = vdev->started;
+    status->start_on_kick = vdev->start_on_kick;
+    status->disable_legacy_check = vdev->disable_legacy_check;
+    status->use_guest_notifier_mask = vdev->use_guest_notifier_mask;
+    status->vhost_started = vdev->vhost_started;
+
+    /* The vhost sub-object is only present once vhost has been started */
+    status->has_vhost_dev = vdev->vhost_started;
+    if (vdev->vhost_started) {
+        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+        struct vhost_dev *hdev = vdc->get_vhost(vdev);
+        VhostStatus *vhost = g_new0(VhostStatus, 1);
+
+        vhost->n_mem_sections = hdev->n_mem_sections;
+        vhost->n_tmp_sections = hdev->n_tmp_sections;
+        vhost->nvqs = hdev->nvqs;
+        vhost->vq_index = hdev->vq_index;
+        vhost->features = qmp_decode_features(vdev->device_id,
+                                              hdev->features);
+        vhost->acked_features = qmp_decode_features(vdev->device_id,
+                                                    hdev->acked_features);
+        vhost->backend_features = qmp_decode_features(vdev->device_id,
+                                                      hdev->backend_features);
+        vhost->protocol_features =
+            qmp_decode_protocols(hdev->protocol_features);
+        vhost->max_queues = hdev->max_queues;
+        vhost->backend_cap = hdev->backend_cap;
+        vhost->log_enabled = hdev->log_enabled;
+        vhost->log_size = hdev->log_size;
+
+        status->vhost_dev = vhost;
+    }
+
+    return status;
+}
+
+/*
+ * QMP handler: report the vhost-side state of one virtqueue.
+ *
+ * @path: QOM path of the virtio device
+ * @queue: virtqueue number; must lie within the range handled by the
+ *         device's vhost_dev, i.e. [vq_index, vq_index + nvqs)
+ * @errp: set when the device is not found, vhost has not been started,
+ *        or @queue is out of range
+ *
+ * Returns a newly allocated VirtVhostQueueStatus, or NULL on error.
+ */
+VirtVhostQueueStatus *qmp_x_query_virtio_vhost_queue_status(const char *path,
+                                                            uint16_t queue,
+                                                            Error **errp)
+{
+    VirtIODevice *vdev;
+    VirtVhostQueueStatus *status;
+    VirtioDeviceClass *vdc;
+    struct vhost_dev *hdev;
+    struct vhost_virtqueue *hvq;
+
+    vdev = virtio_device_find(path);
+    if (vdev == NULL) {
+        error_setg(errp, "Path %s is not a VirtIODevice", path);
+        return NULL;
+    }
+
+    if (!vdev->vhost_started) {
+        error_setg(errp, "Error: vhost device has not started yet");
+        return NULL;
+    }
+
+    vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    hdev = vdc->get_vhost(vdev);
+
+    if (queue < hdev->vq_index || queue >= hdev->vq_index + hdev->nvqs) {
+        error_setg(errp, "Invalid vhost virtqueue number %d", queue);
+        return NULL;
+    }
+
+    /*
+     * @queue is a device-wide virtqueue number, whereas the range check
+     * above shows this vhost_dev covers nvqs queues starting at vq_index.
+     * Rebase before indexing so a non-zero vq_index cannot read past the
+     * end of hdev->vqs[].
+     */
+    hvq = &hdev->vqs[queue - hdev->vq_index];
+
+    status = g_new0(VirtVhostQueueStatus, 1);
+    status->name = g_strdup(vdev->name);
+    status->kick = hvq->kick;
+    status->call = hvq->call;
+    status->desc = (uintptr_t)hvq->desc;
+    status->avail = (uintptr_t)hvq->avail;
+    status->used = (uintptr_t)hvq->used;
+    status->num = hvq->num;
+    status->desc_phys = hvq->desc_phys;
+    status->desc_size = hvq->desc_size;
+    status->avail_phys = hvq->avail_phys;
+    status->avail_size = hvq->avail_size;
+    status->used_phys = hvq->used_phys;
+    status->used_size = hvq->used_size;
+
+    return status;
+}
+
+/*
+ * QMP handler: report the state of one virtqueue of a virtio device.
+ *
+ * @path: QOM path of the virtio device
+ * @queue: virtqueue number; must be below VIRTIO_QUEUE_MAX and refer to a
+ *         queue with a non-zero size
+ * @errp: set when the device is not found or @queue is invalid
+ *
+ * Returns a newly allocated VirtQueueStatus, or NULL on error.
+ *
+ * last_avail_idx comes from the vhost backend when vhost has been started
+ * and handles this queue; it is left absent if that query fails or the
+ * queue is outside the vhost range.  Without vhost, both last_avail_idx
+ * and shadow_avail_idx are taken from the VirtQueue itself.
+ */
+VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
+                                                 uint16_t queue,
+                                                 Error **errp)
+{
+    VirtIODevice *vdev;
+    VirtQueue *vq;
+    VirtQueueStatus *status;
+
+    vdev = virtio_device_find(path);
+    if (vdev == NULL) {
+        error_setg(errp, "Path %s is not a VirtIODevice", path);
+        return NULL;
+    }
+
+    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
+        error_setg(errp, "Invalid virtqueue number %d", queue);
+        return NULL;
+    }
+    vq = &vdev->vq[queue];
+
+    status = g_new0(VirtQueueStatus, 1);
+    status->name = g_strdup(vdev->name);
+    status->queue_index = vq->queue_index;
+    status->inuse = vq->inuse;
+    status->vring_num = vq->vring.num;
+    status->vring_num_default = vq->vring.num_default;
+    status->vring_align = vq->vring.align;
+    status->vring_desc = vq->vring.desc;
+    status->vring_avail = vq->vring.avail;
+    status->vring_used = vq->vring.used;
+    status->used_idx = vq->used_idx;
+    status->signalled_used = vq->signalled_used;
+    status->signalled_used_valid = vq->signalled_used_valid;
+
+    if (vdev->vhost_started) {
+        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+        struct vhost_dev *hdev = vdc->get_vhost(vdev);
+
+        /* check if vq index exists for vhost as well  */
+        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
+            struct vhost_vring_state state = {
+                .index = hdev->vhost_ops->vhost_get_vq_index(hdev, queue),
+            };
+
+            /*
+             * vhost_get_vring_base() returns a status code; the ring base
+             * itself is written to state.num.  Only publish the index
+             * when the backend query succeeded.
+             */
+            if (hdev->vhost_ops->vhost_get_vring_base(hdev, &state) >= 0) {
+                status->has_last_avail_idx = true;
+                status->last_avail_idx = state.num;
+            }
+        }
+    } else {
+        status->has_shadow_avail_idx = true;
+        status->has_last_avail_idx = true;
+        status->last_avail_idx = vq->last_avail_idx;
+        status->shadow_avail_idx = vq->shadow_avail_idx;
+    }
+
+    return status;
+}
+
+/*
+ * Turn a descriptor flags word into a list of flag names.
+ *
+ * Each flag in the table below that is set in @flags contributes one
+ * strList node.  Nodes are prepended, so the result is in reverse table
+ * order.  Returns NULL when none of the known flags is set.
+ */
+static strList *qmp_decode_vring_desc_flags(uint16_t flags)
+{
+    /* Known flags; the entry with a zero mask terminates the table */
+    static const struct {
+        uint16_t flag;
+        const char *value;
+    } flag_names[] = {
+        { VRING_DESC_F_NEXT, "next" },
+        { VRING_DESC_F_WRITE, "write" },
+        { VRING_DESC_F_INDIRECT, "indirect" },
+        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
+        { 1 << VRING_PACKED_DESC_F_USED, "used" },
+        { 0, "" }
+    };
+    strList *head = NULL;
+    int idx = 0;
+
+    while (flag_names[idx].flag != 0) {
+        if ((flag_names[idx].flag & flags) != 0) {
+            strList *entry = g_malloc0(sizeof(strList));
+
+            entry->value = g_strdup(flag_names[idx].value);
+            entry->next = head;
+            head = entry;
+        }
+        idx++;
+    }
+
+    return head;
+}
+
+/*
+ * QMP handler: dump one element (descriptor chain) of a split virtqueue.
+ *
+ * @path: QOM path of the virtio device
+ * @queue: virtqueue number
+ * @has_index: whether @index was supplied
+ * @index: position in the available ring to inspect (taken modulo the
+ *         ring size); when absent, the queue's last_avail_idx is used
+ * @errp: set on any failure
+ *
+ * Returns a newly allocated VirtioQueueElement, or NULL on error.  Packed
+ * rings are rejected.  If the chain head is an indirect descriptor, the
+ * indirect table is walked instead of the main descriptor ring.
+ * Descriptors are prepended while walking, so ->descs lists the chain in
+ * reverse order.
+ *
+ * The head index and the indirect table size come from guest memory, so
+ * both are validated before being used to read descriptors.
+ */
+VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
+                                                     uint16_t queue,
+                                                     bool has_index,
+                                                     uint16_t index,
+                                                     Error **errp)
+{
+    VirtIODevice *vdev;
+    VirtQueue *vq;
+    VirtioQueueElement *element = NULL;
+
+    vdev = virtio_device_find(path);
+    if (vdev == NULL) {
+        error_setg(errp, "Path %s is not a VirtIO device", path);
+        return NULL;
+    }
+
+    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
+        error_setg(errp, "Invalid virtqueue number %d", queue);
+        return NULL;
+    }
+    vq = &vdev->vq[queue];
+
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        error_setg(errp, "Packed ring not supported");
+        return NULL;
+    } else {
+        unsigned int head, i, max, ndescs;
+        VRingMemoryRegionCaches *caches;
+        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+        MemoryRegionCache *desc_cache;
+        VRingDesc desc;
+        VirtioRingDescList *list = NULL;
+        VirtioRingDescList *node;
+        int rc;
+
+        RCU_READ_LOCK_GUARD();
+
+        max = vq->vring.num;
+
+        caches = vring_get_region_caches(vq);
+        if (!caches) {
+            error_setg(errp, "Region caches not initialized");
+            return NULL;
+        }
+        if (caches->desc.len < max * sizeof(VRingDesc)) {
+            error_setg(errp, "Cannot map descriptor ring");
+            return NULL;
+        }
+
+        if (!has_index) {
+            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
+        } else {
+            head = vring_avail_ring(vq, index % vq->vring.num);
+        }
+
+        /* The head comes from the guest's avail ring: never trust it */
+        if (head >= max) {
+            error_setg(errp, "Invalid descriptor head index %u", head);
+            return NULL;
+        }
+        i = head;
+
+        desc_cache = &caches->desc;
+        vring_split_desc_read(vdev, &desc, desc_cache, i);
+        if (desc.flags & VRING_DESC_F_INDIRECT) {
+            int64_t len;
+
+            /* An indirect table must hold a whole number of descriptors */
+            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
+                error_setg(errp, "Invalid size for indirect buffer table");
+                goto done;
+            }
+
+            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+                                           desc.addr, desc.len, false);
+            desc_cache = &indirect_desc_cache;
+            if (len < desc.len) {
+                error_setg(errp, "Cannot map indirect buffer");
+                goto done;
+            }
+
+            /* From here on, walk the indirect table from its first entry */
+            max = desc.len / sizeof(VRingDesc);
+            i = 0;
+            vring_split_desc_read(vdev, &desc, desc_cache, i);
+        }
+
+        element = g_new0(VirtioQueueElement, 1);
+        element->avail = g_new0(VirtioRingAvail, 1);
+        element->used = g_new0(VirtioRingUsed, 1);
+        element->name = g_strdup(vdev->name);
+        element->index = head;
+        element->avail->flags = vring_avail_flags(vq);
+        element->avail->idx = vring_avail_idx(vq);
+        element->avail->ring = head;
+        element->used->flags = vring_used_flags(vq);
+        element->used->idx = vring_used_idx(vq);
+        ndescs = 0;
+
+        do {
+            /* A buggy driver may produce an infinite loop */
+            if (ndescs >= max) {
+                break;
+            }
+            node = g_new0(VirtioRingDescList, 1);
+            node->value = g_new0(VirtioRingDesc, 1);
+            node->value->addr = desc.addr;
+            node->value->len = desc.len;
+            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
+            node->next = list;
+            list = node;
+
+            ndescs++;
+            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache,
+                                                max, &i);
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+        element->descs = list;
+done:
+        /* Harmless if the indirect cache was never initialised */
+        address_space_cache_destroy(&indirect_desc_cache);
+    }
+
+    return element;
+}
+
+/*
+ * Type description for the abstract virtio device base type: a child of
+ * TYPE_DEVICE whose instances are VirtIODevice and whose class structure
+ * is VirtioDeviceClass.
+ */
+static const TypeInfo virtio_device_info = {
+    .name = TYPE_VIRTIO_DEVICE,
+    .parent = TYPE_DEVICE,
+    .abstract = true,
+    .instance_size = sizeof(VirtIODevice),
+    .instance_finalize = virtio_device_instance_finalize,
+    .class_size = sizeof(VirtioDeviceClass),
+    .class_init = virtio_device_class_init,
+};
+
+/* Register the virtio device base type described by virtio_device_info. */
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_device_info);
+}
+
+/* Hand the registration function above to the type_init() hook. */
+type_init(virtio_register_types)